Split sequences rewriter (#4194)

This is in preparation for making the strings rewriter configurable for stats. This moves all utility functions from SequencesRewriter to a proper place. This includes three new groupings of utility functions: those involving arithmetic entailments, those involving string entailments, those involving regular expression entailments. Here, "entailments" loosely means any question regarding a (set of) terms or formulas. No major code changes. Added some missing documentation and lightly cleaned a few blocks of code in cpp.
author: Andrew Reynolds <andrew.j.reynolds@gmail.com> 2020-04-03 09:43:12 -0500
committer: GitHub <noreply@github.com> 2020-04-03 09:43:12 -0500
commit: d91b52085d7e3bbda65117c0cd88433aed383aff (patch)
tree: 5ed2055704066d28a3247a82030ed44bfeda4a57 /src/theory/strings/strings_entail.cpp
parent: e24e6f3620996ee9e5010d30fefc51247cc55fdc (diff)
1 files changed, 994 insertions, 0 deletions
diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp
new file mode 100644
index 000000000..a1abfabe5
--- /dev/null
+++ b/src/theory/strings/strings_entail.cpp
@@ -0,0 +1,994 @@
+/*********************                                                        */
+/*! \file strings_entail.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds, Andres Noetzli
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of entailment tests involving strings.
+ **/
+
+#include "theory/strings/strings_entail.h"
+
+#include "expr/node_builder.h"
+#include "theory/rewriter.h"
+#include "theory/strings/arith_entail.h"
+#include "theory/strings/sequences_rewriter.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
+
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+bool StringsEntail::canConstantContainConcat(Node c,
+                                             Node n,
+                                             int& firstc,
+                                             int& lastc)
+{
+  Assert(c.isConst());
+  CVC4::String t = c.getConst<String>();
+  const std::vector<unsigned>& tvec = t.getVec();
+  Assert(n.getKind() == STRING_CONCAT);
+  // must find constant components in order
+  size_t pos = 0;
+  firstc = -1;
+  lastc = -1;
+  for (unsigned i = 0; i < n.getNumChildren(); i++)
+  {
+    if (n[i].isConst())
+    {
+      firstc = firstc == -1 ? i : firstc;
+      lastc = i;
+      CVC4::String s = n[i].getConst<String>();
+      size_t new_pos = t.find(s, pos);
+      if (new_pos == std::string::npos)
+      {
+        return false;
+      }
+      else
+      {
+        pos = new_pos + s.size();
+      }
+    }
+    else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
+    {
+      // find the first occurrence of a digit starting at pos
+      while (pos < tvec.size() && !String::isDigit(tvec[pos]))
+      {
+        pos++;
+      }
+      if (pos == tvec.size())
+      {
+        return false;
+      }
+      // must consume at least one digit here
+      pos++;
+    }
+  }
+  return true;
+}
+
+bool StringsEntail::canConstantContainList(Node c,
+                                           std::vector<Node>& l,
+                                           int& firstc,
+                                           int& lastc)
+{
+  Assert(c.isConst());
+  // must find constant components in order
+  size_t pos = 0;
+  firstc = -1;
+  lastc = -1;
+  for (unsigned i = 0; i < l.size(); i++)
+  {
+    if (l[i].isConst())
+    {
+      firstc = firstc == -1 ? i : firstc;
+      lastc = i;
+      size_t new_pos = Word::find(c, l[i], pos);
+      if (new_pos == std::string::npos)
+      {
+        return false;
+      }
+      else
+      {
+        pos = new_pos + Word::getLength(l[i]);
+      }
+    }
+  }
+  return true;
+}
+
+bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
+                                        std::vector<Node>& nr,
+                                        int dir,
+                                        Node& curr)
+{
+  Assert(dir == 1 || dir == -1);
+  Assert(nr.empty());
+  Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0));
+  bool ret = false;
+  bool success;
+  unsigned sindex = 0;
+  do
+  {
+    Assert(!curr.isNull());
+    success = false;
+    if (curr != zero && sindex < n1.size())
+    {
+      unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
+      if (n1[sindex_use].isConst())
+      {
+        // could strip part of a constant
+        Node lowerBound =
+            ArithEntail::getConstantBound(Rewriter::rewrite(curr));
+        if (!lowerBound.isNull())
+        {
+          Assert(lowerBound.isConst());
+          Rational lbr = lowerBound.getConst<Rational>();
+          if (lbr.sgn() > 0)
+          {
+            Assert(ArithEntail::check(curr, true));
+            CVC4::String s = n1[sindex_use].getConst<String>();
+            Node ncl =
+                NodeManager::currentNM()->mkConst(CVC4::Rational(s.size()));
+            Node next_s =
+                NodeManager::currentNM()->mkNode(MINUS, lowerBound, ncl);
+            next_s = Rewriter::rewrite(next_s);
+            Assert(next_s.isConst());
+            // we can remove the entire constant
+            if (next_s.getConst<Rational>().sgn() >= 0)
+            {
+              curr = Rewriter::rewrite(
+                  NodeManager::currentNM()->mkNode(MINUS, curr, ncl));
+              success = true;
+              sindex++;
+            }
+            else
+            {
+              // we can remove part of the constant
+              // lower bound minus the length of a concrete string is negative,
+              // hence lowerBound cannot be larger than long max
+              Assert(lbr < Rational(String::maxSize()));
+              curr = Rewriter::rewrite(
+                  NodeManager::currentNM()->mkNode(MINUS, curr, lowerBound));
+              uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
+              Assert(lbsize < s.size());
+              if (dir == 1)
+              {
+                // strip partially from the front
+                nr.push_back(
+                    NodeManager::currentNM()->mkConst(s.prefix(lbsize)));
+                n1[sindex_use] = NodeManager::currentNM()->mkConst(
+                    s.suffix(s.size() - lbsize));
+              }
+              else
+              {
+                // strip partially from the back
+                nr.push_back(
+                    NodeManager::currentNM()->mkConst(s.suffix(lbsize)));
+                n1[sindex_use] = NodeManager::currentNM()->mkConst(
+                    s.prefix(s.size() - lbsize));
+              }
+              ret = true;
+            }
+            Assert(ArithEntail::check(curr));
+          }
+          else
+          {
+            // we cannot remove the constant
+          }
+        }
+      }
+      else
+      {
+        Node next_s = NodeManager::currentNM()->mkNode(
+            MINUS,
+            curr,
+            NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use]));
+        next_s = Rewriter::rewrite(next_s);
+        if (ArithEntail::check(next_s))
+        {
+          success = true;
+          curr = next_s;
+          sindex++;
+        }
+      }
+    }
+  } while (success);
+  if (sindex > 0)
+  {
+    if (dir == 1)
+    {
+      nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
+      n1.erase(n1.begin(), n1.begin() + sindex);
+    }
+    else
+    {
+      nr.insert(nr.end(), n1.end() - sindex, n1.end());
+      n1.erase(n1.end() - sindex, n1.end());
+    }
+    ret = true;
+  }
+  return ret;
+}
+
+int StringsEntail::componentContains(std::vector<Node>& n1,
+                                     std::vector<Node>& n2,
+                                     std::vector<Node>& nb,
+                                     std::vector<Node>& ne,
+                                     bool computeRemainder,
+                                     int remainderDir)
+{
+  Assert(nb.empty());
+  Assert(ne.empty());
+  // if n2 is a singleton, we can do optimized version here
+  if (n2.size() == 1)
+  {
+    for (unsigned i = 0; i < n1.size(); i++)
+    {
+      Node n1rb;
+      Node n1re;
+      if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
+      {
+        if (computeRemainder)
+        {
+          n1[i] = n2[0];
+          if (remainderDir != -1)
+          {
+            if (!n1re.isNull())
+            {
+              ne.push_back(n1re);
+            }
+            ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
+            n1.erase(n1.begin() + i + 1, n1.end());
+          }
+          else if (!n1re.isNull())
+          {
+            n1[i] = Rewriter::rewrite(
+                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
+          }
+          if (remainderDir != 1)
+          {
+            nb.insert(nb.end(), n1.begin(), n1.begin() + i);
+            n1.erase(n1.begin(), n1.begin() + i);
+            if (!n1rb.isNull())
+            {
+              nb.push_back(n1rb);
+            }
+          }
+          else if (!n1rb.isNull())
+          {
+            n1[i] = Rewriter::rewrite(
+                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
+          }
+        }
+        return i;
+      }
+    }
+  }
+  else if (n1.size() >= n2.size())
+  {
+    unsigned diff = n1.size() - n2.size();
+    for (unsigned i = 0; i <= diff; i++)
+    {
+      Node n1rb_first;
+      Node n1re_first;
+      // first component of n2 must be a suffix
+      if (componentContainsBase(n1[i],
+                                n2[0],
+                                n1rb_first,
+                                n1re_first,
+                                1,
+                                computeRemainder && remainderDir != 1))
+      {
+        Assert(n1re_first.isNull());
+        for (unsigned j = 1; j < n2.size(); j++)
+        {
+          // are we in the last component?
+          if (j + 1 == n2.size())
+          {
+            Node n1rb_last;
+            Node n1re_last;
+            // last component of n2 must be a prefix
+            if (componentContainsBase(n1[i + j],
+                                      n2[j],
+                                      n1rb_last,
+                                      n1re_last,
+                                      -1,
+                                      computeRemainder && remainderDir != -1))
+            {
+              Assert(n1rb_last.isNull());
+              if (computeRemainder)
+              {
+                if (remainderDir != -1)
+                {
+                  if (!n1re_last.isNull())
+                  {
+                    ne.push_back(n1re_last);
+                  }
+                  ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
+                  n1.erase(n1.begin() + i + j + 1, n1.end());
+                  n1[i + j] = n2[j];
+                }
+                if (remainderDir != 1)
+                {
+                  n1[i] = n2[0];
+                  nb.insert(nb.end(), n1.begin(), n1.begin() + i);
+                  n1.erase(n1.begin(), n1.begin() + i);
+                  if (!n1rb_first.isNull())
+                  {
+                    nb.push_back(n1rb_first);
+                  }
+                }
+              }
+              return i;
+            }
+            else
+            {
+              break;
+            }
+          }
+          else if (n1[i + j] != n2[j])
+          {
+            break;
+          }
+        }
+      }
+    }
+  }
+  return -1;
+}
+
+bool StringsEntail::componentContainsBase(
+    Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
+{
+  Assert(n1rb.isNull());
+  Assert(n1re.isNull());
+
+  NodeManager* nm = NodeManager::currentNM();
+
+  if (n1 == n2)
+  {
+    return true;
+  }
+  else
+  {
+    if (n1.isConst() && n2.isConst())
+    {
+      size_t len1 = Word::getLength(n1);
+      size_t len2 = Word::getLength(n2);
+      if (len2 < len1)
+      {
+        if (dir == 1)
+        {
+          if (Word::suffix(n1, len2) == n2)
+          {
+            if (computeRemainder)
+            {
+              n1rb = Word::prefix(n1, len1 - len2);
+            }
+            return true;
+          }
+        }
+        else if (dir == -1)
+        {
+          if (Word::prefix(n1, len2) == n2)
+          {
+            if (computeRemainder)
+            {
+              n1re = Word::suffix(n1, len1 - len2);
+            }
+            return true;
+          }
+        }
+        else
+        {
+          size_t f = Word::find(n1, n2);
+          if (f != std::string::npos)
+          {
+            if (computeRemainder)
+            {
+              if (f > 0)
+              {
+                n1rb = Word::prefix(n1, f);
+              }
+              if (len1 > f + len2)
+              {
+                n1re = Word::suffix(n1, len1 - (f + len2));
+              }
+            }
+            return true;
+          }
+        }
+      }
+    }
+    else
+    {
+      // cases for:
+      //   n1 = x   containing   n2 = substr( x, n2[1], n2[2] )
+      if (n2.getKind() == STRING_SUBSTR)
+      {
+        if (n2[0] == n1)
+        {
+          bool success = true;
+          Node start_pos = n2[1];
+          Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]);
+          Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]);
+          if (dir == 1)
+          {
+            // To be a suffix, start + length must be greater than
+            // or equal to the length of the string.
+            success = ArithEntail::check(end_pos, len_n2s);
+          }
+          else if (dir == -1)
+          {
+            // To be a prefix, must literally start at 0, since
+            //   if we knew it started at <0, it should be rewritten to "",
+            //   if we knew it started at 0, then n2[1] should be rewritten to
+            //   0.
+            success = start_pos.isConst()
+                      && start_pos.getConst<Rational>().sgn() == 0;
+          }
+          if (success)
+          {
+            if (computeRemainder)
+            {
+              // we can only compute the remainder if start_pos and end_pos
+              // are known to be non-negative.
+              if (!ArithEntail::check(start_pos)
+                  || !ArithEntail::check(end_pos))
+              {
+                return false;
+              }
+              if (dir != 1)
+              {
+                n1rb = nm->mkNode(
+                    STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos);
+              }
+              if (dir != -1)
+              {
+                n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
+              }
+            }
+            return true;
+          }
+        }
+      }
+
+      if (!computeRemainder && dir == 0)
+      {
+        if (n1.getKind() == STRING_STRREPL)
+        {
+          // (str.contains (str.replace x y z) w) ---> true
+          // if (str.contains x w) --> true and (str.contains z w) ---> true
+          Node xCtnW = StringsEntail::checkContains(n1[0], n2);
+          if (!xCtnW.isNull() && xCtnW.getConst<bool>())
+          {
+            Node zCtnW = StringsEntail::checkContains(n1[2], n2);
+            if (!zCtnW.isNull() && zCtnW.getConst<bool>())
+            {
+              return true;
+            }
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
+bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
+                                           std::vector<Node>& n2,
+                                           std::vector<Node>& nb,
+                                           std::vector<Node>& ne,
+                                           int dir)
+{
+  Assert(nb.empty());
+  Assert(ne.empty());
+
+  NodeManager* nm = NodeManager::currentNM();
+  bool changed = false;
+  // for ( forwards, backwards ) direction
+  for (unsigned r = 0; r < 2; r++)
+  {
+    if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
+    {
+      unsigned index0 = r == 0 ? 0 : n1.size() - 1;
+      unsigned index1 = r == 0 ? 0 : n2.size() - 1;
+      bool removeComponent = false;
+      Node n1cmp = n1[index0];
+
+      if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0)
+      {
+        return false;
+      }
+
+      std::vector<Node> sss;
+      std::vector<Node> sls;
+      n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
+      Trace("strings-rewrite-debug2")
+          << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
+          << ", dir = " << dir << std::endl;
+      if (n1cmp.isConst())
+      {
+        CVC4::String s = n1cmp.getConst<String>();
+        // overlap is an overapproximation of the number of characters
+        // n2[index1] can match in s
+        unsigned overlap = s.size();
+        if (n2[index1].isConst())
+        {
+          CVC4::String t = n2[index1].getConst<String>();
+          std::size_t ret = r == 0 ? s.find(t) : s.rfind(t);
+          if (ret == std::string::npos)
+          {
+            if (n1.size() == 1)
+            {
+              // can remove everything
+              //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
+              //   str.contains( "", str.++( "ba", x ) )
+              removeComponent = true;
+            }
+            else if (sss.empty())  // only if not substr
+            {
+              // check how much overlap there is
+              // This is used to partially strip off the endpoint
+              // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
+              // str.contains( str.++( "c", x ), str.++( "cd", y ) )
+              overlap = r == 0 ? s.overlap(t) : t.overlap(s);
+            }
+            else
+            {
+              // if we are looking at a substring, we can remove the component
+              // if there is no overlap
+              //   e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
+              //        --> str.contains( x, "a" )
+              removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0);
+            }
+          }
+          else if (sss.empty())  // only if not substr
+          {
+            Assert(ret < s.size());
+            // can strip off up to the find position, e.g.
+            // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
+            // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
+            // and
+            // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
+            // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
+            overlap = s.size() - ret;
+          }
+        }
+        else
+        {
+          // inconclusive
+        }
+        // process the overlap
+        if (overlap < s.size())
+        {
+          changed = true;
+          if (overlap == 0)
+          {
+            removeComponent = true;
+          }
+          else
+          {
+            // can drop the prefix (resp. suffix) from the first (resp. last)
+            // component
+            if (r == 0)
+            {
+              nb.push_back(nm->mkConst(s.prefix(s.size() - overlap)));
+              n1[index0] = nm->mkConst(s.suffix(overlap));
+            }
+            else
+            {
+              ne.push_back(nm->mkConst(s.suffix(s.size() - overlap)));
+              n1[index0] = nm->mkConst(s.prefix(overlap));
+            }
+          }
+        }
+      }
+      else if (n1cmp.getKind() == STRING_ITOS)
+      {
+        if (n2[index1].isConst())
+        {
+          CVC4::String t = n2[index1].getConst<String>();
+
+          if (n1.size() == 1)
+          {
+            // if n1.size()==1, then if n2[index1] is not a number, we can drop
+            // the entire component
+            //    e.g. str.contains( int.to.str(x), "123a45") --> false
+            if (!t.isNumber())
+            {
+              removeComponent = true;
+            }
+          }
+          else
+          {
+            const std::vector<unsigned>& tvec = t.getVec();
+            Assert(tvec.size() > 0);
+
+            // if n1.size()>1, then if the first (resp. last) character of
+            // n2[index1]
+            //  is not a digit, we can drop the entire component, e.g.:
+            //    str.contains( str.++( int.to.str(x), y ), "a12") -->
+            //    str.contains( y, "a12" )
+            //    str.contains( str.++( y, int.to.str(x) ), "a0b") -->
+            //    str.contains( y, "a0b" )
+            unsigned i = r == 0 ? 0 : (tvec.size() - 1);
+            if (!String::isDigit(tvec[i]))
+            {
+              removeComponent = true;
+            }
+          }
+        }
+      }
+      if (removeComponent)
+      {
+        // can drop entire first (resp. last) component
+        if (r == 0)
+        {
+          nb.push_back(n1[index0]);
+          n1.erase(n1.begin(), n1.begin() + 1);
+        }
+        else
+        {
+          ne.push_back(n1[index0]);
+          n1.pop_back();
+        }
+        if (n1.empty())
+        {
+          // if we've removed everything, just return (we will rewrite to false)
+          return true;
+        }
+        else
+        {
+          changed = true;
+        }
+      }
+    }
+  }
+  // TODO (#1180) : computing the maximal overlap in this function may be
+  // important.
+  // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
+  // false
+  //   ...since str.to.int(x) can contain at most 1 character from "2aaaa",
+  //   leaving 4 characters
+  //      which is larger that the upper bound for length of str.substr(y,0,3),
+  //      which is 3.
+  return changed;
+}
+
+Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  Node ctn = nm->mkNode(STRING_STRCTN, a, b);
+
+  if (fullRewriter)
+  {
+    ctn = Rewriter::rewrite(ctn);
+  }
+  else
+  {
+    Node prev;
+    do
+    {
+      prev = ctn;
+      ctn = SequencesRewriter::rewriteContains(ctn);
+    } while (prev != ctn && ctn.getKind() == STRING_STRCTN);
+  }
+
+  Assert(ctn.getType().isBoolean());
+  return ctn.isConst() ? ctn : Node::null();
+}
+
+bool StringsEntail::checkNonEmpty(Node a)
+{
+  Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
+  len = Rewriter::rewrite(len);
+  return ArithEntail::check(len, true);
+}
+
+bool StringsEntail::checkLengthOne(Node s, bool strict)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  Node one = nm->mkConst(Rational(1));
+  Node len = nm->mkNode(STRING_LENGTH, s);
+  len = Rewriter::rewrite(len);
+  return ArithEntail::check(one, len)
+         && (!strict || ArithEntail::check(len, true));
+}
+
+bool StringsEntail::checkMultisetSubset(Node a, Node b)
+{
+  std::vector<Node> avec;
+  utils::getConcat(getMultisetApproximation(a), avec);
+  std::vector<Node> bvec;
+  utils::getConcat(b, bvec);
+
+  std::map<Node, unsigned> num_nconst[2];
+  std::map<Node, unsigned> num_const[2];
+  for (unsigned j = 0; j < 2; j++)
+  {
+    std::vector<Node>& jvec = j == 0 ? avec : bvec;
+    for (const Node& cc : jvec)
+    {
+      if (cc.isConst())
+      {
+        num_const[j][cc]++;
+      }
+      else
+      {
+        num_nconst[j][cc]++;
+      }
+    }
+  }
+  bool ms_success = true;
+  for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
+  {
+    if (nncp.second > num_nconst[1][nncp.first])
+    {
+      ms_success = false;
+      break;
+    }
+  }
+  if (ms_success)
+  {
+    // count the number of constant characters in the first argument
+    std::map<Node, unsigned> count_const[2];
+    std::vector<Node> chars;
+    for (unsigned j = 0; j < 2; j++)
+    {
+      for (std::pair<const Node, unsigned>& ncp : num_const[j])
+      {
+        Node cn = ncp.first;
+        Assert(cn.isConst());
+        std::vector<Node> cnChars = Word::getChars(cn);
+        for (const Node& ch : cnChars)
+        {
+          count_const[j][ch] += ncp.second;
+          if (std::find(chars.begin(), chars.end(), ch) == chars.end())
+          {
+            chars.push_back(ch);
+          }
+        }
+      }
+    }
+    Trace("strings-entail-ms-ss")
+        << "For " << a << " and " << b << " : " << std::endl;
+    for (const Node& ch : chars)
+    {
+      Trace("strings-entail-ms-ss") << "  # occurrences of substring ";
+      Trace("strings-entail-ms-ss") << ch << " in arguments is ";
+      Trace("strings-entail-ms-ss")
+          << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
+      if (count_const[0][ch] < count_const[1][ch])
+      {
+        return true;
+      }
+    }
+
+    // TODO (#1180): count the number of 2,3,4,.. character substrings
+    // for example:
+    // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
+    // since the second argument contains more occurrences of "bb".
+    // note this is orthogonal reasoning to inductive reasoning
+    // via regular membership reduction in Liang et al CAV 2015.
+  }
+  return false;
+}
+
+Node StringsEntail::checkHomogeneousString(Node a)
+{
+  std::vector<Node> avec;
+  utils::getConcat(getMultisetApproximation(a), avec);
+
+  bool cValid = false;
+  Node c;
+  for (const Node& ac : avec)
+  {
+    if (ac.isConst())
+    {
+      std::vector<Node> acv = Word::getChars(ac);
+      for (const Node& cc : acv)
+      {
+        if (!cValid)
+        {
+          cValid = true;
+          c = cc;
+        }
+        else if (c != cc)
+        {
+          // Found a different character
+          return Node::null();
+        }
+      }
+    }
+    else
+    {
+      // Could produce a different character
+      return Node::null();
+    }
+  }
+
+  if (!cValid)
+  {
+    return Word::mkEmptyWord(a.getType());
+  }
+
+  return c;
+}
+
+Node StringsEntail::getMultisetApproximation(Node a)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  if (a.getKind() == STRING_SUBSTR)
+  {
+    return a[0];
+  }
+  else if (a.getKind() == STRING_STRREPL)
+  {
+    return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
+  }
+  else if (a.getKind() == STRING_CONCAT)
+  {
+    NodeBuilder<> nb(STRING_CONCAT);
+    for (const Node& ac : a)
+    {
+      nb << getMultisetApproximation(ac);
+    }
+    return nb.constructNode();
+  }
+  else
+  {
+    return a;
+  }
+}
+
+Node StringsEntail::getStringOrEmpty(Node n)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  Node res;
+  while (res.isNull())
+  {
+    switch (n.getKind())
+    {
+      case STRING_STRREPL:
+      {
+        Node empty = nm->mkConst(::CVC4::String(""));
+        if (n[0] == empty)
+        {
+          // (str.replace "" x y) --> y
+          n = n[2];
+          break;
+        }
+
+        if (checkLengthOne(n[0]) && n[2] == empty)
+        {
+          // (str.replace "A" x "") --> "A"
+          res = n[0];
+          break;
+        }
+
+        res = n;
+        break;
+      }
+      case STRING_SUBSTR:
+      {
+        if (checkLengthOne(n[0]))
+        {
+          // (str.substr "A" x y) --> "A"
+          res = n[0];
+          break;
+        }
+        res = n;
+        break;
+      }
+      default:
+      {
+        res = n;
+        break;
+      }
+    }
+  }
+  return res;
+}
+
+Node StringsEntail::inferEqsFromContains(Node x, Node y)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  Node emp = nm->mkConst(String(""));
+  Assert(x.getType() == y.getType());
+  TypeNode stype = x.getType();
+
+  Node xLen = nm->mkNode(STRING_LENGTH, x);
+  std::vector<Node> yLens;
+  if (y.getKind() != STRING_CONCAT)
+  {
+    yLens.push_back(nm->mkNode(STRING_LENGTH, y));
+  }
+  else
+  {
+    for (const Node& yi : y)
+    {
+      yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
+    }
+  }
+
+  std::vector<Node> zeroLens;
+  if (x == emp)
+  {
+    // If x is the empty string, then all ys must be empty, too, and we can
+    // skip the expensive checks. Note that this is just a performance
+    // optimization.
+    zeroLens.swap(yLens);
+  }
+  else
+  {
+    // Check if we can infer that str.len(x) <= str.len(y). If that is the
+    // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
+    // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
+    // true. The terms that can have length zero without making the inequality
+    // false must be all be empty if (str.contains x y) is true.
+    if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens))
+    {
+      // We could not prove that the inequality holds
+      return Node::null();
+    }
+    else if (yLens.size() == y.getNumChildren())
+    {
+      // We could only prove that the inequality holds but not that any of the
+      // ys must be empty
+      return nm->mkNode(EQUAL, x, y);
+    }
+  }
+
+  if (y.getKind() != STRING_CONCAT)
+  {
+    if (zeroLens.size() == 1)
+    {
+      // y is not a concatenation and we found that it must be empty, so just
+      // return (= y "")
+      Assert(zeroLens[0][0] == y);
+      return nm->mkNode(EQUAL, y, emp);
+    }
+    else
+    {
+      Assert(yLens.size() == 1 && yLens[0][0] == y);
+      return nm->mkNode(EQUAL, x, y);
+    }
+  }
+
+  std::vector<Node> cs;
+  for (const Node& yiLen : yLens)
+  {
+    Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
+    cs.push_back(yiLen[0]);
+  }
+
+  NodeBuilder<> nb(AND);
+  // (= x (str.++ y1' ... ym'))
+  if (!cs.empty())
+  {
+    nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
+  }
+  // (= y1'' "") ... (= yk'' "")
+  for (const Node& zeroLen : zeroLens)
+  {
+    Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
+    nb << nm->mkNode(EQUAL, zeroLen[0], emp);
+  }
+
+  // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
+  return nb.constructNode();
+}
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>	2020-04-03 09:43:12 -0500
committer	GitHub <noreply@github.com>	2020-04-03 09:43:12 -0500
commit	d91b52085d7e3bbda65117c0cd88433aed383aff (patch)
tree	5ed2055704066d28a3247a82030ed44bfeda4a57 /src/theory/strings/strings_entail.cpp
parent	e24e6f3620996ee9e5010d30fefc51247cc55fdc (diff)