Merge branch 'master' into miscWarnsmiscWarns

author: Andrew Reynolds <andrew.j.reynolds@gmail.com> 2018-10-10 20:53:00 -0500
committer: GitHub <noreply@github.com> 2018-10-10 20:53:00 -0500
commit: 308d47325ea00350d6f4475bf9156e339d4b2d36 (patch)
tree: eddc0d60a57df5e55ed18a1032ee4a95fcbcf8b7 /src/theory/strings
parent: 4bca88ced88af4db8b79d6e924516a7c22168391 (diff)
parent: 7d70b721f43157e01bc6166a822df79250df632a (diff)
2 files changed, 54 insertions, 3 deletions
diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp
index 8ea26fca9..0310e4620 100644
--- a/src/theory/strings/regexp_elim.cpp
+++ b/src/theory/strings/regexp_elim.cpp
@@ -104,18 +104,21 @@ Node RegExpElimination::eliminateConcat(Node atom)
     // prev_end stores the current (symbolic) index in x that we are
     // searching.
     Node prev_end = d_zero;
+    // the symbolic index we start searching, for each child in sep_children.
+    std::vector<Node> prev_ends;
     unsigned gap_minsize_end = gap_minsize.back();
     bool gap_exact_end = gap_exact.back();
     std::vector<Node> non_greedy_find_vars;
     for (unsigned i = 0, size = sep_children.size(); i < size; i++)
     {
-      Node sc = sep_children[i];
       if (gap_minsize[i] > 0)
       {
         // the gap to this child is at least gap_minsize[i]
         prev_end =
             nm->mkNode(PLUS, prev_end, nm->mkConst(Rational(gap_minsize[i])));
       }
+      prev_ends.push_back(prev_end);
+      Node sc = sep_children[i];
       Node lensc = nm->mkNode(STRING_LENGTH, sc);
       if (gap_exact[i])
       {
@@ -169,7 +172,6 @@ Node RegExpElimination::eliminateConcat(Node atom)
         Node lenSc = nm->mkNode(STRING_LENGTH, sc);
         Node loc = nm->mkNode(MINUS, lenx, nm->mkNode(PLUS, lenSc, cEnd));
         Node scc = sc.eqNode(nm->mkNode(STRING_SUBSTR, x, loc, lenSc));
-        conj.push_back(scc);
         // We also must ensure that we fit. This constraint is necessary in
         // addition to the constraint above. Take this example:
         //     x in (re.++ "A" _ (re.* _) "B" _) --->
@@ -182,9 +184,23 @@ Node RegExpElimination::eliminateConcat(Node atom)
         // would have been the case than "ABB" would be a model for x, where
         // the second constraint refers to the third position, and the third
         // constraint refers to the second position.
+        //
+        // With respect to the above example, the following is an optimization.
+        // For that example, we instead produce:
+        //     x in (re.++ "A" _ (re.* _) "B" _) --->
+        //       substr( x, 0, 1 ) = "A" ^          // find "A"
+        //       substr( x, len(x)-2, 1 ) = "B" ^   // "B" is at end - 2
+        //       2 <= len( x ) - 2
+        // The intuition is that above, there are two constraints that insist
+        // that "B" is found, whereas we only need one. The last constraint
+        // above says that the "B" we find at end-2 can be found >=1 after
+        // the "A".
+        conj.pop_back();
         Node fit = nm->mkNode(gap_exact[sep_children.size() - 1] ? EQUAL : LEQ,
-                              nm->mkNode(MINUS, prev_end, lenSc),
+                              prev_ends.back(),
                               loc);
+
+        conj.push_back(scc);
         conj.push_back(fit);
       }
       else if (gap_minsize_end > 0)
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp
index b2778298a..00a711a31 100644
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -1610,6 +1610,33 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node)
     return returnRewrite(node, ret, "ss-len-non-pos");
   }
 
+  if (node[0].getKind() == STRING_SUBSTR)
+  {
+    // (str.substr (str.substr x a b) c d) ---> "" if c >= b
+    //
+    // Note that this rewrite can be generalized to:
+    //
+    // (str.substr x a b) ---> "" if a >= (str.len x)
+    //
+    // This can be done when we generalize our entailment methods to
+    // accept an optional context. Then we could conjecture that
+    // (str.substr x a b) rewrites to "" and do a case analysis:
+    //
+    // - a < 0 or b < 0 (the result is trivially empty in these cases)
+    // - a >= (str.len x) assuming that { a >= 0, b >= 0 }
+    //
+    // For example, for (str.substr (str.substr x a a) a a), we could
+    // then deduce that under those assumptions, "a" is an
+    // over-approximation of the length of (str.substr x a a), which
+    // then allows us to reason that the result of the whole term must
+    // be empty.
+    if (checkEntailArith(node[1], node[0][2]))
+    {
+      Node ret = nm->mkConst(::CVC4::String(""));
+      return returnRewrite(node, ret, "ss-start-geq-len");
+    }
+  }
+
   std::vector<Node> n1;
   getConcat(node[0], n1);
 
@@ -1673,6 +1700,14 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node)
         Node ret = nm->mkConst(::CVC4::String(""));
         return returnRewrite(node, ret, "ss-start-entails-zero-len");
       }
+
+      // (str.substr s x x) ---> "" if (str.len s) <= 1
+      Node one = nm->mkConst(CVC4::Rational(1));
+      if (node[1] == node[2] && checkEntailArith(one, tot_len))
+      {
+        Node ret = nm->mkConst(::CVC4::String(""));
+        return returnRewrite(node, ret, "ss-len-one-z-z");
+      }
     }
     if (!curr.isNull())
     {
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>	2018-10-10 20:53:00 -0500
committer	GitHub <noreply@github.com>	2018-10-10 20:53:00 -0500
commit	308d47325ea00350d6f4475bf9156e339d4b2d36 (patch)
tree	eddc0d60a57df5e55ed18a1032ee4a95fcbcf8b7 /src/theory/strings
parent	4bca88ced88af4db8b79d6e924516a7c22168391 (diff)
parent	7d70b721f43157e01bc6166a822df79250df632a (diff)