Disable substring component contains in strip endpoints (#6266)

Fixes the first benchmark from #6203.
author: Andrew Reynolds <andrew.j.reynolds@gmail.com> 2021-04-03 13:21:55 -0500
committer: GitHub <noreply@github.com> 2021-04-03 13:21:55 -0500
commit: 69b463e1b1150715b2f4179786ddab8ba0c43b37 (patch)
tree: e3c1dcb7d32dc0dbb3dd159cc37ab92371bc9ce3
parent: fba1437c772b6733ce678b93b5ef7d95e366c82d (diff)
4 files changed, 22 insertions, 57 deletions
diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp
index 62ba41743..54af1ddcd 100644
--- a/src/theory/strings/strings_entail.cpp
+++ b/src/theory/strings/strings_entail.cpp
@@ -517,7 +517,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
       n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
       Trace("strings-rewrite-debug2")
           << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
-          << ", dir = " << dir << std::endl;
+          << ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl;
       if (n1cmp.isConst())
       {
         Node s = n1cmp;
@@ -536,6 +536,8 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
               // can remove everything
               //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
               //   str.contains( "", str.++( "ba", x ) )
+              //   or std.contains( str.substr( "abc", x, y ), "d" ) --->
+              //   str.contains( "", "d" )
               removeComponent = true;
             }
             else if (sss.empty())  // only if not substr
@@ -546,15 +548,11 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
               // str.contains( str.++( "c", x ), str.++( "cd", y ) )
               overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
             }
-            else
-            {
-              // if we are looking at a substring, we can remove the component
-              // if there is no overlap
-              //   e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
-              //        --> str.contains( x, "a" )
-              removeComponent =
-                  ((r == 0 ? Word::overlap(s, t) : Word::overlap(t, s)) == 0);
-            }
+            // note that we cannot process substring here, since t may
+            // match only part of s. Consider:
+            // (str.++ "C" (str.substr "AB" x y)), "CB"
+            // where "AB" and "CB" have no overlap, but "C" is not part of what
+            // is matched with "AB".
           }
           else if (sss.empty())  // only if not substr
           {
@@ -572,6 +570,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
         {
           // inconclusive
         }
+        Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl;
         // process the overlap
         if (overlap < slen)
         {
@@ -635,6 +634,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
       }
       if (removeComponent)
       {
+        Trace("strings-rewrite-debug2") << "...remove component" << std::endl;
         // can drop entire first (resp. last) component
         if (r == 0)
         {
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt
index 81c2bdba3..e125c651e 100644
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -2041,6 +2041,7 @@ set(regress_1_tests
   regress1/strings/issue6132-non-unique-skolem.smt2
   regress1/strings/issue6142-repl-inv-rew.smt2
   regress1/strings/issue6191-replace-all.smt2
+  regress1/strings/issue6203-1-substr-ctn-strip.smt2
   regress1/strings/issue6203-2-re-ccache.smt2
   regress1/strings/kaluza-fl.smt2
   regress1/strings/loop002.smt2
diff --git a/test/regress/regress1/strings/issue6203-1-substr-ctn-strip.smt2 b/test/regress/regress1/strings/issue6203-1-substr-ctn-strip.smt2
new file mode 100644
index 000000000..dde9c8210
--- /dev/null
+++ b/test/regress/regress1/strings/issue6203-1-substr-ctn-strip.smt2
@@ -0,0 +1,11 @@
+; COMMAND-LINE: --strings-exp -q
+; EXPECT: sat
+(set-logic ALL)
+(declare-fun a () String)
+(declare-fun b () String)
+(declare-fun c () String)
+(declare-fun d () String)
+(declare-fun e () String)
+(assert (or (= (str.++ c (str.substr e (* (mod 0 0) (str.len b)) (str.len d))) "ab") (= (str.++ c (str.substr e (* (mod 0 0) (str.len b)) (str.len d))) "")))
+(assert (= (str.len c) (+ (str.len e) (* (str.indexof a "a" 0) (str.len b))) 1 (str.len b)))
+(check-sat)
diff --git a/test/unit/theory/sequences_rewriter_white.cpp b/test/unit/theory/sequences_rewriter_white.cpp
index a1512d7de..f66a87a83 100644
--- a/test/unit/theory/sequences_rewriter_white.cpp
+++ b/test/unit/theory/sequences_rewriter_white.cpp
@@ -522,36 +522,6 @@ TEST_F(TestTheoryWhiteSequencesRewriter, rewrite_indexOf)
   {
     // Same normal form for:
     //
-    // (str.indexof (str.++ "B" (str.substr "CCC" i j) x "A") "A" 0)
-    //
-    // (+ 1 (str.len (str.substr "CCC" i j))
-    //    (str.indexof (str.++ "A" x y) "A" 0))
-    Node lhs = d_nodeManager->mkNode(
-        kind::STRING_STRIDOF,
-        d_nodeManager->mkNode(
-            kind::STRING_CONCAT,
-            b,
-            d_nodeManager->mkNode(kind::STRING_SUBSTR, ccc, i, j),
-            x,
-            a),
-        a,
-        zero);
-    Node rhs = d_nodeManager->mkNode(
-        kind::PLUS,
-        one,
-        d_nodeManager->mkNode(
-            kind::STRING_LENGTH,
-            d_nodeManager->mkNode(kind::STRING_SUBSTR, ccc, i, j)),
-        d_nodeManager->mkNode(kind::STRING_STRIDOF,
-                              d_nodeManager->mkNode(kind::STRING_CONCAT, x, a),
-                              a,
-                              zero));
-    sameNormalForm(lhs, rhs);
-  }
-
-  {
-    // Same normal form for:
-    //
     // (str.indexof (str.++ "B" "C" "A" x y) "A" 0)
     //
     // (+ 2 (str.indexof (str.++ "A" x y) "A" 0))
@@ -1320,23 +1290,6 @@ TEST_F(TestTheoryWhiteSequencesRewriter, rewrite_contains)
   {
     // Same normal form for:
     //
-    // (str.contains (str.++ (str.substr "DEF" n m) x) "AB")
-    //
-    // (str.contains x "AB")
-    lhs = d_nodeManager->mkNode(
-        kind::STRING_STRCTN,
-        d_nodeManager->mkNode(
-            kind::STRING_CONCAT,
-            d_nodeManager->mkNode(kind::STRING_SUBSTR, def, n, m),
-            x),
-        ab);
-    rhs = d_nodeManager->mkNode(kind::STRING_STRCTN, x, ab);
-    sameNormalForm(lhs, rhs);
-  }
-
-  {
-    // Same normal form for:
-    //
     // (str.contains "ABC" (str.at x n))
     //
     // (or (= x "")
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>	2021-04-03 13:21:55 -0500
committer	GitHub <noreply@github.com>	2021-04-03 13:21:55 -0500
commit	69b463e1b1150715b2f4179786ddab8ba0c43b37 (patch)
tree	e3c1dcb7d32dc0dbb3dd159cc37ab92371bc9ce3
parent	fba1437c772b6733ce678b93b5ef7d95e366c82d (diff)