Ensure strict length constraint for decompose rule (#4821)

Fixes #4820. The performance issue was caused by 0988217562006d3f59e01dc261f39121df6d75f5. That commit introduced an option (`--strings-len-conc`) that optionally moves the length constraint for skolems to the conclusion of an inference instead of making it part of the term registration. However, for `DEQ_DISL_STRINGS_SPLIT`, we were only asserting that `LENGTH_GEQ_ONE` in the case where this option was not enabled, instead of asserting that the length of the skolem is equal to the component on the other side of the disequality. This lead to an infinite loop of inferences because we effectively were just splitting a component into two skolems and the only restriction was that the first one was non-empty. We guessed the second skolem to be empty, so the first skolem was equal to the component, the skolem was marked congruent, and we ended up with the same normal form as before. The commit fixes the issue by adding an argument to `getDecomposeConclusion()` that specifies whether to add the length constraint or not. This option is used to always add the length constraint in the case of `DEQ_DISL_STRINGS_SPLIT`.
author: Andres Noetzli <andres.noetzli@gmail.com> 2020-08-01 23:56:27 -0700
committer: GitHub <noreply@github.com> 2020-08-01 23:56:27 -0700
commit: 475985ccc80b1ddb38d912a3c6658912f1fc6207 (patch)
tree: ffc5cf8164e9bbc8a1e2edf169833463b26df693
parent: 5be889668bfb52d6705c2dc37ec05c291c7c9445 (diff)
4 files changed, 46 insertions, 18 deletions
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp
index 062bfe12f..438a559b8 100644
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -839,8 +839,12 @@ size_t CoreSolver::getSufficientNonEmptyOverlap(Node c, Node d, bool isRev)
   return p2 == std::string::npos ? p : (p > p2 + 1 ? p2 + 1 : p);
 }
 
-Node CoreSolver::getDecomposeConclusion(
-    Node x, Node l, bool isRev, SkolemCache* skc, std::vector<Node>& newSkolems)
+Node CoreSolver::getDecomposeConclusion(Node x,
+                                        Node l,
+                                        bool isRev,
+                                        bool addLenConc,
+                                        SkolemCache* skc,
+                                        std::vector<Node>& newSkolems)
 {
   Assert(l.getType().isInteger());
   NodeManager* nm = NodeManager::currentNM();
@@ -850,7 +854,7 @@ Node CoreSolver::getDecomposeConclusion(
   Node sk2 = skc->mkSkolemCached(x, n, SkolemCache::SK_SUFFIX_REM, "dc_spt2");
   newSkolems.push_back(sk2);
   Node conc = x.eqNode(nm->mkNode(STRING_CONCAT, sk1, sk2));
-  if (options::stringLenConc())
+  if (addLenConc)
   {
     Node lc = nm->mkNode(STRING_LENGTH, isRev ? sk2 : sk1).eqNode(l);
     conc = nm->mkNode(AND, conc, lc);
@@ -2081,8 +2085,8 @@ void CoreSolver::processDeq(Node ni, Node nj)
           // len(x)>=1 => x = k1 ++ k2 ^ len(k1) = 1
           SkolemCache* skc = d_termReg.getSkolemCache();
           std::vector<Node> newSkolems;
-          Node conc =
-              getDecomposeConclusion(nck, d_one, false, skc, newSkolems);
+          Node conc = getDecomposeConclusion(
+              nck, d_one, false, options::stringLenConc(), skc, newSkolems);
           Assert(newSkolems.size() == 2);
           if (options::stringLenConc())
           {
@@ -2108,8 +2112,8 @@ void CoreSolver::processDeq(Node ni, Node nj)
         // are both non-constants. We split them into parts that have the same
         // lengths.
         //
-        // len(x) >= len(y) => x = k1 ++ k2 ^ len(k1) = len(y)
-        // len(y) >= len(x) => y = k3 ++ k4 ^ len(k3) = len(x)
+        // len(x) > len(y) => x = k1 ++ k2 ^ len(k1) = len(y)
+        // len(y) > len(x) => y = k3 ++ k4 ^ len(k3) = len(x)
         Trace("strings-solve")
             << "Non-Simple Case 1 : add lemmas " << std::endl;
         SkolemCache* skc = d_termReg.getSkolemCache();
@@ -2120,19 +2124,24 @@ void CoreSolver::processDeq(Node ni, Node nj)
           Node uy = r == 0 ? y : x;
           Node uxLen = nm->mkNode(STRING_LENGTH, ux);
           Node uyLen = nm->mkNode(STRING_LENGTH, uy);
+          // We always request the length constraint in the conclusion here
+          // because the skolem needs to have length `uyLen`. If we only assert
+          // that the skolem's length is greater or equal to one, we can end up
+          // in a loop:
+          //
+          // 1. Split: x = k1 ++ k2 ^ len(k1) >= 1
+          // 2. Assume: k2 = ""
+          // 3. Deduce: x = k1
+          //
+          // After step 3, `k1` is marked congruent because `x` is the older
+          // variable. So we get `x` in the normal form again.
           std::vector<Node> newSkolems;
-          Node conc = getDecomposeConclusion(ux, uyLen, false, skc, newSkolems);
+          Node conc =
+              getDecomposeConclusion(ux, uyLen, false, true, skc, newSkolems);
           Assert(newSkolems.size() == 2);
-          if (options::stringLenConc())
-          {
-            d_termReg.registerTermAtomic(newSkolems[0], LENGTH_IGNORE);
-          }
-          else
-          {
-            d_termReg.registerTermAtomic(newSkolems[0], LENGTH_GEQ_ONE);
-          }
+          d_termReg.registerTermAtomic(newSkolems[1], LENGTH_GEQ_ONE);
           std::vector<Node> antecLen;
-          antecLen.push_back(nm->mkNode(GEQ, uxLen, uyLen));
+          antecLen.push_back(nm->mkNode(GT, uxLen, uyLen));
           d_im.sendInference(
               {}, antecLen, conc, Inference::DEQ_DISL_STRINGS_SPLIT, true);
         }
diff --git a/src/theory/strings/core_solver.h b/src/theory/strings/core_solver.h
index 2ee61e759..da364e243 100644
--- a/src/theory/strings/core_solver.h
+++ b/src/theory/strings/core_solver.h
@@ -260,11 +260,13 @@ class CoreSolver
    * a conjunction of splitting string x into pieces based on length l, e.g.:
    *   x = k_1 ++ k_2
    * where k_1 (resp. k_2) is a skolem corresponding to a substring of x of
-   * length l if isRev is false (resp. true).
+   * length l if isRev is false (resp. true). The function optionally adds a
+   * length constraint len(k_1) = l (resp. len(k_2) = l).
    *
    * @param x The string term
    * @param l The length term
    * @param isRev Whether the equation is in a reverse direction
+   * @param addLenConc Whether to add the length constraint
    * @param skc The skolem cache (to allocate fresh variables if necessary)
    * @param newSkolems The vector to add new variables to
    * @return The conclusion of the inference.
@@ -272,6 +274,7 @@ class CoreSolver
   static Node getDecomposeConclusion(Node x,
                                      Node l,
                                      bool isRev,
+                                     bool addLenConc,
                                      SkolemCache* skc,
                                      std::vector<Node>& newSkolems);
 
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt
index a0fee2185..f9840e552 100644
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -983,6 +983,7 @@ set(regress_0_tests
   regress0/strings/issue4376.smt2
   regress0/strings/issue4662-consume-nterm.smt2
   regress0/strings/issue4674-recomp-nf.smt2
+  regress0/strings/issue4820.smt2
   regress0/strings/itos-entail.smt2
   regress0/strings/large-model.smt2
   regress0/strings/leadingzero001.smt2
diff --git a/test/regress/regress0/strings/issue4820.smt2 b/test/regress/regress0/strings/issue4820.smt2
new file mode 100644
index 000000000..0bf0fac65
--- /dev/null
+++ b/test/regress/regress0/strings/issue4820.smt2
@@ -0,0 +1,15 @@
+; EXPECT: sat
+(set-logic QF_S)
+(declare-fun _substvar_270_ () String)
+(declare-fun _substvar_273_ () Bool)
+(declare-fun _substvar_275_ () Bool)
+(declare-fun _substvar_293_ () Bool)
+(declare-const v0 Bool)
+(declare-const v1 Bool)
+(declare-const v2 Bool)
+(declare-const v3 Bool)
+(declare-const Str3 String)
+(declare-const Str7 String)
+(declare-const Str15 String)
+(assert (or (xor true true (xor v0 v3 false v1 false v2 v2 v1 false v2 v1) true true (=> (or v1 (distinct Str15 _substvar_270_ Str3 (str.++ Str7 Str7)) v2) false) false true) _substvar_275_))
+(check-sat)
author	Andres Noetzli <andres.noetzli@gmail.com>	2020-08-01 23:56:27 -0700
committer	GitHub <noreply@github.com>	2020-08-01 23:56:27 -0700
commit	475985ccc80b1ddb38d912a3c6658912f1fc6207 (patch)
tree	ffc5cf8164e9bbc8a1e2edf169833463b26df693
parent	5be889668bfb52d6705c2dc37ec05c291c7c9445 (diff)