From 1516e3b5d9436be86841a52002fc728fcd52dd34 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Mon, 24 May 2021 11:15:12 -0500 Subject: Fix re-elim length requirement for symbolic RE memberships (#6609) Fixes #6604. Previously, re-elim was solution unsound for cases where the LHS and a component of the RHS were both empty. This ensures a length requirement is given for the LHS to ensure proper containment. --- src/theory/strings/regexp_elim.cpp | 42 +++++++++++++--------- test/regress/CMakeLists.txt | 1 + .../regress0/strings/issue6604-re-elim.smt2 | 6 ++++ 3 files changed, 32 insertions(+), 17 deletions(-) create mode 100644 test/regress/regress0/strings/issue6604-re-elim.smt2 diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp index 4b033b7bd..b9af7a23f 100644 --- a/src/theory/strings/regexp_elim.cpp +++ b/src/theory/strings/regexp_elim.cpp @@ -88,10 +88,9 @@ Node RegExpElimination::eliminateConcat(Node atom, bool isAgg) // have a fixed length. // The intuition why this is a "non-aggressive" rewrite is that membership // into fixed length regular expressions are easy to handle. - bool hasFixedLength = true; // the index of _* in re unsigned pivotIndex = 0; - bool hasPivotIndex = false; + size_t numPivotIndex = 0; std::vector childLengths; std::vector childLengthsPostPivot; for (unsigned i = 0, size = children.size(); i < size; i++) @@ -100,32 +99,37 @@ Node RegExpElimination::eliminateConcat(Node atom, bool isAgg) Node fl = RegExpEntail::getFixedLengthForRegexp(c); if (fl.isNull()) { - if (!hasPivotIndex && c.getKind() == REGEXP_STAR + if (numPivotIndex == 0 && c.getKind() == REGEXP_STAR && c[0].getKind() == REGEXP_SIGMA) { - hasPivotIndex = true; + numPivotIndex = 1; pivotIndex = i; - // set to zero for the sum below + // zero is used in sum below and is used for concat-fixed-len fl = zero; } else { - hasFixedLength = false; - break; + numPivotIndex++; } } - childLengths.push_back(fl); - if (hasPivotIndex) + if (!fl.isNull()) { - childLengthsPostPivot.push_back(fl); + childLengths.push_back(fl); + if (numPivotIndex > 0) + { + childLengthsPostPivot.push_back(fl); + } } } - if (hasFixedLength) + Node lenSum = childLengths.size() > 1 ? nm->mkNode(PLUS, childLengths) + : childLengths[0]; + // if we have at most one pivot index + if (numPivotIndex <= 1) { + bool hasPivotIndex = (numPivotIndex == 1); Assert(re.getNumChildren() == children.size()); - Node sum = nm->mkNode(PLUS, childLengths); std::vector conc; - conc.push_back(nm->mkNode(hasPivotIndex ? GEQ : EQUAL, lenx, sum)); + conc.push_back(nm->mkNode(hasPivotIndex ? GEQ : EQUAL, lenx, lenSum)); Node currEnd = zero; for (unsigned i = 0, size = childLengths.size(); i < size; i++) { @@ -326,7 +330,7 @@ Node RegExpElimination::eliminateConcat(Node atom, bool isAgg) Node fit = nm->mkNode(LEQ, nm->mkNode(PLUS, prev_end, cEnd), lenx); conj.push_back(fit); } - Node res = conj.size() == 1 ? conj[0] : nm->mkNode(AND, conj); + Node res = nm->mkAnd(conj); // process the non-greedy find variables if (!non_greedy_find_vars.empty()) { @@ -342,19 +346,23 @@ Node RegExpElimination::eliminateConcat(Node atom, bool isAgg) Node bvl = nm->mkNode(BOUND_VAR_LIST, non_greedy_find_vars); res = nm->mkNode(EXISTS, bvl, body); } + // must also give a minimum length requirement + res = nm->mkNode(AND, res, nm->mkNode(GEQ, lenx, lenSum)); // Examples of this elimination: // x in (re.++ "A" (re.* _) "B" (re.* _)) ---> // substr(x,0,1)="A" ^ indexof(x,"B",1)!=-1 // x in (re.++ (re.* _) "A" _ _ _ (re.* _) "B" _ _ (re.* _)) ---> // indexof(x,"A",0)!=-1 ^ // indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 ) != -1 ^ - // indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 )+1+2 <= len(x) + // indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 )+1+2 <= len(x) ^ + // len(x) >= 7 // An example of a non-greedy find: // x in re.++( re.*( _ ), "A", _, "B", re.*( _ ) ) ---> - // exists k. 0 <= k < len( x ) ^ + // (exists k. 0 <= k < len( x ) ^ // indexof( x, "A", k ) != -1 ^ - // substr( x, indexof( x, "A", k )+2, 1 ) = "B" + // substr( x, indexof( x, "A", k )+2, 1 ) = "B") ^ + // len(x) >= 3 return returnElim(atom, res, "concat-with-gaps"); } } diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 8cb7f7d1f..3d8016379 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -1141,6 +1141,7 @@ set(regress_0_tests regress0/strings/issue6510-seq-bool.smt2 regress0/strings/issue6520.smt2 regress0/strings/issue6560-indexof-reduction.smt2 + regress0/strings/issue6604-re-elim.smt2 regress0/strings/itos-entail.smt2 regress0/strings/large-model.smt2 regress0/strings/leadingzero001.smt2 diff --git a/test/regress/regress0/strings/issue6604-re-elim.smt2 b/test/regress/regress0/strings/issue6604-re-elim.smt2 new file mode 100644 index 000000000..f75b1b642 --- /dev/null +++ b/test/regress/regress0/strings/issue6604-re-elim.smt2 @@ -0,0 +1,6 @@ +; COMMAND-LINE: --re-elim --strings-exp +; EXPECT: unsat +(set-logic ALL) +(declare-fun a () String) +(assert (str.in_re a (re.++ re.allchar (str.to_re a) (re.* re.allchar)))) +(check-sat) -- cgit v1.2.3