diff options
author | Andres Noetzli <andres.noetzli@gmail.com> | 2021-05-27 15:42:10 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-27 22:42:10 +0000 |
commit | 29f0b8f378377ed836bddaaf88883d0b2eeb545d (patch) | |
tree | 5d9edb57c60ab8c8a07dab52f18b72dd441d4fdf /src/theory/strings | |
parent | 631032b15327c28c44b51490dceb434a38f3419a (diff) |
Fix `str.replace_re` and `str.replace_re_all` (#6615)
Fixes #6057. The reductions of `str.replace_re` and `str.replace_re_all`
were not correctly enforcing that the operations replace the _first_
occurrence of some regular expression in a string. This commit fixes the
issue by introducing a new operator `str.indexof_re(s, r, n)`, which,
analoguously to `str.indexof`, returns the index of the first match of
the regular expression `r` in `s`. The commit adds basic rewrites for
evaluating the operator as well as its reduction. Additionally, it
converts the reductions of `str.replace_re` and `str.replace_re_all` to
use that new operator. This simplifies the reductions of the two
operators and ensures that the semantics are consistent between the two.
Diffstat (limited to 'src/theory/strings')
-rw-r--r-- | src/theory/strings/extf_solver.cpp | 11 | ||||
-rw-r--r-- | src/theory/strings/kinds | 8 | ||||
-rw-r--r-- | src/theory/strings/rewrites.cpp | 4 | ||||
-rw-r--r-- | src/theory/strings/rewrites.h | 4 | ||||
-rw-r--r-- | src/theory/strings/sequences_rewriter.cpp | 59 | ||||
-rw-r--r-- | src/theory/strings/sequences_rewriter.h | 6 | ||||
-rw-r--r-- | src/theory/strings/skolem_cache.cpp | 18 | ||||
-rw-r--r-- | src/theory/strings/skolem_cache.h | 21 | ||||
-rw-r--r-- | src/theory/strings/term_registry.cpp | 18 | ||||
-rw-r--r-- | src/theory/strings/theory_strings.cpp | 1 | ||||
-rw-r--r-- | src/theory/strings/theory_strings_preprocess.cpp | 234 | ||||
-rw-r--r-- | src/theory/strings/theory_strings_utils.cpp | 5 |
12 files changed, 296 insertions, 93 deletions
diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index a1de5e295..9576c1d81 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -53,6 +53,7 @@ ExtfSolver::ExtfSolver(SolverState& s, d_extt.addFunctionKind(kind::STRING_SUBSTR); d_extt.addFunctionKind(kind::STRING_UPDATE); d_extt.addFunctionKind(kind::STRING_STRIDOF); + d_extt.addFunctionKind(kind::STRING_INDEXOF_RE); d_extt.addFunctionKind(kind::STRING_ITOS); d_extt.addFunctionKind(kind::STRING_STOI); d_extt.addFunctionKind(kind::STRING_STRREPL); @@ -191,11 +192,11 @@ bool ExtfSolver::doReduction(int effort, Node n) { NodeManager* nm = NodeManager::currentNM(); Assert(k == STRING_SUBSTR || k == STRING_UPDATE || k == STRING_STRCTN - || k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI - || k == STRING_STRREPL || k == STRING_STRREPLALL || k == SEQ_NTH - || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL - || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER - || k == STRING_REV); + || k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_ITOS + || k == STRING_STOI || k == STRING_STRREPL || k == STRING_STRREPLALL + || k == SEQ_NTH || k == STRING_REPLACE_RE + || k == STRING_REPLACE_RE_ALL || k == STRING_LEQ + || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV); std::vector<Node> new_nodes; Node res = d_preproc.simplify(n, new_nodes); Assert(res != n); diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 1353ca964..743a5c006 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -21,13 +21,12 @@ operator STRING_CHARAT 2 "string charat" operator STRING_STRCTN 2 "string contains" operator STRING_LT 2 "string less than" operator STRING_LEQ 2 "string less than or equal" -operator STRING_STRIDOF 3 "string indexof" +operator STRING_STRIDOF 3 "string index of substring" +operator STRING_INDEXOF_RE 3 "string index of regular expression match" operator STRING_STRREPL 3 "string replace" operator STRING_STRREPLALL 3 "string replace all" operator STRING_REPLACE_RE 3 "string replace regular expression match" operator STRING_REPLACE_RE_ALL 3 "string replace all regular expression matches" -typerule STRING_REPLACE_RE "SimpleTypeRule<RString, AString, ARegExp, AString>" -typerule STRING_REPLACE_RE_ALL "SimpleTypeRule<RString, AString, ARegExp, AString>" operator STRING_PREFIX 2 "string prefixof" operator STRING_SUFFIX 2 "string suffixof" operator STRING_IS_DIGIT 1 "string isdigit, returns true if argument is a string of length one that represents a digit" @@ -152,8 +151,11 @@ typerule STRING_UPDATE ::cvc5::theory::strings::StringUpdateTypeRule typerule STRING_CHARAT ::cvc5::theory::strings::StringAtTypeRule typerule STRING_STRCTN ::cvc5::theory::strings::StringRelationTypeRule typerule STRING_STRIDOF ::cvc5::theory::strings::StringIndexOfTypeRule +typerule STRING_INDEXOF_RE "SimpleTypeRule<RInteger, AString, ARegExp, AInteger>" typerule STRING_STRREPL ::cvc5::theory::strings::StringReplaceTypeRule typerule STRING_STRREPLALL ::cvc5::theory::strings::StringReplaceTypeRule +typerule STRING_REPLACE_RE "SimpleTypeRule<RString, AString, ARegExp, AString>" +typerule STRING_REPLACE_RE_ALL "SimpleTypeRule<RString, AString, ARegExp, AString>" typerule STRING_PREFIX ::cvc5::theory::strings::StringStrToBoolTypeRule typerule STRING_SUFFIX ::cvc5::theory::strings::StringStrToBoolTypeRule typerule STRING_REV ::cvc5::theory::strings::StringStrToStrTypeRule diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp index 0c4e35df7..549d33efc 100644 --- a/src/theory/strings/rewrites.cpp +++ b/src/theory/strings/rewrites.cpp @@ -68,6 +68,10 @@ const char* toString(Rewrite r) case Rewrite::IDOF_PULL_ENDPT: return "IDOF_PULL_ENDPT"; case Rewrite::IDOF_STRIP_CNST_ENDPTS: return "IDOF_STRIP_CNST_ENDPTS"; case Rewrite::IDOF_STRIP_SYM_LEN: return "IDOF_STRIP_SYM_LEN"; + case Rewrite::INDEXOF_RE_EMP_RE: return "INDEXOF_RE_EMP_RE"; + case Rewrite::INDEXOF_RE_EVAL: return "INDEXOF_RE_EVAL"; + case Rewrite::INDEXOF_RE_INVALID_INDEX: return "INDEXOF_RE_INVALID_INDEX"; + case Rewrite::INDEXOF_RE_MAX_INDEX: return "INDEXOF_RE_MAX_INDEX"; case Rewrite::ITOS_EVAL: return "ITOS_EVAL"; case Rewrite::RE_AND_EMPTY: return "RE_AND_EMPTY"; case Rewrite::RE_ANDOR_FLATTEN: return "RE_ANDOR_FLATTEN"; diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h index 482666faa..5e63c55c8 100644 --- a/src/theory/strings/rewrites.h +++ b/src/theory/strings/rewrites.h @@ -73,6 +73,10 @@ enum class Rewrite : uint32_t IDOF_PULL_ENDPT, IDOF_STRIP_CNST_ENDPTS, IDOF_STRIP_SYM_LEN, + INDEXOF_RE_EMP_RE, + INDEXOF_RE_EVAL, + INDEXOF_RE_INVALID_INDEX, + INDEXOF_RE_MAX_INDEX, ITOS_EVAL, RE_AND_EMPTY, RE_ANDOR_FLATTEN, diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index 15c37070a..7ef1242c6 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -1461,6 +1461,10 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) { retNode = rewriteIndexof(node); } + else if (nk == kind::STRING_INDEXOF_RE) + { + retNode = rewriteIndexofRe(node); + } else if (nk == kind::STRING_STRREPL) { retNode = rewriteReplace(node); @@ -2529,6 +2533,59 @@ Node SequencesRewriter::rewriteIndexof(Node node) return node; } +Node SequencesRewriter::rewriteIndexofRe(Node node) +{ + Assert(node.getKind() == STRING_INDEXOF_RE); + NodeManager* nm = NodeManager::currentNM(); + Node s = node[0]; + Node r = node[1]; + Node n = node[2]; + Node zero = nm->mkConst(Rational(0)); + Node slen = nm->mkNode(STRING_LENGTH, s); + + if (ArithEntail::check(zero, n, true) || ArithEntail::check(n, slen, true)) + { + Node ret = nm->mkConst(Rational(-1)); + return returnRewrite(node, ret, Rewrite::INDEXOF_RE_INVALID_INDEX); + } + + if (RegExpEntail::isConstRegExp(r)) + { + if (s.isConst() && n.isConst()) + { + Rational nrat = n.getConst<Rational>(); + cvc5::Rational rMaxInt(cvc5::String::maxSize()); + if (nrat > rMaxInt) + { + // We know that, due to limitations on the size of string constants + // in our implementation, that accessing a position greater than + // rMaxInt is guaranteed to be out of bounds. + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, Rewrite::INDEXOF_RE_MAX_INDEX); + } + + uint32_t start = nrat.getNumerator().toUnsignedInt(); + Node rem = nm->mkConst(s.getConst<String>().substr(start)); + std::pair<size_t, size_t> match = firstMatch(rem, r); + Node ret = nm->mkConst( + Rational(match.first == string::npos + ? -1 + : static_cast<int64_t>(start + match.first))); + return returnRewrite(node, ret, Rewrite::INDEXOF_RE_EVAL); + } + + if (ArithEntail::check(n, zero) && ArithEntail::check(slen, n)) + { + String emptyStr(""); + if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, r)) + { + return returnRewrite(node, n, Rewrite::INDEXOF_RE_EMP_RE); + } + } + } + return node; +} + Node SequencesRewriter::rewriteReplace(Node node) { Assert(node.getKind() == kind::STRING_STRREPL); @@ -3150,7 +3207,7 @@ Node SequencesRewriter::rewriteReplaceReAll(Node node) std::vector<Node> res; String rem = x.getConst<String>(); std::pair<size_t, size_t> match(0, 0); - while (rem.size() >= 0) + while (rem.size() != 0) { match = firstMatch(nm->mkConst(rem), yp); if (match.first == string::npos) diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h index 7af24596a..37ed78786 100644 --- a/src/theory/strings/sequences_rewriter.h +++ b/src/theory/strings/sequences_rewriter.h @@ -194,6 +194,12 @@ class SequencesRewriter : public TheoryRewriter * Returns the rewritten form of node. */ Node rewriteIndexof(Node node); + /** rewrite indexof regular expression match + * This is the entry point for post-rewriting terms n of the form + * str.indexof_re( s, r, n ) + * Returns the rewritten form of node. + */ + Node rewriteIndexofRe(Node node); /** rewrite replace * This is the entry point for post-rewriting terms n of the form * str.replace( s, t, r ) diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp index 9b23301f3..913518bc8 100644 --- a/src/theory/strings/skolem_cache.cpp +++ b/src/theory/strings/skolem_cache.cpp @@ -40,6 +40,16 @@ struct IndexVarAttributeId }; typedef expr::Attribute<IndexVarAttributeId, Node> IndexVarAttribute; +/** + * A bound variable corresponding to the universally quantified integer + * variable used to range over the valid lengths of a string, used for + * axiomatizing the behavior of some term. + */ +struct LengthVarAttributeId +{ +}; +typedef expr::Attribute<LengthVarAttributeId, Node> LengthVarAttribute; + SkolemCache::SkolemCache(bool useOpts) : d_useOpts(useOpts) { NodeManager* nm = NodeManager::currentNM(); @@ -300,6 +310,14 @@ Node SkolemCache::mkIndexVar(Node t) return bvm->mkBoundVar<IndexVarAttribute>(t, intType); } +Node SkolemCache::mkLengthVar(Node t) +{ + NodeManager* nm = NodeManager::currentNM(); + TypeNode intType = nm->integerType(); + BoundVarManager* bvm = nm->getBoundVarManager(); + return bvm->mkBoundVar<LengthVarAttribute>(t, intType); +} + } // namespace strings } // namespace theory } // namespace cvc5 diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h index 126ee313d..f0376dbc6 100644 --- a/src/theory/strings/skolem_cache.h +++ b/src/theory/strings/skolem_cache.h @@ -107,9 +107,14 @@ class SkolemCache // in_re(a, re.++(_*, b, _*)) => // exists k_pre, k_match, k_post. // a = k_pre ++ k_match ++ k_post ^ - // ~in_re(k_pre ++ substr(k_match, 0, str.len(k_match) - 1), - // re.++(_*, b, _*)) ^ - // in_re(k2, y) + // len(k_pre) = indexof_re(x, y, 0) ^ + // (forall l. 0 < l < len(k_match) => + // ~in_re(substr(k_match, 0, l), r)) ^ + // in_re(k_match, b) + // + // k_pre is the prefix before the first, shortest match of b in a. k_match + // is the substring of a matched by b. It is either empty or there is no + // shorter string that matches b. SK_FIRST_MATCH_PRE, SK_FIRST_MATCH, SK_FIRST_MATCH_POST, @@ -180,6 +185,16 @@ class SkolemCache */ static Node mkIndexVar(Node t); + /** Make length variable + * + * This returns an integer variable of kind BOUND_VARIABLE that is used for + * axiomatizing the behavior of a term or predicate t. It refers to lengths + * of strings in the reduction of t. For example, the length variable for the + * term str.indexof(s, r, n) is used to quantify over the lengths of strings + * that could be matched by r. + */ + static Node mkLengthVar(Node t); + private: /** * Simplifies the arguments for a string skolem used for indexing into the diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp index af2eecde8..c7b3b5300 100644 --- a/src/theory/strings/term_registry.cpp +++ b/src/theory/strings/term_registry.cpp @@ -88,10 +88,11 @@ Node TermRegistry::eagerReduce(Node t, SkolemCache* sc) LT, t, nm->mkConst(Rational(utils::getAlphabetCardinality())))); lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1); } - else if (tk == STRING_STRIDOF) + else if (tk == STRING_STRIDOF || tk == STRING_INDEXOF_RE) { - // (and (>= (str.indexof x y n) (- 1)) (<= (str.indexof x y n) (str.len - // x))) + // (and (>= (f x y n) (- 1)) (<= (f x y n) (str.len x))) + // + // where f in { str.indexof, str.indexof_re } Node l = nm->mkNode(STRING_LENGTH, t[0]); lemma = nm->mkNode(AND, nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))), @@ -143,11 +144,12 @@ void TermRegistry::preRegisterTerm(TNode n) Kind k = n.getKind(); if (!options::stringExp()) { - if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI - || k == STRING_STRREPL || k == STRING_SUBSTR || k == STRING_STRREPLALL - || k == SEQ_NTH || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL - || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER - || k == STRING_TOUPPER || k == STRING_REV || k == STRING_UPDATE) + if (k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_ITOS + || k == STRING_STOI || k == STRING_STRREPL || k == STRING_SUBSTR + || k == STRING_STRREPLALL || k == SEQ_NTH || k == STRING_REPLACE_RE + || k == STRING_REPLACE_RE_ALL || k == STRING_STRCTN || k == STRING_LEQ + || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV + || k == STRING_UPDATE) { std::stringstream ss; ss << "Term of kind " << k diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 06a39ec64..b1f5a0765 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -130,6 +130,7 @@ void TheoryStrings::finishInit() d_equalityEngine->addFunctionKind(kind::STRING_ITOS, eagerEval); d_equalityEngine->addFunctionKind(kind::STRING_STOI, eagerEval); d_equalityEngine->addFunctionKind(kind::STRING_STRIDOF, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_INDEXOF_RE, eagerEval); d_equalityEngine->addFunctionKind(kind::STRING_STRREPL, eagerEval); d_equalityEngine->addFunctionKind(kind::STRING_STRREPLALL, eagerEval); d_equalityEngine->addFunctionKind(kind::STRING_REPLACE_RE, eagerEval); diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 356ae28ac..83a4fa04a 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -253,6 +253,89 @@ Node StringsPreprocess::reduce(Node t, // Thus, indexof( x, y, n ) = skk. retNode = skk; } + else if (t.getKind() == kind::STRING_INDEXOF_RE) + { + // processing term: indexof_re(s, r, n) + Node s = t[0]; + Node r = t[1]; + Node n = t[2]; + Node skk = sc->mkTypedSkolemCached( + nm->integerType(), t, SkolemCache::SK_PURIFY, "iork"); + Node sLen = nm->mkNode(STRING_LENGTH, s); + + // n > len(x) + Node ub = nm->mkNode(GT, n, sLen); + // 0 > n + Node lb = nm->mkNode(GT, zero, n); + // n > len(x) OR 0 > n + Node condNegOne = nm->mkNode(OR, ub, lb); + // skk = -1 + Node retNegOne = skk.eqNode(negOne); + + Node emp = Word::mkEmptyWord(s.getType()); + // in_re("", r) + Node matchEmptyStr = nm->mkNode(STRING_IN_REGEXP, emp, r); + // skk = n + Node retN = skk.eqNode(n); + + Node i = SkolemCache::mkIndexVar(t); + Node l = SkolemCache::mkLengthVar(t); + Node bvl = nm->mkNode(BOUND_VAR_LIST, i, l); + Node bound = + nm->mkNode(AND, + { + nm->mkNode(GEQ, i, n), + nm->mkNode(LT, i, nm->mkNode(ITE, retNegOne, sLen, skk)), + nm->mkNode(GT, l, zero), + nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, i)), + }); + Node body = nm->mkNode( + OR, + bound.negate(), + nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, i, l), r) + .negate()); + // forall il. + // n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i => + // ~in_re(substr(s, i, l), r) + Node firstMatch = mkForallInternal(bvl, body); + Node bvll = nm->mkNode(BOUND_VAR_LIST, l); + Node validLen = + nm->mkNode(AND, + nm->mkNode(GEQ, l, n), + nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, skk))); + Node matchBody = nm->mkNode( + AND, + validLen, + nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, skk, l), r)); + // skk != -1 => + // exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r)) + Node match = nm->mkNode( + OR, retNegOne, mkForallInternal(bvll, matchBody.negate()).negate()); + + // assert: + // IF: n > len(s) OR 0 > n + // THEN: skk = -1 + // ELIF: in_re("", r) + // THEN: skk = n + // ELSE: (forall il. + // n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i => + // ~in_re(substr(s, i, l), r)) ^ + // (skk != -1 => + // exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r)) + // + // Note that this reduction relies on eager reduction lemmas being sent to + // properly limit the range of skk. + Node rr = nm->mkNode( + ITE, + condNegOne, + retNegOne, + nm->mkNode( + ITE, matchEmptyStr, retN, nm->mkNode(AND, firstMatch, match))); + asserts.push_back(rr); + + // Thus, indexof_re(s, r, n) = skk. + retNode = skk; + } else if (t.getKind() == STRING_ITOS) { // processing term: int.to.str( n ) @@ -596,17 +679,14 @@ Node StringsPreprocess::reduce(Node t, } else if (t.getKind() == STRING_REPLACE_RE) { + // processing term: replace_re( x, y, z ) Node x = t[0]; Node y = t[1]; Node z = t[2]; Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k"); - std::vector<Node> emptyVec; - Node sigmaStar = - nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec)); - Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, y, sigmaStar); - // in_re(x, re.++(_*, y, _*)) - Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re); + // indexof_re(x, y, 0) + Node idx = nm->mkNode(STRING_INDEXOF_RE, x, y, zero); // in_re("", y) Node matchesEmpty = @@ -620,42 +700,40 @@ Node StringsPreprocess::reduce(Node t, sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH, "rre_match"); Node k3 = sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_POST, "rre_post"); + Node k2Len = nm->mkNode(STRING_LENGTH, k2); // x = k1 ++ k2 ++ k3 - Node splitX = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3)); - // ~in_re(k1 ++ str.substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*)) - Node k2len = nm->mkNode(STRING_LENGTH, k2); - Node firstMatch = - nm->mkNode( - STRING_IN_REGEXP, - nm->mkNode( - STRING_CONCAT, - k1, - nm->mkNode( - STRING_SUBSTR, k2, zero, nm->mkNode(MINUS, k2len, one))), - re) - .negate(); + Node split = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3)); + // len(k1) = indexof_re(x, y, 0) + Node k1Len = nm->mkNode(STRING_LENGTH, k1).eqNode(idx); + Node l = SkolemCache::mkLengthVar(t); + Node bvll = nm->mkNode(BOUND_VAR_LIST, l); + Node bound = + nm->mkNode(AND, nm->mkNode(LEQ, zero, l), nm->mkNode(LT, l, k2Len)); + Node body = nm->mkNode( + OR, + bound.negate(), + nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, k2, zero, l), y) + .negate()); + // forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r) + Node shortestMatch = mkForallInternal(bvll, body); // in_re(k2, y) - Node k2Match = nm->mkNode(STRING_IN_REGEXP, k2, y); + Node match = nm->mkNode(STRING_IN_REGEXP, k2, y); // k = k1 ++ z ++ k3 - Node res2 = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3)); + Node res = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3)); - // IF in_re(x, re.++(_*, y, _*)) - // THEN: - // IF in_re("", y) - // THEN: k = z ++ x - // ELSE: - // x = k1 ++ k2 ++ k3 ^ - // ~in_re(k1 ++ substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*)) ^ - // in_re(k2, y) ^ k = k1 ++ z ++ k3 - // ELSE: k = x - asserts.push_back(nm->mkNode( - ITE, - hasMatch, + // IF: indexof_re(x, y, 0) = -1 + // THEN: k = x + // ELSE: + // x = k1 ++ k2 ++ k3 ^ + // len(k1) = indexof_re(x, y, 0) ^ + // (forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)) ^ + // in_re(k2, y) ^ + // k = k1 ++ z ++ k3 + asserts.push_back( nm->mkNode(ITE, - matchesEmpty, - res1, - nm->mkNode(AND, {splitX, firstMatch, k2Match, res2})), - k.eqNode(x))); + idx.eqNode(negOne), + k.eqNode(x), + nm->mkNode(AND, {split, k1Len, shortestMatch, match, res}))); retNode = k; } else if (t.getKind() == STRING_REPLACE_RE_ALL) @@ -679,13 +757,10 @@ Node StringsPreprocess::reduce(Node t, Node emp = Word::mkEmptyWord(t.getType()); - std::vector<Node> emptyVec; - Node sigmaStar = - nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec)); Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)); - Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, yp, sigmaStar); - // in_re(x, _* ++ y' ++ _*) - Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re); + Node idx = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero); + // indexof_re(x, y', 0) = -1 + Node noMatch = idx.eqNode(negOne); Node ufno = nm->mkNode(APPLY_UF, uf, numOcc); Node usno = nm->mkNode(APPLY_UF, us, numOcc); @@ -700,8 +775,9 @@ Node StringsPreprocess::reduce(Node t, lemmas.push_back(usno.eqNode(rem)); // Uf(0) = 0 lemmas.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero)); - // not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*))) - lemmas.push_back(nm->mkNode(STRING_IN_REGEXP, rem, re).negate()); + // indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 + lemmas.push_back( + nm->mkNode(STRING_INDEXOF_RE, rem, yp, zero).eqNode(negOne)); Node i = SkolemCache::mkIndexVar(t); Node bvli = nm->mkNode(BOUND_VAR_LIST, i); @@ -709,50 +785,57 @@ Node StringsPreprocess::reduce(Node t, nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc)); Node ip1 = nm->mkNode(PLUS, i, one); Node ufi = nm->mkNode(APPLY_UF, uf, i); - Node uli = nm->mkNode(APPLY_UF, ul, i); Node ufip1 = nm->mkNode(APPLY_UF, uf, ip1); - Node ii = nm->mkNode(MINUS, ufip1, uli); - Node match = nm->mkNode(STRING_SUBSTR, x, ii, uli); - Node pfxMatch = - nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)); - Node nonMatch = - nm->mkNode(STRING_SUBSTR, - x, - ufi, - nm->mkNode(MINUS, nm->mkNode(MINUS, ufip1, one), ufi)); + Node ulip1 = nm->mkNode(APPLY_UF, ul, ip1); + // ii = Uf(i + 1) - Ul(i + 1) + Node ii = nm->mkNode(MINUS, ufip1, ulip1); std::vector<Node> flem; // Ul(i) > 0 - flem.push_back(nm->mkNode(GT, uli, zero)); - // Uf(i + 1) >= Uf(i) + Ul(i) - flem.push_back(nm->mkNode(GEQ, ufip1, nm->mkNode(PLUS, ufi, uli))); - // in_re(substr(x, ii, Ul(i)), y') - flem.push_back(nm->mkNode(STRING_IN_REGEXP, match, yp)); - // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*)) - flem.push_back(nm->mkNode(STRING_IN_REGEXP, nonMatch, re).negate()); + flem.push_back(nm->mkNode(GT, ulip1, zero)); + // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) + flem.push_back(ufip1.eqNode( + nm->mkNode(PLUS, nm->mkNode(STRING_INDEXOF_RE, x, yp, ufi), ulip1))); + // in_re(substr(x, ii, Ul(i + 1)), y') + flem.push_back(nm->mkNode( + STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, ulip1), yp)); + Node l = SkolemCache::mkLengthVar(t); + Node bvll = nm->mkNode(BOUND_VAR_LIST, l); + Node lenBound = + nm->mkNode(AND, nm->mkNode(LT, zero, l), nm->mkNode(LT, l, ulip1)); + Node shortestMatchBody = nm->mkNode( + OR, + lenBound.negate(), + nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, l), y) + .negate()); + // forall l. 0 < l < Ul(i + 1) => + // ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y') + flem.push_back(mkForallInternal(bvll, shortestMatchBody)); + Node pfxMatch = + nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)); // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1) flem.push_back( nm->mkNode(APPLY_UF, us, i) .eqNode(nm->mkNode( STRING_CONCAT, pfxMatch, z, nm->mkNode(APPLY_UF, us, ip1)))); - Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem)); Node forall = mkForallInternal(bvli, body); lemmas.push_back(forall); - // IF in_re(x, re.++(_*, y', _*)) - // THEN: + // IF indexof(x, y', 0) = -1 + // THEN: k = x + // ELSE: // numOcc > 0 ^ // k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^ - // Uf(0) = 0 ^ not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*))) + // Uf(0) = 0 ^ indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 ^ // forall i. 0 <= i < nummOcc => // Ul(i) > 0 ^ - // Uf(i + 1) >= Uf(i) + Ul(i) ^ - // in_re(substr(x, ii, Ul(i)), y') ^ - // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*)) ^ + // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) ^ + // in_re(substr(x, ii, Ul(i + 1)), y') ^ + // forall l. 0 < l < Ul(i + 1) => + // ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y') // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1) // where ii = Uf(i + 1) - Ul(i) - // ELSE: k = x // where y' = re.diff(y, "") // // Conceptually, y' is the regex y but excluding the empty string (because @@ -760,8 +843,17 @@ Node StringsPreprocess::reduce(Node t, // matches of y' in x, Uf(i) is the end position of the i-th match, Ul(i) // is the length of the i^th match, and Us(i) is the result of processing // the remainder after processing the i^th occurrence of y in x. + // + // Visualization of Uf(i) and Ul(i): + // + // x = |------------| match 1 |-----------| match 2 |---| + // | | | + // Uf(0) Uf(1) Uf(2) + // + // |---------| |-----------| + // Ul(1) Ul(2) asserts.push_back( - nm->mkNode(ITE, hasMatch, nm->mkNode(AND, lemmas), k.eqNode(x))); + nm->mkNode(ITE, noMatch, k.eqNode(x), nm->mkNode(AND, lemmas))); retNode = k; } else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER) diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index 6897d5272..c763557ca 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -383,8 +383,9 @@ TypeNode getOwnerStringType(Node n) { TypeNode tn; Kind k = n.getKind(); - if (k == STRING_STRIDOF || k == STRING_LENGTH || k == STRING_STRCTN - || k == SEQ_NTH || k == STRING_PREFIX || k == STRING_SUFFIX) + if (k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_LENGTH + || k == STRING_STRCTN || k == SEQ_NTH || k == STRING_PREFIX + || k == STRING_SUFFIX) { // owning string type is the type of first argument tn = n[0].getType(); |