diff options
-rw-r--r-- | NEWS | 6 | ||||
-rw-r--r-- | src/api/cvc4cpp.cpp | 12 | ||||
-rw-r--r-- | src/api/cvc4cppkind.h | 31 | ||||
-rw-r--r-- | src/parser/cvc/Cvc.g | 10 | ||||
-rw-r--r-- | src/parser/smt2/smt2.cpp | 8 | ||||
-rw-r--r-- | src/printer/smt2/smt2_printer.cpp | 12 | ||||
-rw-r--r-- | src/theory/datatypes/sygus_extension.cpp | 4 | ||||
-rw-r--r-- | src/theory/evaluator.cpp | 2 | ||||
-rw-r--r-- | src/theory/strings/extf_solver.cpp | 8 | ||||
-rw-r--r-- | src/theory/strings/kinds | 12 | ||||
-rw-r--r-- | src/theory/strings/skolem_cache.h | 4 | ||||
-rw-r--r-- | src/theory/strings/theory_strings.cpp | 24 | ||||
-rw-r--r-- | src/theory/strings/theory_strings_preprocess.cpp | 69 | ||||
-rw-r--r-- | src/theory/strings/theory_strings_preprocess.h | 8 | ||||
-rw-r--r-- | src/theory/strings/theory_strings_rewriter.cpp | 125 | ||||
-rw-r--r-- | src/theory/strings/theory_strings_rewriter.h | 16 |
16 files changed, 279 insertions, 72 deletions
@@ -3,6 +3,12 @@ This file contains a summary of important user-visible changes. Changes since 1.7 ================= +New Features: +* Strings: + * Support for `str.replace_re`, `str.replace_re_all`, `str.is_digit`, + `str.from_code`, `re.diff`, and `re.comp` + * Support for new operator names (e.g. `str.in_re` instead of `str.in.re`) + Changes: * Java API change: The name of CVC4's package is now `edu.stanford.CVC4` instead of `edu.nyu.acsys.CVC4`. diff --git a/src/api/cvc4cpp.cpp b/src/api/cvc4cpp.cpp index 4e5f4604e..2683b5838 100644 --- a/src/api/cvc4cpp.cpp +++ b/src/api/cvc4cpp.cpp @@ -253,8 +253,10 @@ const static std::unordered_map<Kind, CVC4::Kind, KindHashFunction> s_kinds{ {STRING_CHARAT, CVC4::Kind::STRING_CHARAT}, {STRING_STRCTN, CVC4::Kind::STRING_STRCTN}, {STRING_STRIDOF, CVC4::Kind::STRING_STRIDOF}, - {STRING_STRREPL, CVC4::Kind::STRING_STRREPL}, - {STRING_STRREPLALL, CVC4::Kind::STRING_STRREPLALL}, + {STRING_REPLACE, CVC4::Kind::STRING_REPLACE}, + {STRING_REPLACE_ALL, CVC4::Kind::STRING_REPLACE_ALL}, + {STRING_REPLACE_RE, CVC4::Kind::STRING_REPLACE_RE}, + {STRING_REPLACE_RE_ALL, CVC4::Kind::STRING_REPLACE_RE_ALL}, {STRING_TOLOWER, CVC4::Kind::STRING_TOLOWER}, {STRING_TOUPPER, CVC4::Kind::STRING_TOUPPER}, {STRING_REV, CVC4::Kind::STRING_REV}, @@ -517,8 +519,10 @@ const static std::unordered_map<CVC4::Kind, Kind, CVC4::kind::KindHashFunction> {CVC4::Kind::STRING_CHARAT, STRING_CHARAT}, {CVC4::Kind::STRING_STRCTN, STRING_STRCTN}, {CVC4::Kind::STRING_STRIDOF, STRING_STRIDOF}, - {CVC4::Kind::STRING_STRREPL, STRING_STRREPL}, - {CVC4::Kind::STRING_STRREPLALL, STRING_STRREPLALL}, + {CVC4::Kind::STRING_REPLACE, STRING_REPLACE}, + {CVC4::Kind::STRING_REPLACE_ALL, STRING_REPLACE_ALL}, + {CVC4::Kind::STRING_REPLACE_RE, STRING_REPLACE_RE}, + {CVC4::Kind::STRING_REPLACE_RE_ALL, STRING_REPLACE_RE_ALL}, {CVC4::Kind::STRING_TOLOWER, STRING_TOLOWER}, {CVC4::Kind::STRING_TOUPPER, STRING_TOUPPER}, {CVC4::Kind::STRING_REV, STRING_REV}, diff --git a/src/api/cvc4cppkind.h b/src/api/cvc4cppkind.h index ca5696537..9cac4c566 100644 --- a/src/api/cvc4cppkind.h +++ b/src/api/cvc4cppkind.h @@ -1949,7 +1949,7 @@ enum CVC4_PUBLIC Kind : int32_t * mkTerm(Kind kind, Term child1, Term child2, Term child3) * mkTerm(Kind kind, const std::vector<Term>& children) */ - STRING_STRREPL, + STRING_REPLACE, /** * String replace all. * Replaces all occurrences of a string s2 in a string s1 with string s3. @@ -1962,7 +1962,34 @@ enum CVC4_PUBLIC Kind : int32_t * mkTerm(Kind kind, Term child1, Term child2, Term child3) * mkTerm(Kind kind, const std::vector<Term>& children) */ - STRING_STRREPLALL, + STRING_REPLACE_ALL, + /** + * String replace regular expression match. + * Replaces the first match of a regular expression r in string s1 with + * string s2. If r does not match a substring of s1, s1 is returned + * unmodified. + * Parameters: 3 + * -[1]: Term of sort String (string s1) + * -[2]: Term of sort Regexp (regexp r) + * -[3]: Term of sort String (string s2) + * Create with: + * mkTerm(Kind kind, Term child1, Term child2, Term child3) + * mkTerm(Kind kind, const std::vector<Term>& children) + */ + STRING_REPLACE_RE, + /** + * String replace all regular expression matches. + * Replaces all matches of a regular expression r in string s1 with string + * s2. If r does not match a substring of s1, s1 is returned unmodified. + * Parameters: 3 + * -[1]: Term of sort String (string s1) + * -[2]: Term of sort Regexp (regexp r) + * -[3]: Term of sort String (string s2) + * Create with: + * mkTerm(Kind kind, Term child1, Term child2, Term child3) + * mkTerm(Kind kind, const std::vector<Term>& children) + */ + STRING_REPLACE_RE_ALL, /** * String to lower case. * Parameters: 1 diff --git a/src/parser/cvc/Cvc.g b/src/parser/cvc/Cvc.g index 33ca7bcf2..1a8014450 100644 --- a/src/parser/cvc/Cvc.g +++ b/src/parser/cvc/Cvc.g @@ -222,6 +222,8 @@ tokens { STRING_INDEXOF_TOK = 'INDEXOF'; STRING_REPLACE_TOK = 'REPLACE'; STRING_REPLACE_ALL_TOK = 'REPLACE_ALL'; + STRING_REPLACE_RE_TOK = 'REPLACE_RE'; + STRING_REPLACE_RE_ALL_TOK = 'REPLACE_RE_ALL'; STRING_PREFIXOF_TOK = 'PREFIXOF'; STRING_SUFFIXOF_TOK = 'SUFFIXOF'; STRING_STOI_TOK = 'STRING_TO_INTEGER'; @@ -2008,9 +2010,13 @@ stringTerm[CVC4::Expr& f] | STRING_INDEXOF_TOK LPAREN formula[f] COMMA formula[f2] COMMA formula[f3] RPAREN { f = MK_EXPR(CVC4::kind::STRING_STRIDOF, f, f2, f3); } | STRING_REPLACE_TOK LPAREN formula[f] COMMA formula[f2] COMMA formula[f3] RPAREN - { f = MK_EXPR(CVC4::kind::STRING_STRREPL, f, f2, f3); } + { f = MK_EXPR(CVC4::kind::STRING_REPLACE, f, f2, f3); } | STRING_REPLACE_ALL_TOK LPAREN formula[f] COMMA formula[f2] COMMA formula[f3] RPAREN - { f = MK_EXPR(CVC4::kind::STRING_STRREPLALL, f, f2, f3); } + { f = MK_EXPR(CVC4::kind::STRING_REPLACE_ALL, f, f2, f3); } + | STRING_REPLACE_RE_TOK LPAREN formula[f] COMMA formula[f2] COMMA formula[f3] RPAREN + { f = MK_EXPR(CVC4::kind::STRING_REPLACE_RE, f, f2, f3); } + | STRING_REPLACE_RE_ALL_TOK LPAREN formula[f] COMMA formula[f2] COMMA formula[f3] RPAREN + { f = MK_EXPR(CVC4::kind::STRING_REPLACE_RE_ALL, f, f2, f3); } | STRING_PREFIXOF_TOK LPAREN formula[f] COMMA formula[f2] RPAREN { f = MK_EXPR(CVC4::kind::STRING_PREFIX, f, f2); } | STRING_SUFFIXOF_TOK LPAREN formula[f] COMMA formula[f2] RPAREN diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp index d915d2ab4..8b526cf71 100644 --- a/src/parser/smt2/smt2.cpp +++ b/src/parser/smt2/smt2.cpp @@ -162,7 +162,9 @@ void Smt2::addStringOperators() { addOperator(kind::STRING_STRCTN, "str.contains" ); addOperator(kind::STRING_CHARAT, "str.at" ); addOperator(kind::STRING_STRIDOF, "str.indexof" ); - addOperator(kind::STRING_STRREPL, "str.replace" ); + addOperator(kind::STRING_REPLACE, "str.replace"); + addOperator(kind::STRING_REPLACE_RE, "str.replace_re"); + addOperator(kind::STRING_REPLACE_RE_ALL, "str.replace_re_all"); if (!strictModeEnabled()) { addOperator(kind::STRING_TOLOWER, "str.tolower"); @@ -180,7 +182,7 @@ void Smt2::addStringOperators() { addOperator(kind::STRING_IN_REGEXP, "str.in_re"); addOperator(kind::STRING_TO_REGEXP, "str.to_re"); addOperator(kind::STRING_CODE, "str.to_code"); - addOperator(kind::STRING_STRREPLALL, "str.replace_all"); + addOperator(kind::STRING_REPLACE_ALL, "str.replace_all"); } else { @@ -189,7 +191,7 @@ void Smt2::addStringOperators() { addOperator(kind::STRING_IN_REGEXP, "str.in.re"); addOperator(kind::STRING_TO_REGEXP, "str.to.re"); addOperator(kind::STRING_CODE, "str.code"); - addOperator(kind::STRING_STRREPLALL, "str.replaceall"); + addOperator(kind::STRING_REPLACE_ALL, "str.replaceall"); } addOperator(kind::REGEXP_CONCAT, "re.++"); diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp index 6fdbd4adb..b530fa16a 100644 --- a/src/printer/smt2/smt2_printer.cpp +++ b/src/printer/smt2/smt2_printer.cpp @@ -644,8 +644,10 @@ void Smt2Printer::toStream(std::ostream& out, case kind::STRING_CHARAT: case kind::STRING_STRCTN: case kind::STRING_STRIDOF: - case kind::STRING_STRREPL: - case kind::STRING_STRREPLALL: + case kind::STRING_REPLACE: + case kind::STRING_REPLACE_ALL: + case kind::STRING_REPLACE_RE: + case kind::STRING_REPLACE_RE_ALL: case kind::STRING_TOLOWER: case kind::STRING_TOUPPER: case kind::STRING_REV: @@ -1207,9 +1209,11 @@ static string smtKindString(Kind k, Variant v) case kind::STRING_STRCTN: return "str.contains" ; case kind::STRING_CHARAT: return "str.at" ; case kind::STRING_STRIDOF: return "str.indexof" ; - case kind::STRING_STRREPL: return "str.replace" ; - case kind::STRING_STRREPLALL: + case kind::STRING_REPLACE: return "str.replace"; + case kind::STRING_REPLACE_ALL: return v == smt2_6_1_variant ? "str.replace_all" : "str.replaceall"; + case kind::STRING_REPLACE_RE: return "str.replace_re"; + case kind::STRING_REPLACE_RE_ALL: return "str.replace_re_all"; case kind::STRING_TOLOWER: return "str.tolower"; case kind::STRING_TOUPPER: return "str.toupper"; case kind::STRING_REV: return "str.rev"; diff --git a/src/theory/datatypes/sygus_extension.cpp b/src/theory/datatypes/sygus_extension.cpp index 95b73b2fe..2dfc87fd6 100644 --- a/src/theory/datatypes/sygus_extension.cpp +++ b/src/theory/datatypes/sygus_extension.cpp @@ -768,12 +768,12 @@ Node SygusExtension::getSimpleSymBreakPred(Node e, deq_child[1].push_back(1); } } - if (nk == ITE || nk == STRING_STRREPL || nk == STRING_STRREPLALL) + if (nk == ITE || nk == STRING_REPLACE || nk == STRING_REPLACE_ALL) { deq_child[0].push_back(1); deq_child[1].push_back(2); } - if (nk == STRING_STRREPL || nk == STRING_STRREPLALL) + if (nk == STRING_REPLACE || nk == STRING_REPLACE_ALL) { deq_child[0].push_back(0); deq_child[1].push_back(1); diff --git a/src/theory/evaluator.cpp b/src/theory/evaluator.cpp index a3ea768d4..70649a5f2 100644 --- a/src/theory/evaluator.cpp +++ b/src/theory/evaluator.cpp @@ -538,7 +538,7 @@ EvalResult Evaluator::evalInternal( break; } - case kind::STRING_STRREPL: + case kind::STRING_REPLACE: { const String& s = results[currNode[0]].d_str; const String& x = results[currNode[1]].d_str; diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index 6ab74cf9a..e8c88ffbb 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -49,8 +49,8 @@ ExtfSolver::ExtfSolver(context::Context* c, d_extt->addFunctionKind(kind::STRING_STRIDOF); d_extt->addFunctionKind(kind::STRING_ITOS); d_extt->addFunctionKind(kind::STRING_STOI); - d_extt->addFunctionKind(kind::STRING_STRREPL); - d_extt->addFunctionKind(kind::STRING_STRREPLALL); + d_extt->addFunctionKind(kind::STRING_REPLACE); + d_extt->addFunctionKind(kind::STRING_REPLACE_ALL); d_extt->addFunctionKind(kind::STRING_STRCTN); d_extt->addFunctionKind(kind::STRING_IN_REGEXP); d_extt->addFunctionKind(kind::STRING_LEQ); @@ -170,8 +170,8 @@ bool ExtfSolver::doReduction(int effort, Node n, bool& isCd) { NodeManager* nm = NodeManager::currentNM(); Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF - || k == STRING_ITOS || k == STRING_STOI || k == STRING_STRREPL - || k == STRING_STRREPLALL || k == STRING_LEQ || k == STRING_TOLOWER + || k == STRING_ITOS || k == STRING_STOI || k == STRING_REPLACE + || k == STRING_REPLACE_ALL || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV); std::vector<Node> new_nodes; Node res = d_preproc.simplify(n, new_nodes); diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 052b75302..f9ce036bc 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -21,8 +21,10 @@ operator STRING_STRCTN 2 "string contains" operator STRING_LT 2 "string less than" operator STRING_LEQ 2 "string less than or equal" operator STRING_STRIDOF 3 "string indexof" -operator STRING_STRREPL 3 "string replace" -operator STRING_STRREPLALL 3 "string replace all" +operator STRING_REPLACE 3 "string replace" +operator STRING_REPLACE_ALL 3 "string replace" +operator STRING_REPLACE_RE 3 "string replace regular expression match" +operator STRING_REPLACE_RE_ALL 3 "string replace all regular expression matches" operator STRING_PREFIX 2 "string prefixof" operator STRING_SUFFIX 2 "string suffixof" operator STRING_ITOS 1 "integer to string" @@ -98,8 +100,10 @@ typerule STRING_STRCTN "SimpleTypeRule<RBool, AString, AString>" typerule STRING_LT "SimpleTypeRule<RBool, AString, AString>" typerule STRING_LEQ "SimpleTypeRule<RBool, AString, AString>" typerule STRING_STRIDOF "SimpleTypeRule<RInteger, AString, AString, AInteger>" -typerule STRING_STRREPL "SimpleTypeRule<RString, AString, AString, AString>" -typerule STRING_STRREPLALL "SimpleTypeRule<RString, AString, AString, AString>" +typerule STRING_REPLACE "SimpleTypeRule<RString, AString, AString, AString>" +typerule STRING_REPLACE_ALL "SimpleTypeRule<RString, AString, AString, AString>" +typerule STRING_REPLACE_RE "SimpleTypeRule<RString, AString, ARegExp, AString>" +typerule STRING_REPLACE_RE_ALL "SimpleTypeRule<RString, AString, ARegExp, AString>" typerule STRING_PREFIX "SimpleTypeRule<RBool, AString, AString>" typerule STRING_SUFFIX "SimpleTypeRule<RBool, AString, AString>" typerule STRING_ITOS "SimpleTypeRule<RString, AInteger>" diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h index 8fcf46c14..b6a3fc15b 100644 --- a/src/theory/strings/skolem_cache.h +++ b/src/theory/strings/skolem_cache.h @@ -108,6 +108,10 @@ class SkolemCache // k(x) is the end index of the x^th occurrence of b in a // where n is the number of occurrences of b in a, and k(0)=0. SK_OCCUR_INDEX, + + SK_FIRST_MATCH_PRE, + SK_FIRST_MATCH, + SK_FIRST_MATCH_POST, }; /** * Returns a skolem of type string that is cached for (a,b,id) and has diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 92af3cc0a..41ac5b787 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -102,8 +102,8 @@ TheoryStrings::TheoryStrings(context::Context* c, d_equalityEngine.addFunctionKind(kind::STRING_ITOS); d_equalityEngine.addFunctionKind(kind::STRING_STOI); d_equalityEngine.addFunctionKind(kind::STRING_STRIDOF); - d_equalityEngine.addFunctionKind(kind::STRING_STRREPL); - d_equalityEngine.addFunctionKind(kind::STRING_STRREPLALL); + d_equalityEngine.addFunctionKind(kind::STRING_REPLACE); + d_equalityEngine.addFunctionKind(kind::STRING_REPLACE_ALL); d_equalityEngine.addFunctionKind(kind::STRING_TOLOWER); d_equalityEngine.addFunctionKind(kind::STRING_TOUPPER); d_equalityEngine.addFunctionKind(kind::STRING_REV); @@ -511,12 +511,12 @@ void TheoryStrings::preRegisterTerm(TNode n) { << "TheoryString::preregister : " << n << std::endl; //check for logic exceptions Kind k = n.getKind(); + Assert(k != STRING_REPLACE_RE && k != STRING_REPLACE_RE_ALL); if( !options::stringExp() ){ - if (k == kind::STRING_STRIDOF || k == kind::STRING_ITOS - || k == kind::STRING_STOI || k == kind::STRING_STRREPL - || k == kind::STRING_STRREPLALL || k == kind::STRING_STRCTN - || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER - || k == STRING_REV) + if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI + || k == STRING_REPLACE || k == STRING_REPLACE_ALL + || k == kind::STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER + || k == STRING_TOUPPER || k == STRING_REV) { std::stringstream ss; ss << "Term of kind " << k @@ -595,6 +595,16 @@ void TheoryStrings::preRegisterTerm(TNode n) { Node TheoryStrings::expandDefinition(LogicRequest &logicRequest, Node node) { Trace("strings-exp-def") << "TheoryStrings::expandDefinition : " << node << std::endl; + + NodeManager* nm = NodeManager::currentNM(); + if (node.getKind() == STRING_REPLACE_RE) + { + Node k = nm->mkBoundVar(nm->stringType()); + Node bvl = nm->mkNode(BOUND_VAR_LIST, k); + Node body = d_esolver->getPreprocess()->simplifyReplaceRe(k, node); + node = nm->mkNode(CHOICE, bvl, body); + } + return node; } diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 6272ad129..62eb965ad 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -347,7 +347,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { retNode = stoit; } - else if (t.getKind() == kind::STRING_STRREPL) + else if (t.getKind() == kind::STRING_REPLACE) { // processing term: replace( x, y, z ) Node x = t[0]; @@ -407,7 +407,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { // Thus, replace( x, y, z ) = rpw. retNode = rpw; } - else if (t.getKind() == kind::STRING_STRREPLALL) + else if (t.getKind() == kind::STRING_REPLACE_ALL) { // processing term: replaceall( x, y, z ) Node x = t[0]; @@ -716,6 +716,71 @@ void StringsPreprocess::processAssertions( std::vector< Node > &vec_node ){ } } +Node StringsPreprocess::simplifyReplaceRe(Node k, Node t) +{ + Assert(t.getKind() == STRING_REPLACE_RE); + NodeManager* nm = NodeManager::currentNM(); + Node x = t[0]; + Node y = t[1]; + Node z = t[2]; + + std::vector<Node> emptyVec; + Node sigmaStar = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec)); + Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, y, sigmaStar); + // str.in.re(x, _* ++ y ++ _*) + Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re); + + // str.in.re("", y) + Node matchesEmpty = nm->mkNode(STRING_IN_REGEXP, nm->mkConst(String("")), y); + // k = z ++ x + Node res1 = k.eqNode(nm->mkNode(STRING_CONCAT, z, x)); + + Node k1 = + d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_PRE, "rre_pre"); + Node k2 = + d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH, "rre_match"); + Node k3 = + d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_POST, "rre_post"); + // x = k1 ++ k2 ++ k3 + Node splitX = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3)); + // not(str.in.re(k1 ++ str.substr(k2, 0, str.len(k2) - 1), _* ++ y ++ _*)) + Node k2len = nm->mkNode(STRING_LENGTH, k2); + Node firstMatch = + nm->mkNode(STRING_IN_REGEXP, + nm->mkNode(STRING_CONCAT, + k1, + nm->mkNode(STRING_SUBSTR, + k2, + d_zero, + nm->mkNode(MINUS, k2len, d_one))), + re) + .negate(); + // str.in.re(k2, y) + Node k2Match = nm->mkNode(STRING_IN_REGEXP, k2, y); + // k = k1 ++ z ++ k3 + Node res2 = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3)); + + // IF str.in.re(x, _* ++ y ++ _*) + // THEN: + // IF str.in.re("", y) + // THEN: k = z ++ x + // ELSE: + // x = k1 ++ k2 ++ k3 AND + // not(str.in.re(k1 ++ str.substr(k2, 0, str.len(k2) - 1), _* ++ y ++ _*)) + // AND str.in.re(k2, y) AND k = k1 ++ z ++ k3 + // ELSE: k = "" + return nm->mkNode( + ITE, + hasMatch, + nm->mkNode(ITE, + matchesEmpty, + res1, + nm->mkNode(AND, splitX, firstMatch, k2Match, res2)), + k.eqNode(x)); +} + +Node StringsPreprocess::simplifyReplaceReAll(Node k, Node t) { return Node(); } + }/* CVC4::theory::strings namespace */ }/* CVC4::theory namespace */ }/* CVC4 namespace */ diff --git a/src/theory/strings/theory_strings_preprocess.h b/src/theory/strings/theory_strings_preprocess.h index b96d619ef..3f6f5210e 100644 --- a/src/theory/strings/theory_strings_preprocess.h +++ b/src/theory/strings/theory_strings_preprocess.h @@ -64,6 +64,14 @@ public: */ void processAssertions(std::vector<Node> &vec_node); + /** + * Reduces a term `str.replace_re(t, s, u)`. + * + * @param k + */ + Node simplifyReplaceRe(Node k, Node t); + Node simplifyReplaceReAll(Node k, Node t); + private: /** commonly used constants */ Node d_zero; diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 9cd4c1ecc..c8c78874d 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -473,7 +473,7 @@ Node TheoryStringsRewriter::rewriteStrEqualityExt(Node node) if (node[i] == empty) { Node ne = node[1 - i]; - if (ne.getKind() == STRING_STRREPL) + if (ne.getKind() == STRING_REPLACE) { // (= "" (str.replace x y x)) ---> (= x "") if (ne[0] == ne[2]) @@ -531,7 +531,7 @@ Node TheoryStringsRewriter::rewriteStrEqualityExt(Node node) // ------- rewrites for (= (str.replace _ _ _) _) for (size_t i = 0; i < 2; i++) { - if (node[i].getKind() == STRING_STRREPL) + if (node[i].getKind() == STRING_REPLACE) { Node repl = node[i]; Node x = node[1 - i]; @@ -540,7 +540,7 @@ Node TheoryStringsRewriter::rewriteStrEqualityExt(Node node) if (checkEntailNonEmpty(x) && repl[0] == empty) { Node ret = nm->mkNode( - EQUAL, empty, nm->mkNode(STRING_STRREPL, x, repl[2], repl[1])); + EQUAL, empty, nm->mkNode(STRING_REPLACE, x, repl[2], repl[1])); return returnRewrite(node, ret, "str-eq-repl-emp"); } @@ -1590,7 +1590,7 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec); } } - else if (nk0 == STRING_STRREPL || nk0 == STRING_STRREPLALL) + else if (nk0 == STRING_REPLACE || nk0 == STRING_REPLACE_ALL) { Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1])); Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2])); @@ -1635,14 +1635,22 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { { retNode = rewriteIndexof( node ); } - else if (nk == kind::STRING_STRREPL) + else if (nk == kind::STRING_REPLACE) { retNode = rewriteReplace( node ); } - else if (nk == kind::STRING_STRREPLALL) + else if (nk == kind::STRING_REPLACE_ALL) { retNode = rewriteReplaceAll(node); } + else if (nk == kind::STRING_REPLACE_RE) + { + retNode = rewriteReplaceRe(node); + } + else if (nk == kind::STRING_REPLACE_RE_ALL) + { + retNode = rewriteReplaceReAll(node); + } else if (nk == STRING_TOLOWER || nk == STRING_TOUPPER) { retNode = rewriteStrConvert(node); @@ -1862,7 +1870,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) return returnRewrite(node, ret, "ss-start-geq-len"); } } - else if (node[0].getKind() == STRING_STRREPL) + else if (node[0].getKind() == STRING_REPLACE) { // (str.substr (str.replace x y z) 0 n) // ---> (str.replace (str.substr x 0 n) y z) @@ -1873,7 +1881,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) && checkEntailLengthOne(node[0][2], true)) { Node ret = nm->mkNode( - kind::STRING_STRREPL, + kind::STRING_REPLACE, nm->mkNode(kind::STRING_SUBSTR, node[0][0], node[1], node[2]), node[0][1], node[0][2]); @@ -2129,7 +2137,7 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { // str.contains( x, "A" ) OR str.contains( y, "A" ) return returnRewrite(node, ret, "ctn-concat-char"); } - else if (node[0].getKind() == STRING_STRREPL) + else if (node[0].getKind() == STRING_REPLACE) { Node rplDomain = checkEntailContains(node[0][1], node[1]); if (!rplDomain.isNull() && !rplDomain.getConst<bool>()) @@ -2175,7 +2183,7 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { for (const Node& n : nc2) { - if (n.getKind() == kind::STRING_STRREPL) + if (n.getKind() == kind::STRING_REPLACE) { // (str.contains x (str.replace y z w)) --> false // if (str.contains x y) = false and (str.contains x w) = false @@ -2308,7 +2316,7 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { return returnRewrite(node, ret, "ctn-substr"); } } - else if (node[0].getKind() == kind::STRING_STRREPL) + else if (node[0].getKind() == kind::STRING_REPLACE) { if (node[1].isConst() && node[0][1].isConst() && node[0][2].isConst()) { @@ -2362,14 +2370,14 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { Node empty = nm->mkConst(String("")); Node ret = nm->mkNode( kind::STRING_STRCTN, - nm->mkNode(kind::STRING_STRREPL, node[0][0], node[0][1], empty), + nm->mkNode(kind::STRING_REPLACE, node[0][0], node[0][1], empty), node[1]); return returnRewrite(node, ret, "ctn-repl-simp-repl"); } } } - if (node[1].getKind() == kind::STRING_STRREPL) + if (node[1].getKind() == kind::STRING_REPLACE) { // (str.contains x (str.replace y x y)) ---> // (str.contains x y) @@ -2609,7 +2617,7 @@ Node TheoryStringsRewriter::rewriteIndexof( Node node ) { } Node TheoryStringsRewriter::rewriteReplace( Node node ) { - Assert(node.getKind() == kind::STRING_STRREPL); + Assert(node.getKind() == kind::STRING_REPLACE); NodeManager* nm = NodeManager::currentNM(); if (node[1].isConst() && node[1].getConst<String>().isEmptyString()) @@ -2691,7 +2699,7 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { Node nn1 = utils::mkConcat(STRING_CONCAT, emptyNodes); if (node[1] != nn1) { - Node ret = nm->mkNode(STRING_STRREPL, node[0], nn1, node[2]); + Node ret = nm->mkNode(STRING_REPLACE, node[0], nn1, node[2]); return returnRewrite(node, ret, "rpl-x-y-x-simp"); } } @@ -2736,7 +2744,7 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { // this is independent of whether the second argument may be empty std::vector<Node> cc; cc.push_back(NodeManager::currentNM()->mkNode( - kind::STRING_STRREPL, + kind::STRING_REPLACE, utils::mkConcat(STRING_CONCAT, children0), node[1], node[2])); @@ -2792,14 +2800,14 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { Node nn1 = utils::mkConcat(STRING_CONCAT, emptyNodesList); if (nn1 != node[1] || nn2 != node[2]) { - Node res = nm->mkNode(kind::STRING_STRREPL, node[0], nn1, nn2); + Node res = nm->mkNode(kind::STRING_REPLACE, node[0], nn1, nn2); return returnRewrite(node, res, "rpl-emp-cnts-substs"); } } if (nn2 != node[2]) { - Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1], nn2); + Node res = nm->mkNode(kind::STRING_REPLACE, node[0], node[1], nn2); return returnRewrite(node, res, "rpl-cnts-substs"); } } @@ -2820,7 +2828,7 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { std::vector<Node> cc; cc.insert(cc.end(), cb.begin(), cb.end()); cc.push_back(NodeManager::currentNM()->mkNode( - kind::STRING_STRREPL, + kind::STRING_REPLACE, utils::mkConcat(STRING_CONCAT, children0), node[1], node[2])); @@ -2864,7 +2872,7 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { lastChild1[1], nm->mkNode( kind::PLUS, len0, one, nm->mkNode(kind::UMINUS, partLen1)))); - Node res = nm->mkNode(kind::STRING_STRREPL, + Node res = nm->mkNode(kind::STRING_REPLACE, node[0], utils::mkConcat(STRING_CONCAT, children1), node[2]); @@ -2872,7 +2880,7 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { } } - if (node[0].getKind() == STRING_STRREPL) + if (node[0].getKind() == STRING_REPLACE) { Node x = node[0]; Node y = node[1]; @@ -2912,8 +2920,8 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z)); if (wEqZ.isConst() && !wEqZ.getConst<bool>()) { - Node ret = nm->mkNode(kind::STRING_STRREPL, - nm->mkNode(kind::STRING_STRREPL, y, w, z), + Node ret = nm->mkNode(kind::STRING_REPLACE, + nm->mkNode(kind::STRING_REPLACE, y, w, z), y, z); return returnRewrite(node, ret, "repl-repl-short-circuit"); @@ -2922,7 +2930,7 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { } } - if (node[1].getKind() == STRING_STRREPL) + if (node[1].getKind() == STRING_REPLACE) { if (node[1][0] == node[0]) { @@ -2998,11 +3006,11 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { } if (invSuccess) { - Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1][0], node[2]); + Node res = nm->mkNode(kind::STRING_REPLACE, node[0], node[1][0], node[2]); return returnRewrite(node, res, "repl-repl2-inv"); } } - if (node[2].getKind() == STRING_STRREPL) + if (node[2].getKind() == STRING_REPLACE) { if (node[2][1] == node[0]) { @@ -3012,7 +3020,7 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { if (!cmp_con.isNull() && !cmp_con.getConst<bool>()) { Node res = - nm->mkNode(kind::STRING_STRREPL, node[0], node[1], node[2][0]); + nm->mkNode(kind::STRING_REPLACE, node[0], node[1], node[2][0]); return returnRewrite(node, res, "repl-repl3-inv"); } } @@ -3072,7 +3080,7 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { Node rem = utils::mkConcat(STRING_CONCAT, remc); Node ret = nm->mkNode(STRING_CONCAT, - nm->mkNode(STRING_STRREPL, lastLhs, node[1], node[2]), + nm->mkNode(STRING_REPLACE, lastLhs, node[1], node[2]), rem); // for example: // str.replace( x ++ x, "A", y ) ---> str.replace( x, "A", y ) ++ x @@ -3096,7 +3104,7 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { Node TheoryStringsRewriter::rewriteReplaceAll(Node node) { - Assert(node.getKind() == STRING_STRREPLALL); + Assert(node.getKind() == STRING_REPLACE_ALL); NodeManager* nm = NodeManager::currentNM(); if (node[0].isConst() && node[1].isConst()) @@ -3144,10 +3152,57 @@ Node TheoryStringsRewriter::rewriteReplaceAll(Node node) return node; } +Node TheoryStringsRewriter::rewriteReplaceRe(Node node) +{ + Assert(node.getKind() == STRING_REPLACE_RE); + NodeManager* nm = NodeManager::currentNM(); + + if (node[0].isConst()) + { + std::vector<Node> emptyVec; + Node sigmaStar = + nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec)); + Node re = nm->mkNode(REGEXP_CONCAT, node[1], sigmaStar); + String s = node[0].getConst<String>(); + for (size_t i = 0, size = s.size(); i < size; i++) + { + if (testConstStringInRegExp(s, i, re)) + { + for (size_t j = i; j < size; j++) + { + String substr = s.substr(i, j - i); + if (testConstStringInRegExp(substr, 0, node[1])) + { + Node ret = nm->mkNode(STRING_CONCAT, + nm->mkConst(s.substr(0, i)), + node[1], + nm->mkConst(s.substr(j))); + return returnRewrite(node, ret, "replace_re-eval"); + } + } + } + } + } + + return node; +} + +Node TheoryStringsRewriter::rewriteReplaceReAll(Node node) +{ + Assert(node.getKind() == STRING_REPLACE_RE_ALL); + NodeManager* nm = NodeManager::currentNM(); + + if (node[0].isConst() && node[2].isConst()) + { + } + + return node; +} + Node TheoryStringsRewriter::rewriteReplaceInternal(Node node) { Kind nk = node.getKind(); - Assert(nk == STRING_STRREPL || nk == STRING_STRREPLALL); + Assert(nk == STRING_REPLACE || nk == STRING_REPLACE_ALL); if (node[1] == node[2]) { @@ -3157,7 +3212,7 @@ Node TheoryStringsRewriter::rewriteReplaceInternal(Node node) if (node[0] == node[1]) { // only holds for replaceall if non-empty - if (nk == STRING_STRREPL || checkEntailNonEmpty(node[1])) + if (nk == STRING_REPLACE || checkEntailNonEmpty(node[1])) { return returnRewrite(node, node[2], "rpl-replace"); } @@ -3857,7 +3912,7 @@ bool TheoryStringsRewriter::componentContainsBase( if (!computeRemainder && dir == 0) { - if (n1.getKind() == STRING_STRREPL) + if (n1.getKind() == STRING_REPLACE) { // (str.contains (str.replace x y z) w) ---> true // if (str.contains x w) --> true and (str.contains z w) ---> true @@ -4576,7 +4631,7 @@ void TheoryStringsRewriter::getArithApproximations(Node a, } } } - else if (aak == STRING_STRREPL) + else if (aak == STRING_REPLACE) { // over,under-approximations for len( replace( x, y, z ) ) // notice this is either len( x ) or ( len( x ) + len( z ) - len( y ) ) @@ -4832,7 +4887,7 @@ Node TheoryStringsRewriter::getMultisetApproximation(Node a) { return a[0]; } - else if (a.getKind() == STRING_STRREPL) + else if (a.getKind() == STRING_REPLACE) { return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2])); } @@ -5200,7 +5255,7 @@ Node TheoryStringsRewriter::getStringOrEmpty(Node n) { switch (n.getKind()) { - case kind::STRING_STRREPL: + case kind::STRING_REPLACE: { Node empty = nm->mkConst(::CVC4::String("")); if (n[0] == empty) diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index c9733433c..ef4bcba84 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -211,12 +211,24 @@ class TheoryStringsRewriter : public TheoryRewriter * Returns the rewritten form of node. */ static Node rewriteReplace(Node node); - /** rewrite replace all + /** rewrite replace_all * This is the entry point for post-rewriting terms n of the form - * str.replaceall( s, t, r ) + * str.replace_all( s, t, r ) * Returns the rewritten form of node. */ static Node rewriteReplaceAll(Node node); + /** rewrite replace_re + * This is the entry point for post-rewriting terms n of the form + * str.replace_re( s, t, r ) + * Returns the rewritten form of node. + */ + static Node rewriteReplaceRe(Node node); + /** rewrite replace_re_all + * This is the entry point for post-rewriting terms n of the form + * str.replace_re_all( s, t, r ) + * Returns the rewritten form of node. + */ + static Node rewriteReplaceReAll(Node node); /** rewrite replace internal * * This method implements rewrite rules that apply to both str.replace and |