summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndres Noetzli <andres.noetzli@gmail.com>2020-06-10 12:50:52 -0700
committerGitHub <noreply@github.com>2020-06-10 12:50:52 -0700
commit2da2646dd65e0458311a2dccfb04850c0b7d9e3c (patch)
tree00d31835b3ad0c00064ae2b43a2a59844f418dd0
parent05c099890ae908e495ceaf26509b87896fd0ad54 (diff)
Add support for str.replace_re/str.replace_re_all (#4594)
This commit adds support for the last remaining string operators from the new SMT-LIB standard for the theory of strings. The commit adds the kinds, type checking, reductions, and evaluation rewrites for `str.replace_re` and `str.replace_re_all`.
-rw-r--r--NEWS8
-rw-r--r--src/api/cvc4cpp.cpp4
-rw-r--r--src/api/cvc4cppkind.h27
-rw-r--r--src/parser/smt2/smt2.cpp2
-rw-r--r--src/printer/smt2/smt2_printer.cpp4
-rw-r--r--src/theory/strings/extf_solver.cpp7
-rw-r--r--src/theory/strings/kinds4
-rw-r--r--src/theory/strings/rewrites.cpp2
-rw-r--r--src/theory/strings/rewrites.h2
-rw-r--r--src/theory/strings/sequences_rewriter.cpp116
-rw-r--r--src/theory/strings/sequences_rewriter.h27
-rw-r--r--src/theory/strings/skolem_cache.h18
-rw-r--r--src/theory/strings/term_registry.cpp7
-rw-r--r--src/theory/strings/theory_strings.cpp3
-rw-r--r--src/theory/strings/theory_strings_preprocess.cpp169
-rw-r--r--test/regress/CMakeLists.txt4
-rw-r--r--test/regress/regress2/strings/replace_re.smt242
-rw-r--r--test/regress/regress2/strings/replace_re_all.smt231
-rw-r--r--test/unit/theory/sequences_rewriter_white.h222
19 files changed, 693 insertions, 6 deletions
diff --git a/NEWS b/NEWS
index ac9f0747e..8c6ec7bc9 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,14 @@ This file contains a summary of important user-visible changes.
Changes since 1.7
=================
+New Features:
+* Strings: Full support of the new SMT-LIB standard for the theory of strings,
+ including:
+ * Support for `str.replace_re`, `str.replace_re_all`, `str.is_digit`,
+ `str.from_code`, `re.diff`, and `re.comp`
+ * Support for new operator names (e.g. `str.in_re` instead of `str.in.re`),
+ new escape sequences. The new syntax is enabled by default for smt2 files.
+
Improvements:
* API: Function definitions can now be requested to be global. If the `global`
parameter is set to true, they persist after popping the user context.
diff --git a/src/api/cvc4cpp.cpp b/src/api/cvc4cpp.cpp
index 325da8cdb..f0d28401e 100644
--- a/src/api/cvc4cpp.cpp
+++ b/src/api/cvc4cpp.cpp
@@ -257,6 +257,8 @@ const static std::unordered_map<Kind, CVC4::Kind, KindHashFunction> s_kinds{
{STRING_INDEXOF, CVC4::Kind::STRING_STRIDOF},
{STRING_REPLACE, CVC4::Kind::STRING_STRREPL},
{STRING_REPLACE_ALL, CVC4::Kind::STRING_STRREPLALL},
+ {STRING_REPLACE_RE, CVC4::Kind::STRING_REPLACE_RE},
+ {STRING_REPLACE_RE_ALL, CVC4::Kind::STRING_REPLACE_RE_ALL},
{STRING_TOLOWER, CVC4::Kind::STRING_TOLOWER},
{STRING_TOUPPER, CVC4::Kind::STRING_TOUPPER},
{STRING_REV, CVC4::Kind::STRING_REV},
@@ -526,6 +528,8 @@ const static std::unordered_map<CVC4::Kind, Kind, CVC4::kind::KindHashFunction>
{CVC4::Kind::STRING_STRIDOF, STRING_INDEXOF},
{CVC4::Kind::STRING_STRREPL, STRING_REPLACE},
{CVC4::Kind::STRING_STRREPLALL, STRING_REPLACE_ALL},
+ {CVC4::Kind::STRING_REPLACE_RE, STRING_REPLACE_RE},
+ {CVC4::Kind::STRING_REPLACE_RE_ALL, STRING_REPLACE_RE_ALL},
{CVC4::Kind::STRING_TOLOWER, STRING_TOLOWER},
{CVC4::Kind::STRING_TOUPPER, STRING_TOUPPER},
{CVC4::Kind::STRING_REV, STRING_REV},
diff --git a/src/api/cvc4cppkind.h b/src/api/cvc4cppkind.h
index 02f13310a..82f1eb379 100644
--- a/src/api/cvc4cppkind.h
+++ b/src/api/cvc4cppkind.h
@@ -2003,6 +2003,33 @@ enum CVC4_PUBLIC Kind : int32_t
*/
STRING_REPLACE_ALL,
/**
+ * String replace regular expression match.
+ * Replaces the first match of a regular expression r in string s1 with
+ * string s2. If r does not match a substring of s1, s1 is returned
+ * unmodified.
+ * Parameters: 3
+ * -[1]: Term of sort String (string s1)
+ * -[2]: Term of sort Regexp (regexp r)
+ * -[3]: Term of sort String (string s2)
+ * Create with:
+ * mkTerm(Kind kind, Term child1, Term child2, Term child3)
+ * mkTerm(Kind kind, const std::vector<Term>& children)
+ */
+ STRING_REPLACE_RE,
+ /**
+ * String replace all regular expression matches.
+ * Replaces all matches of a regular expression r in string s1 with string
+ * s2. If r does not match a substring of s1, s1 is returned unmodified.
+ * Parameters: 3
+ * -[1]: Term of sort String (string s1)
+ * -[2]: Term of sort Regexp (regexp r)
+ * -[3]: Term of sort String (string s2)
+ * Create with:
+ * mkTerm(Kind kind, Term child1, Term child2, Term child3)
+ * mkTerm(Kind kind, const std::vector<Term>& children)
+ */
+ STRING_REPLACE_RE_ALL,
+ /**
* String to lower case.
* Parameters: 1
* -[1]: Term of String sort
diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp
index 3f25e3776..5f328d462 100644
--- a/src/parser/smt2/smt2.cpp
+++ b/src/parser/smt2/smt2.cpp
@@ -156,6 +156,8 @@ void Smt2::addStringOperators() {
addOperator(api::STRING_CHARAT, "str.at");
addOperator(api::STRING_INDEXOF, "str.indexof");
addOperator(api::STRING_REPLACE, "str.replace");
+ addOperator(api::STRING_REPLACE_RE, "str.replace_re");
+ addOperator(api::STRING_REPLACE_RE_ALL, "str.replace_re_all");
if (!strictModeEnabled())
{
addOperator(api::STRING_TOLOWER, "str.tolower");
diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp
index 6670fa065..18d60d779 100644
--- a/src/printer/smt2/smt2_printer.cpp
+++ b/src/printer/smt2/smt2_printer.cpp
@@ -632,6 +632,8 @@ void Smt2Printer::toStream(std::ostream& out,
case kind::STRING_STRIDOF:
case kind::STRING_STRREPL:
case kind::STRING_STRREPLALL:
+ case kind::STRING_REPLACE_RE:
+ case kind::STRING_REPLACE_RE_ALL:
case kind::STRING_TOLOWER:
case kind::STRING_TOUPPER:
case kind::STRING_REV:
@@ -1191,6 +1193,8 @@ static string smtKindString(Kind k, Variant v)
case kind::STRING_STRIDOF: return "str.indexof" ;
case kind::STRING_STRREPL: return "str.replace" ;
case kind::STRING_STRREPLALL: return "str.replace_all";
+ case kind::STRING_REPLACE_RE: return "str.replace_re";
+ case kind::STRING_REPLACE_RE_ALL: return "str.replace_re_all";
case kind::STRING_TOLOWER: return "str.tolower";
case kind::STRING_TOUPPER: return "str.toupper";
case kind::STRING_REV: return "str.rev";
diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp
index dd4144313..9b7af4f13 100644
--- a/src/theory/strings/extf_solver.cpp
+++ b/src/theory/strings/extf_solver.cpp
@@ -56,6 +56,8 @@ ExtfSolver::ExtfSolver(context::Context* c,
d_extt.addFunctionKind(kind::STRING_STOI);
d_extt.addFunctionKind(kind::STRING_STRREPL);
d_extt.addFunctionKind(kind::STRING_STRREPLALL);
+ d_extt.addFunctionKind(kind::STRING_REPLACE_RE);
+ d_extt.addFunctionKind(kind::STRING_REPLACE_RE_ALL);
d_extt.addFunctionKind(kind::STRING_STRCTN);
d_extt.addFunctionKind(kind::STRING_IN_REGEXP);
d_extt.addFunctionKind(kind::STRING_LEQ);
@@ -180,8 +182,9 @@ bool ExtfSolver::doReduction(int effort, Node n)
NodeManager* nm = NodeManager::currentNM();
Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF
|| k == STRING_ITOS || k == STRING_STOI || k == STRING_STRREPL
- || k == STRING_STRREPLALL || k == STRING_LEQ || k == STRING_TOLOWER
- || k == STRING_TOUPPER || k == STRING_REV);
+ || k == STRING_STRREPLALL || k == STRING_REPLACE_RE
+ || k == STRING_REPLACE_RE_ALL || k == STRING_LEQ
+ || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV);
std::vector<Node> new_nodes;
Node res = d_preproc.simplify(n, new_nodes);
Assert(res != n);
diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds
index 800847ffe..96ba82a44 100644
--- a/src/theory/strings/kinds
+++ b/src/theory/strings/kinds
@@ -23,6 +23,10 @@ operator STRING_LEQ 2 "string less than or equal"
operator STRING_STRIDOF 3 "string indexof"
operator STRING_STRREPL 3 "string replace"
operator STRING_STRREPLALL 3 "string replace all"
+operator STRING_REPLACE_RE 3 "string replace regular expression match"
+operator STRING_REPLACE_RE_ALL 3 "string replace all regular expression matches"
+typerule STRING_REPLACE_RE "SimpleTypeRule<RString, AString, ARegExp, AString>"
+typerule STRING_REPLACE_RE_ALL "SimpleTypeRule<RString, AString, ARegExp, AString>"
operator STRING_PREFIX 2 "string prefixof"
operator STRING_SUFFIX 2 "string suffixof"
operator STRING_IS_DIGIT 1 "string isdigit, returns true if argument is a string of length one that represents a digit"
diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp
index a4055c4f9..5f3c83869 100644
--- a/src/theory/strings/rewrites.cpp
+++ b/src/theory/strings/rewrites.cpp
@@ -111,6 +111,8 @@ const char* toString(Rewrite r)
case Rewrite::RPL_RPL_EMPTY: return "RPL_RPL_EMPTY";
case Rewrite::RPL_RPL_LEN_ID: return "RPL_RPL_LEN_ID";
case Rewrite::RPL_X_Y_X_SIMP: return "RPL_X_Y_X_SIMP";
+ case Rewrite::REPLACE_RE_EVAL: return "REPLACE_RE_EVAL";
+ case Rewrite::REPLACE_RE_ALL_EVAL: return "REPLACE_RE_ALL_EVAL";
case Rewrite::SPLIT_EQ: return "SPLIT_EQ";
case Rewrite::SPLIT_EQ_STRIP_L: return "SPLIT_EQ_STRIP_L";
case Rewrite::SPLIT_EQ_STRIP_R: return "SPLIT_EQ_STRIP_R";
diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h
index 96a3b65fd..62350c403 100644
--- a/src/theory/strings/rewrites.h
+++ b/src/theory/strings/rewrites.h
@@ -116,6 +116,8 @@ enum class Rewrite : uint32_t
RPL_RPL_EMPTY,
RPL_RPL_LEN_ID,
RPL_X_Y_X_SIMP,
+ REPLACE_RE_EVAL,
+ REPLACE_RE_ALL_EVAL,
SPLIT_EQ,
SPLIT_EQ_STRIP_L,
SPLIT_EQ_STRIP_R,
diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp
index 4f74d7c15..3c40062f5 100644
--- a/src/theory/strings/sequences_rewriter.cpp
+++ b/src/theory/strings/sequences_rewriter.cpp
@@ -1413,6 +1413,14 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node)
{
retNode = rewriteReplaceAll(node);
}
+ else if (nk == kind::STRING_REPLACE_RE)
+ {
+ retNode = rewriteReplaceRe(node);
+ }
+ else if (nk == kind::STRING_REPLACE_RE_ALL)
+ {
+ retNode = rewriteReplaceReAll(node);
+ }
else if (nk == STRING_REV)
{
retNode = rewriteStrReverse(node);
@@ -2914,6 +2922,114 @@ Node SequencesRewriter::rewriteReplaceInternal(Node node)
return Node::null();
}
+Node SequencesRewriter::rewriteReplaceRe(Node node)
+{
+ Assert(node.getKind() == STRING_REPLACE_RE);
+ NodeManager* nm = NodeManager::currentNM();
+ Node x = node[0];
+ Node y = node[1];
+ Node z = node[2];
+
+ if (x.isConst())
+ {
+ // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z"
+ std::pair<size_t, size_t> match = firstMatch(x, y);
+ if (match.first != string::npos)
+ {
+ String s = x.getConst<String>();
+ Node ret = nm->mkNode(STRING_CONCAT,
+ nm->mkConst(s.substr(0, match.first)),
+ z,
+ nm->mkConst(s.substr(match.second)));
+ return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL);
+ }
+ else
+ {
+ return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL);
+ }
+ }
+ return node;
+}
+
+Node SequencesRewriter::rewriteReplaceReAll(Node node)
+{
+ Assert(node.getKind() == STRING_REPLACE_RE_ALL);
+ NodeManager* nm = NodeManager::currentNM();
+ Node x = node[0];
+ Node y = node[1];
+ Node z = node[2];
+
+ if (x.isConst())
+ {
+ // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) --->
+ // "Z" ++ y ++ "Z" ++ y
+ TypeNode t = x.getType();
+ Node emp = Word::mkEmptyWord(t);
+ Node yp = Rewriter::rewrite(
+ nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)));
+ std::vector<Node> res;
+ String rem = x.getConst<String>();
+ std::pair<size_t, size_t> match(0, 0);
+ while (rem.size() >= 0)
+ {
+ match = firstMatch(nm->mkConst(rem), yp);
+ if (match.first == string::npos)
+ {
+ break;
+ }
+ res.push_back(nm->mkConst(rem.substr(0, match.first)));
+ res.push_back(z);
+ rem = rem.substr(match.second);
+ }
+ res.push_back(nm->mkConst(rem));
+ Node ret = utils::mkConcat(res, t);
+ return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL);
+ }
+
+ return node;
+}
+
+std::pair<size_t, size_t> SequencesRewriter::firstMatch(Node n, Node r)
+{
+ Assert(n.isConst() && n.getType().isStringLike());
+ Assert(r.getType().isRegExp());
+ NodeManager* nm = NodeManager::currentNM();
+
+ std::vector<Node> emptyVec;
+ Node sigmaStar = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
+ Node re = nm->mkNode(REGEXP_CONCAT, r, sigmaStar);
+ String s = n.getConst<String>();
+
+ if (s.size() == 0)
+ {
+ if (RegExpEntail::testConstStringInRegExp(s, 0, r))
+ {
+ return std::make_pair(0, 0);
+ }
+ else
+ {
+ return std::make_pair(string::npos, string::npos);
+ }
+ }
+
+ for (size_t i = 0, size = s.size(); i < size; i++)
+ {
+ if (RegExpEntail::testConstStringInRegExp(s, i, re))
+ {
+ for (size_t j = i; j <= size; j++)
+ {
+ String substr = s.substr(i, j - i);
+ if (RegExpEntail::testConstStringInRegExp(substr, 0, r))
+ {
+ return std::make_pair(i, j);
+ }
+ }
+ }
+ }
+
+ return std::make_pair(string::npos, string::npos);
+}
+
Node SequencesRewriter::rewriteStrReverse(Node node)
{
Assert(node.getKind() == STRING_REV);
diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h
index 490dd8b3c..00ae21634 100644
--- a/src/theory/strings/sequences_rewriter.h
+++ b/src/theory/strings/sequences_rewriter.h
@@ -204,6 +204,33 @@ class SequencesRewriter : public TheoryRewriter
* str.replaceall. If it returns a non-null ret, then node rewrites to ret.
*/
Node rewriteReplaceInternal(Node node);
+ /** rewrite regular expression replace
+ *
+ * This method implements rewrite rules that apply to terms of the form
+ * str.replace_re(s, r, t).
+ *
+ * @param node The node to rewrite
+ * @return The rewritten node
+ */
+ Node rewriteReplaceRe(Node node);
+ /** rewrite regular expression replace
+ *
+ * This method implements rewrite rules that apply to terms of the form
+ * str.replace_re_all(s, r, t).
+ *
+ * @param node The node to rewrite
+ * @return The rewritten node
+ */
+ Node rewriteReplaceReAll(Node node);
+ /**
+ * Returns the first, shortest sequence in n that matches r.
+ *
+ * @param n The constant string or sequence to search in.
+ * @param r The regular expression to search for.
+ * @return A pair holding the start position and the end position of the
+ * match or a pair of string::npos if r does not appear in n.
+ */
+ std::pair<size_t, size_t> firstMatch(Node n, Node r);
/** rewrite string reverse
*
* This is the entry point for post-rewriting terms n of the form
diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h
index 7ce507f29..bb4d0de55 100644
--- a/src/theory/strings/skolem_cache.h
+++ b/src/theory/strings/skolem_cache.h
@@ -102,6 +102,16 @@ class SkolemCache
// of b in a as the witness for contains( a, b ).
SK_FIRST_CTN_PRE,
SK_FIRST_CTN_POST,
+ // For sequence a and regular expression b,
+ // in_re(a, re.++(_*, b, _*)) =>
+ // exists k_pre, k_match, k_post.
+ // a = k_pre ++ k_match ++ k_post ^
+ // ~in_re(k_pre ++ substr(k_match, 0, str.len(k_match) - 1),
+ // re.++(_*, b, _*)) ^
+ // in_re(k2, y)
+ SK_FIRST_MATCH_PRE,
+ SK_FIRST_MATCH,
+ SK_FIRST_MATCH_POST,
// For integer b,
// len( a ) > b =>
// exists k. a = k ++ a' ^ len( k ) = b
@@ -120,6 +130,14 @@ class SkolemCache
// k(x) is the end index of the x^th occurrence of b in a
// where n is the number of occurrences of b in a, and k(0)=0.
SK_OCCUR_INDEX,
+ // For function k: Int -> Int
+ // exists k.
+ // forall 0 <= x < n,
+ // k(x) is the length of the x^th occurrence of b in a (excluding
+ // matches of empty strings)
+ // where b is a regular expression, n is the number of occurrences of b
+ // in a, and k(0)=0.
+ SK_OCCUR_LEN,
};
/**
* Returns a skolem of type string that is cached for (a,b,id) and has
diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp
index 6330d7c10..395604f76 100644
--- a/src/theory/strings/term_registry.cpp
+++ b/src/theory/strings/term_registry.cpp
@@ -76,9 +76,10 @@ void TermRegistry::preRegisterTerm(TNode n)
if (!options::stringExp())
{
if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
- || k == STRING_STRREPL || k == STRING_STRREPLALL || k == STRING_STRCTN
- || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER
- || k == STRING_REV)
+ || k == STRING_STRREPL || k == STRING_STRREPLALL
+ || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
+ || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER
+ || k == STRING_TOUPPER || k == STRING_REV)
{
std::stringstream ss;
ss << "Term of kind " << k
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index 5107fa3f9..d83d5ca49 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -118,6 +118,9 @@ TheoryStrings::TheoryStrings(context::Context* c,
d_equalityEngine.addFunctionKind(kind::STRING_STRIDOF);
d_equalityEngine.addFunctionKind(kind::STRING_STRREPL);
d_equalityEngine.addFunctionKind(kind::STRING_STRREPLALL);
+ d_equalityEngine.addFunctionKind(kind::STRING_REPLACE_RE);
+ d_equalityEngine.addFunctionKind(kind::STRING_REPLACE_RE_ALL);
+ d_equalityEngine.addFunctionKind(kind::STRING_STRREPLALL);
d_equalityEngine.addFunctionKind(kind::STRING_TOLOWER);
d_equalityEngine.addFunctionKind(kind::STRING_TOUPPER);
d_equalityEngine.addFunctionKind(kind::STRING_REV);
diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp
index 50c6ede62..550a7ba79 100644
--- a/src/theory/strings/theory_strings_preprocess.cpp
+++ b/src/theory/strings/theory_strings_preprocess.cpp
@@ -488,6 +488,175 @@ Node StringsPreprocess::reduce(Node t,
// Thus, replaceall( x, y, z ) = rpaw
retNode = rpaw;
}
+ else if (t.getKind() == STRING_REPLACE_RE)
+ {
+ Node x = t[0];
+ Node y = t[1];
+ Node z = t[2];
+ Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k");
+
+ std::vector<Node> emptyVec;
+ Node sigmaStar =
+ nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
+ Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, y, sigmaStar);
+ // in_re(x, re.++(_*, y, _*))
+ Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re);
+
+ // in_re("", y)
+ Node matchesEmpty =
+ nm->mkNode(STRING_IN_REGEXP, nm->mkConst(String("")), y);
+ // k = z ++ x
+ Node res1 = k.eqNode(nm->mkNode(STRING_CONCAT, z, x));
+
+ Node k1 =
+ sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_PRE, "rre_pre");
+ Node k2 =
+ sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH, "rre_match");
+ Node k3 =
+ sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_POST, "rre_post");
+ // x = k1 ++ k2 ++ k3
+ Node splitX = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3));
+ // ~in_re(k1 ++ str.substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*))
+ Node k2len = nm->mkNode(STRING_LENGTH, k2);
+ Node firstMatch =
+ nm->mkNode(
+ STRING_IN_REGEXP,
+ nm->mkNode(
+ STRING_CONCAT,
+ k1,
+ nm->mkNode(
+ STRING_SUBSTR, k2, zero, nm->mkNode(MINUS, k2len, one))),
+ re)
+ .negate();
+ // in_re(k2, y)
+ Node k2Match = nm->mkNode(STRING_IN_REGEXP, k2, y);
+ // k = k1 ++ z ++ k3
+ Node res2 = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3));
+
+ // IF in_re(x, re.++(_*, y, _*))
+ // THEN:
+ // IF in_re("", y)
+ // THEN: k = z ++ x
+ // ELSE:
+ // x = k1 ++ k2 ++ k3 ^
+ // ~in_re(k1 ++ substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*)) ^
+ // in_re(k2, y) ^ k = k1 ++ z ++ k3
+ // ELSE: k = x
+ asserts.push_back(nm->mkNode(
+ ITE,
+ hasMatch,
+ nm->mkNode(ITE,
+ matchesEmpty,
+ res1,
+ nm->mkNode(AND, splitX, firstMatch, k2Match, res2)),
+ k.eqNode(x)));
+ retNode = k;
+ }
+ else if (t.getKind() == STRING_REPLACE_RE_ALL)
+ {
+ Node x = t[0];
+ Node y = t[1];
+ Node z = t[2];
+ Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k");
+
+ Node numOcc = sc->mkTypedSkolemCached(
+ nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc");
+ std::vector<TypeNode> argTypes;
+ argTypes.push_back(nm->integerType());
+ Node us = nm->mkSkolem("Us", nm->mkFunctionType(argTypes, t.getType()));
+ TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType());
+ Node uf = sc->mkTypedSkolemCached(
+ ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf");
+ Node ul =
+ sc->mkTypedSkolemCached(ufType, x, y, SkolemCache::SK_OCCUR_LEN, "Ul");
+
+ Node emp = Word::mkEmptyWord(t.getType());
+
+ std::vector<Node> emptyVec;
+ Node sigmaStar =
+ nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
+ Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp));
+ Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, yp, sigmaStar);
+ // in_re(x, _* ++ y' ++ _*)
+ Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re);
+
+ Node ufno = nm->mkNode(APPLY_UF, uf, numOcc);
+ Node usno = nm->mkNode(APPLY_UF, us, numOcc);
+ Node rem = nm->mkNode(STRING_SUBSTR, x, ufno, nm->mkNode(STRING_LENGTH, x));
+
+ std::vector<Node> lemmas;
+ // numOcc > 0
+ lemmas.push_back(nm->mkNode(GT, numOcc, zero));
+ // k = Us(0)
+ lemmas.push_back(k.eqNode(nm->mkNode(APPLY_UF, us, zero)));
+ // Us(numOcc) = substr(x, Uf(numOcc))
+ lemmas.push_back(usno.eqNode(rem));
+ // Uf(0) = 0
+ lemmas.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero));
+ // not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*)))
+ lemmas.push_back(nm->mkNode(STRING_IN_REGEXP, rem, re).negate());
+
+ Node i = SkolemCache::mkIndexVar(t);
+ Node bvli = nm->mkNode(BOUND_VAR_LIST, i);
+ Node bound =
+ nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc));
+ Node ip1 = nm->mkNode(PLUS, i, one);
+ Node ufi = nm->mkNode(APPLY_UF, uf, i);
+ Node uli = nm->mkNode(APPLY_UF, ul, i);
+ Node ufip1 = nm->mkNode(APPLY_UF, uf, ip1);
+ Node ii = nm->mkNode(MINUS, ufip1, uli);
+ Node match = nm->mkNode(STRING_SUBSTR, x, ii, uli);
+ Node pfxMatch =
+ nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi));
+ Node nonMatch =
+ nm->mkNode(STRING_SUBSTR,
+ x,
+ ufi,
+ nm->mkNode(MINUS, nm->mkNode(MINUS, ufip1, one), ufi));
+
+ std::vector<Node> flem;
+ // Ul(i) > 0
+ flem.push_back(nm->mkNode(GT, uli, zero));
+ // Uf(i + 1) >= Uf(i) + Ul(i)
+ flem.push_back(nm->mkNode(GEQ, ufip1, nm->mkNode(PLUS, ufi, uli)));
+ // in_re(substr(x, ii, Ul(i)), y')
+ flem.push_back(nm->mkNode(STRING_IN_REGEXP, match, yp));
+ // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*))
+ flem.push_back(nm->mkNode(STRING_IN_REGEXP, nonMatch, re).negate());
+ // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1)
+ flem.push_back(
+ nm->mkNode(APPLY_UF, us, i)
+ .eqNode(nm->mkNode(
+ STRING_CONCAT, pfxMatch, z, nm->mkNode(APPLY_UF, us, ip1))));
+
+ Node forall = nm->mkNode(
+ FORALL, bvli, nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem)));
+ lemmas.push_back(forall);
+
+ // IF in_re(x, re.++(_*, y', _*))
+ // THEN:
+ // numOcc > 0 ^
+ // k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^
+ // Uf(0) = 0 ^ not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*)))
+ // forall i. 0 <= i < nummOcc =>
+ // Ul(i) > 0 ^
+ // Uf(i + 1) >= Uf(i) + Ul(i) ^
+ // in_re(substr(x, ii, Ul(i)), y') ^
+ // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*)) ^
+ // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1)
+ // where ii = Uf(i + 1) - Ul(i)
+ // ELSE: k = x
+ // where y' = re.diff(y, "")
+ //
+ // Conceptually, y' is the regex y but excluding the empty string (because
+ // we do not want to match empty strings), numOcc is the number of shortest
+ // matches of y' in x, Uf(i) is the end position of the i-th match, Ul(i)
+ // is the length of the i^th match, and Us(i) is the result of processing
+ // the remainder after processing the i^th occurrence of y in x.
+ asserts.push_back(
+ nm->mkNode(ITE, hasMatch, nm->mkNode(AND, lemmas), k.eqNode(x)));
+ retNode = k;
+ }
else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER)
{
Node x = t[0];
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt
index 29224803a..ea1021d89 100644
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -2082,8 +2082,10 @@ set(regress_2_tests
regress2/strings/non_termination_regular_expression6.smt2
regress2/strings/norn-dis-0707-3.smt2
regress2/strings/range-perf.smt2
- regress2/strings/repl-repl.smt2
regress2/strings/repl-repl-i-no-push.smt2
+ regress2/strings/repl-repl.smt2
+ regress2/strings/replace_re.smt2
+ regress2/strings/replace_re_all.smt2
regress2/strings/replaceall-diffrange.smt2
regress2/strings/replaceall-len-c.smt2
regress2/strings/small-1.smt2
diff --git a/test/regress/regress2/strings/replace_re.smt2 b/test/regress/regress2/strings/replace_re.smt2
new file mode 100644
index 000000000..1f9b2a2ee
--- /dev/null
+++ b/test/regress/regress2/strings/replace_re.smt2
@@ -0,0 +1,42 @@
+; COMMAND-LINE: --strings-exp
+(set-option :incremental true)
+(set-logic SLIA)
+(declare-const x String)
+
+(push)
+(assert (= "AFOOE" (str.replace_re x (re.++ (str.to_re "B") re.allchar (str.to_re "C")) "FOO")))
+(assert (not (= x "AFOOE")))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= "FOOA" (str.replace_re x re.all "FOO")))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= "FOOA" (str.replace_re x (re.++ (str.to_re "A") re.all) "FOO")))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= "FOOA" (str.replace_re x (re.++ (str.to_re "A") re.all) "FOO")))
+(assert (> (str.len x) 2))
+(set-info :status unsat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= "FOOA" (str.replace_re "A" re.all "FOO")))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (not (= "A" (str.replace_re "A" re.none "FOO"))))
+(set-info :status unsat)
+(check-sat)
+(pop)
diff --git a/test/regress/regress2/strings/replace_re_all.smt2 b/test/regress/regress2/strings/replace_re_all.smt2
new file mode 100644
index 000000000..cf2b674c3
--- /dev/null
+++ b/test/regress/regress2/strings/replace_re_all.smt2
@@ -0,0 +1,31 @@
+; COMMAND-LINE: --strings-exp
+(set-option :incremental true)
+(set-logic SLIA)
+(declare-const x String)
+(declare-const y String)
+
+(push)
+(assert (= x (str.replace_re_all "ZABCZACZADDC" (re.++ (str.to_re "A") re.all (str.to_re "C")) y)))
+(assert (= x "ZFOOZFXOZFOO"))
+(set-info :status unsat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= "ZFOOZFXOZFOO" (str.replace_re_all x (re.++ (str.to_re "A") re.all (str.to_re "C")) "FOO")))
+(assert (not (= x "ZFOOZFXOZFOO")))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= "ZFOOZZFOO" (str.replace_re_all (str.++ "ZACZ" x "ZADDC") (re.++ (str.to_re "A") re.all (str.to_re "C")) "FOO")))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= "ZFOOZZFOO" (str.replace_re_all (str.++ "ZACXZ" x "ZADDC") (re.++ (str.to_re "A") re.all (str.to_re "C")) "FOO")))
+(set-info :status unsat)
+(check-sat)
+(pop)
diff --git a/test/unit/theory/sequences_rewriter_white.h b/test/unit/theory/sequences_rewriter_white.h
index 7e45296a9..ba3070bff 100644
--- a/test/unit/theory/sequences_rewriter_white.h
+++ b/test/unit/theory/sequences_rewriter_white.h
@@ -707,6 +707,228 @@ class SequencesRewriterWhite : public CxxTest::TestSuite
}
}
+ void testRewriteReplaceRe()
+ {
+ TypeNode intType = d_nm->integerType();
+ TypeNode strType = d_nm->stringType();
+
+ std::vector<Node> emptyVec;
+ Node sigStar = d_nm->mkNode(kind::REGEXP_STAR,
+ d_nm->mkNode(kind::REGEXP_SIGMA, emptyVec));
+ Node foo = d_nm->mkConst(String("FOO"));
+ Node a = d_nm->mkConst(String("A"));
+ Node b = d_nm->mkConst(String("B"));
+ Node re = d_nm->mkNode(kind::REGEXP_CONCAT,
+ d_nm->mkNode(kind::STRING_TO_REGEXP, a),
+ sigStar,
+ d_nm->mkNode(kind::STRING_TO_REGEXP, b));
+
+ // Same normal form:
+ //
+ // (str.replace_re
+ // "AZZZB"
+ // (re.++ (str.to_re "A") re.all (str.to_re "B"))
+ // "FOO")
+ //
+ // "FOO"
+ {
+ Node t = d_nm->mkNode(
+ kind::STRING_REPLACE_RE, d_nm->mkConst(String("AZZZB")), re, foo);
+ Node res = d_nm->mkConst(::CVC4::String("FOO"));
+ sameNormalForm(t, res);
+ }
+
+ // Same normal form:
+ //
+ // (str.replace_re
+ // "ZAZZZBZZB"
+ // (re.++ (str.to_re "A") re.all (str.to_re "B"))
+ // "FOO")
+ //
+ // "ZFOOZZB"
+ {
+ Node t = d_nm->mkNode(
+ kind::STRING_REPLACE_RE, d_nm->mkConst(String("ZAZZZBZZB")), re, foo);
+ Node res = d_nm->mkConst(::CVC4::String("ZFOOZZB"));
+ sameNormalForm(t, res);
+ }
+
+ // Same normal form:
+ //
+ // (str.replace_re
+ // "ZAZZZBZAZB"
+ // (re.++ (str.to_re "A") re.all (str.to_re "B"))
+ // "FOO")
+ //
+ // "ZFOOZAZB"
+ {
+ Node t = d_nm->mkNode(kind::STRING_REPLACE_RE,
+ d_nm->mkConst(String("ZAZZZBZAZB")),
+ re,
+ foo);
+ Node res = d_nm->mkConst(::CVC4::String("ZFOOZAZB"));
+ sameNormalForm(t, res);
+ }
+
+ // Same normal form:
+ //
+ // (str.replace_re
+ // "ZZZ"
+ // (re.++ (str.to_re "A") re.all (str.to_re "B"))
+ // "FOO")
+ //
+ // "ZZZ"
+ {
+ Node t = d_nm->mkNode(
+ kind::STRING_REPLACE_RE, d_nm->mkConst(String("ZZZ")), re, foo);
+ Node res = d_nm->mkConst(::CVC4::String("ZZZ"));
+ sameNormalForm(t, res);
+ }
+
+ // Same normal form:
+ //
+ // (str.replace_re
+ // "ZZZ"
+ // re.all
+ // "FOO")
+ //
+ // "FOOZZZ"
+ {
+ Node t = d_nm->mkNode(
+ kind::STRING_REPLACE_RE, d_nm->mkConst(String("ZZZ")), sigStar, foo);
+ Node res = d_nm->mkConst(::CVC4::String("FOOZZZ"));
+ sameNormalForm(t, res);
+ }
+
+ // Same normal form:
+ //
+ // (str.replace_re
+ // ""
+ // re.all
+ // "FOO")
+ //
+ // "FOO"
+ {
+ Node t = d_nm->mkNode(
+ kind::STRING_REPLACE_RE, d_nm->mkConst(String("")), sigStar, foo);
+ Node res = d_nm->mkConst(::CVC4::String("FOO"));
+ sameNormalForm(t, res);
+ }
+ }
+
+ void testRewriteReplaceReAll()
+ {
+ TypeNode intType = d_nm->integerType();
+ TypeNode strType = d_nm->stringType();
+
+ std::vector<Node> emptyVec;
+ Node sigStar = d_nm->mkNode(kind::REGEXP_STAR,
+ d_nm->mkNode(kind::REGEXP_SIGMA, emptyVec));
+ Node foo = d_nm->mkConst(String("FOO"));
+ Node a = d_nm->mkConst(String("A"));
+ Node b = d_nm->mkConst(String("B"));
+ Node re = d_nm->mkNode(kind::REGEXP_CONCAT,
+ d_nm->mkNode(kind::STRING_TO_REGEXP, a),
+ sigStar,
+ d_nm->mkNode(kind::STRING_TO_REGEXP, b));
+
+ // Same normal form:
+ //
+ // (str.replace_re
+ // "AZZZB"
+ // (re.++ (str.to_re "A") re.all (str.to_re "B"))
+ // "FOO")
+ //
+ // "FOO"
+ {
+ Node t = d_nm->mkNode(
+ kind::STRING_REPLACE_RE_ALL, d_nm->mkConst(String("AZZZB")), re, foo);
+ Node res = d_nm->mkConst(::CVC4::String("FOO"));
+ sameNormalForm(t, res);
+ }
+
+ // Same normal form:
+ //
+ // (str.replace_re
+ // "ZAZZZBZZB"
+ // (re.++ (str.to_re "A") re.all (str.to_re "B"))
+ // "FOO")
+ //
+ // "ZFOOZZB"
+ {
+ Node t = d_nm->mkNode(kind::STRING_REPLACE_RE_ALL,
+ d_nm->mkConst(String("ZAZZZBZZB")),
+ re,
+ foo);
+ Node res = d_nm->mkConst(::CVC4::String("ZFOOZZB"));
+ sameNormalForm(t, res);
+ }
+
+ // Same normal form:
+ //
+ // (str.replace_re
+ // "ZAZZZBZAZB"
+ // (re.++ (str.to_re "A") re.all (str.to_re "B"))
+ // "FOO")
+ //
+ // "ZFOOZFOO"
+ {
+ Node t = d_nm->mkNode(kind::STRING_REPLACE_RE_ALL,
+ d_nm->mkConst(String("ZAZZZBZAZB")),
+ re,
+ foo);
+ Node res = d_nm->mkConst(::CVC4::String("ZFOOZFOO"));
+ sameNormalForm(t, res);
+ }
+
+ // Same normal form:
+ //
+ // (str.replace_re
+ // "ZZZ"
+ // (re.++ (str.to_re "A") re.all (str.to_re "B"))
+ // "FOO")
+ //
+ // "ZZZ"
+ {
+ Node t = d_nm->mkNode(
+ kind::STRING_REPLACE_RE_ALL, d_nm->mkConst(String("ZZZ")), re, foo);
+ Node res = d_nm->mkConst(::CVC4::String("ZZZ"));
+ sameNormalForm(t, res);
+ }
+
+ // Same normal form:
+ //
+ // (str.replace_re
+ // "ZZZ"
+ // re.all
+ // "FOO")
+ //
+ // "FOOFOOFOO"
+ {
+ Node t = d_nm->mkNode(kind::STRING_REPLACE_RE_ALL,
+ d_nm->mkConst(String("ZZZ")),
+ sigStar,
+ foo);
+ Node res = d_nm->mkConst(::CVC4::String("FOOFOOFOO"));
+ sameNormalForm(t, res);
+ }
+
+ // Same normal form:
+ //
+ // (str.replace_re
+ // ""
+ // re.all
+ // "FOO")
+ //
+ // ""
+ {
+ Node t = d_nm->mkNode(
+ kind::STRING_REPLACE_RE_ALL, d_nm->mkConst(String("")), sigStar, foo);
+ Node res = d_nm->mkConst(::CVC4::String(""));
+ sameNormalForm(t, res);
+ }
+ }
+
void testRewriteContains()
{
TypeNode intType = d_nm->integerType();
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback