summaryrefslogtreecommitdiff
path: root/src/theory
diff options
context:
space:
mode:
authorAndres Noetzli <andres.noetzli@gmail.com>2021-05-27 15:42:10 -0700
committerGitHub <noreply@github.com>2021-05-27 22:42:10 +0000
commit29f0b8f378377ed836bddaaf88883d0b2eeb545d (patch)
tree5d9edb57c60ab8c8a07dab52f18b72dd441d4fdf /src/theory
parent631032b15327c28c44b51490dceb434a38f3419a (diff)
Fix `str.replace_re` and `str.replace_re_all` (#6615)
Fixes #6057. The reductions of `str.replace_re` and `str.replace_re_all` were not correctly enforcing that the operations replace the _first_ occurrence of some regular expression in a string. This commit fixes the issue by introducing a new operator `str.indexof_re(s, r, n)`, which, analoguously to `str.indexof`, returns the index of the first match of the regular expression `r` in `s`. The commit adds basic rewrites for evaluating the operator as well as its reduction. Additionally, it converts the reductions of `str.replace_re` and `str.replace_re_all` to use that new operator. This simplifies the reductions of the two operators and ensures that the semantics are consistent between the two.
Diffstat (limited to 'src/theory')
-rw-r--r--src/theory/strings/extf_solver.cpp11
-rw-r--r--src/theory/strings/kinds8
-rw-r--r--src/theory/strings/rewrites.cpp4
-rw-r--r--src/theory/strings/rewrites.h4
-rw-r--r--src/theory/strings/sequences_rewriter.cpp59
-rw-r--r--src/theory/strings/sequences_rewriter.h6
-rw-r--r--src/theory/strings/skolem_cache.cpp18
-rw-r--r--src/theory/strings/skolem_cache.h21
-rw-r--r--src/theory/strings/term_registry.cpp18
-rw-r--r--src/theory/strings/theory_strings.cpp1
-rw-r--r--src/theory/strings/theory_strings_preprocess.cpp234
-rw-r--r--src/theory/strings/theory_strings_utils.cpp5
12 files changed, 296 insertions, 93 deletions
diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp
index a1de5e295..9576c1d81 100644
--- a/src/theory/strings/extf_solver.cpp
+++ b/src/theory/strings/extf_solver.cpp
@@ -53,6 +53,7 @@ ExtfSolver::ExtfSolver(SolverState& s,
d_extt.addFunctionKind(kind::STRING_SUBSTR);
d_extt.addFunctionKind(kind::STRING_UPDATE);
d_extt.addFunctionKind(kind::STRING_STRIDOF);
+ d_extt.addFunctionKind(kind::STRING_INDEXOF_RE);
d_extt.addFunctionKind(kind::STRING_ITOS);
d_extt.addFunctionKind(kind::STRING_STOI);
d_extt.addFunctionKind(kind::STRING_STRREPL);
@@ -191,11 +192,11 @@ bool ExtfSolver::doReduction(int effort, Node n)
{
NodeManager* nm = NodeManager::currentNM();
Assert(k == STRING_SUBSTR || k == STRING_UPDATE || k == STRING_STRCTN
- || k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
- || k == STRING_STRREPL || k == STRING_STRREPLALL || k == SEQ_NTH
- || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
- || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER
- || k == STRING_REV);
+ || k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_ITOS
+ || k == STRING_STOI || k == STRING_STRREPL || k == STRING_STRREPLALL
+ || k == SEQ_NTH || k == STRING_REPLACE_RE
+ || k == STRING_REPLACE_RE_ALL || k == STRING_LEQ
+ || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV);
std::vector<Node> new_nodes;
Node res = d_preproc.simplify(n, new_nodes);
Assert(res != n);
diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds
index 1353ca964..743a5c006 100644
--- a/src/theory/strings/kinds
+++ b/src/theory/strings/kinds
@@ -21,13 +21,12 @@ operator STRING_CHARAT 2 "string charat"
operator STRING_STRCTN 2 "string contains"
operator STRING_LT 2 "string less than"
operator STRING_LEQ 2 "string less than or equal"
-operator STRING_STRIDOF 3 "string indexof"
+operator STRING_STRIDOF 3 "string index of substring"
+operator STRING_INDEXOF_RE 3 "string index of regular expression match"
operator STRING_STRREPL 3 "string replace"
operator STRING_STRREPLALL 3 "string replace all"
operator STRING_REPLACE_RE 3 "string replace regular expression match"
operator STRING_REPLACE_RE_ALL 3 "string replace all regular expression matches"
-typerule STRING_REPLACE_RE "SimpleTypeRule<RString, AString, ARegExp, AString>"
-typerule STRING_REPLACE_RE_ALL "SimpleTypeRule<RString, AString, ARegExp, AString>"
operator STRING_PREFIX 2 "string prefixof"
operator STRING_SUFFIX 2 "string suffixof"
operator STRING_IS_DIGIT 1 "string isdigit, returns true if argument is a string of length one that represents a digit"
@@ -152,8 +151,11 @@ typerule STRING_UPDATE ::cvc5::theory::strings::StringUpdateTypeRule
typerule STRING_CHARAT ::cvc5::theory::strings::StringAtTypeRule
typerule STRING_STRCTN ::cvc5::theory::strings::StringRelationTypeRule
typerule STRING_STRIDOF ::cvc5::theory::strings::StringIndexOfTypeRule
+typerule STRING_INDEXOF_RE "SimpleTypeRule<RInteger, AString, ARegExp, AInteger>"
typerule STRING_STRREPL ::cvc5::theory::strings::StringReplaceTypeRule
typerule STRING_STRREPLALL ::cvc5::theory::strings::StringReplaceTypeRule
+typerule STRING_REPLACE_RE "SimpleTypeRule<RString, AString, ARegExp, AString>"
+typerule STRING_REPLACE_RE_ALL "SimpleTypeRule<RString, AString, ARegExp, AString>"
typerule STRING_PREFIX ::cvc5::theory::strings::StringStrToBoolTypeRule
typerule STRING_SUFFIX ::cvc5::theory::strings::StringStrToBoolTypeRule
typerule STRING_REV ::cvc5::theory::strings::StringStrToStrTypeRule
diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp
index 0c4e35df7..549d33efc 100644
--- a/src/theory/strings/rewrites.cpp
+++ b/src/theory/strings/rewrites.cpp
@@ -68,6 +68,10 @@ const char* toString(Rewrite r)
case Rewrite::IDOF_PULL_ENDPT: return "IDOF_PULL_ENDPT";
case Rewrite::IDOF_STRIP_CNST_ENDPTS: return "IDOF_STRIP_CNST_ENDPTS";
case Rewrite::IDOF_STRIP_SYM_LEN: return "IDOF_STRIP_SYM_LEN";
+ case Rewrite::INDEXOF_RE_EMP_RE: return "INDEXOF_RE_EMP_RE";
+ case Rewrite::INDEXOF_RE_EVAL: return "INDEXOF_RE_EVAL";
+ case Rewrite::INDEXOF_RE_INVALID_INDEX: return "INDEXOF_RE_INVALID_INDEX";
+ case Rewrite::INDEXOF_RE_MAX_INDEX: return "INDEXOF_RE_MAX_INDEX";
case Rewrite::ITOS_EVAL: return "ITOS_EVAL";
case Rewrite::RE_AND_EMPTY: return "RE_AND_EMPTY";
case Rewrite::RE_ANDOR_FLATTEN: return "RE_ANDOR_FLATTEN";
diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h
index 482666faa..5e63c55c8 100644
--- a/src/theory/strings/rewrites.h
+++ b/src/theory/strings/rewrites.h
@@ -73,6 +73,10 @@ enum class Rewrite : uint32_t
IDOF_PULL_ENDPT,
IDOF_STRIP_CNST_ENDPTS,
IDOF_STRIP_SYM_LEN,
+ INDEXOF_RE_EMP_RE,
+ INDEXOF_RE_EVAL,
+ INDEXOF_RE_INVALID_INDEX,
+ INDEXOF_RE_MAX_INDEX,
ITOS_EVAL,
RE_AND_EMPTY,
RE_ANDOR_FLATTEN,
diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp
index 15c37070a..7ef1242c6 100644
--- a/src/theory/strings/sequences_rewriter.cpp
+++ b/src/theory/strings/sequences_rewriter.cpp
@@ -1461,6 +1461,10 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node)
{
retNode = rewriteIndexof(node);
}
+ else if (nk == kind::STRING_INDEXOF_RE)
+ {
+ retNode = rewriteIndexofRe(node);
+ }
else if (nk == kind::STRING_STRREPL)
{
retNode = rewriteReplace(node);
@@ -2529,6 +2533,59 @@ Node SequencesRewriter::rewriteIndexof(Node node)
return node;
}
+Node SequencesRewriter::rewriteIndexofRe(Node node)
+{
+ Assert(node.getKind() == STRING_INDEXOF_RE);
+ NodeManager* nm = NodeManager::currentNM();
+ Node s = node[0];
+ Node r = node[1];
+ Node n = node[2];
+ Node zero = nm->mkConst(Rational(0));
+ Node slen = nm->mkNode(STRING_LENGTH, s);
+
+ if (ArithEntail::check(zero, n, true) || ArithEntail::check(n, slen, true))
+ {
+ Node ret = nm->mkConst(Rational(-1));
+ return returnRewrite(node, ret, Rewrite::INDEXOF_RE_INVALID_INDEX);
+ }
+
+ if (RegExpEntail::isConstRegExp(r))
+ {
+ if (s.isConst() && n.isConst())
+ {
+ Rational nrat = n.getConst<Rational>();
+ cvc5::Rational rMaxInt(cvc5::String::maxSize());
+ if (nrat > rMaxInt)
+ {
+ // We know that, due to limitations on the size of string constants
+ // in our implementation, that accessing a position greater than
+ // rMaxInt is guaranteed to be out of bounds.
+ Node negone = nm->mkConst(Rational(-1));
+ return returnRewrite(node, negone, Rewrite::INDEXOF_RE_MAX_INDEX);
+ }
+
+ uint32_t start = nrat.getNumerator().toUnsignedInt();
+ Node rem = nm->mkConst(s.getConst<String>().substr(start));
+ std::pair<size_t, size_t> match = firstMatch(rem, r);
+ Node ret = nm->mkConst(
+ Rational(match.first == string::npos
+ ? -1
+ : static_cast<int64_t>(start + match.first)));
+ return returnRewrite(node, ret, Rewrite::INDEXOF_RE_EVAL);
+ }
+
+ if (ArithEntail::check(n, zero) && ArithEntail::check(slen, n))
+ {
+ String emptyStr("");
+ if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, r))
+ {
+ return returnRewrite(node, n, Rewrite::INDEXOF_RE_EMP_RE);
+ }
+ }
+ }
+ return node;
+}
+
Node SequencesRewriter::rewriteReplace(Node node)
{
Assert(node.getKind() == kind::STRING_STRREPL);
@@ -3150,7 +3207,7 @@ Node SequencesRewriter::rewriteReplaceReAll(Node node)
std::vector<Node> res;
String rem = x.getConst<String>();
std::pair<size_t, size_t> match(0, 0);
- while (rem.size() >= 0)
+ while (rem.size() != 0)
{
match = firstMatch(nm->mkConst(rem), yp);
if (match.first == string::npos)
diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h
index 7af24596a..37ed78786 100644
--- a/src/theory/strings/sequences_rewriter.h
+++ b/src/theory/strings/sequences_rewriter.h
@@ -194,6 +194,12 @@ class SequencesRewriter : public TheoryRewriter
* Returns the rewritten form of node.
*/
Node rewriteIndexof(Node node);
+ /** rewrite indexof regular expression match
+ * This is the entry point for post-rewriting terms n of the form
+ * str.indexof_re( s, r, n )
+ * Returns the rewritten form of node.
+ */
+ Node rewriteIndexofRe(Node node);
/** rewrite replace
* This is the entry point for post-rewriting terms n of the form
* str.replace( s, t, r )
diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp
index 9b23301f3..913518bc8 100644
--- a/src/theory/strings/skolem_cache.cpp
+++ b/src/theory/strings/skolem_cache.cpp
@@ -40,6 +40,16 @@ struct IndexVarAttributeId
};
typedef expr::Attribute<IndexVarAttributeId, Node> IndexVarAttribute;
+/**
+ * A bound variable corresponding to the universally quantified integer
+ * variable used to range over the valid lengths of a string, used for
+ * axiomatizing the behavior of some term.
+ */
+struct LengthVarAttributeId
+{
+};
+typedef expr::Attribute<LengthVarAttributeId, Node> LengthVarAttribute;
+
SkolemCache::SkolemCache(bool useOpts) : d_useOpts(useOpts)
{
NodeManager* nm = NodeManager::currentNM();
@@ -300,6 +310,14 @@ Node SkolemCache::mkIndexVar(Node t)
return bvm->mkBoundVar<IndexVarAttribute>(t, intType);
}
+Node SkolemCache::mkLengthVar(Node t)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ TypeNode intType = nm->integerType();
+ BoundVarManager* bvm = nm->getBoundVarManager();
+ return bvm->mkBoundVar<LengthVarAttribute>(t, intType);
+}
+
} // namespace strings
} // namespace theory
} // namespace cvc5
diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h
index 126ee313d..f0376dbc6 100644
--- a/src/theory/strings/skolem_cache.h
+++ b/src/theory/strings/skolem_cache.h
@@ -107,9 +107,14 @@ class SkolemCache
// in_re(a, re.++(_*, b, _*)) =>
// exists k_pre, k_match, k_post.
// a = k_pre ++ k_match ++ k_post ^
- // ~in_re(k_pre ++ substr(k_match, 0, str.len(k_match) - 1),
- // re.++(_*, b, _*)) ^
- // in_re(k2, y)
+ // len(k_pre) = indexof_re(x, y, 0) ^
+ // (forall l. 0 < l < len(k_match) =>
+ // ~in_re(substr(k_match, 0, l), r)) ^
+ // in_re(k_match, b)
+ //
+ // k_pre is the prefix before the first, shortest match of b in a. k_match
+ // is the substring of a matched by b. It is either empty or there is no
+ // shorter string that matches b.
SK_FIRST_MATCH_PRE,
SK_FIRST_MATCH,
SK_FIRST_MATCH_POST,
@@ -180,6 +185,16 @@ class SkolemCache
*/
static Node mkIndexVar(Node t);
+ /** Make length variable
+ *
+ * This returns an integer variable of kind BOUND_VARIABLE that is used for
+ * axiomatizing the behavior of a term or predicate t. It refers to lengths
+ * of strings in the reduction of t. For example, the length variable for the
+ * term str.indexof(s, r, n) is used to quantify over the lengths of strings
+ * that could be matched by r.
+ */
+ static Node mkLengthVar(Node t);
+
private:
/**
* Simplifies the arguments for a string skolem used for indexing into the
diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp
index af2eecde8..c7b3b5300 100644
--- a/src/theory/strings/term_registry.cpp
+++ b/src/theory/strings/term_registry.cpp
@@ -88,10 +88,11 @@ Node TermRegistry::eagerReduce(Node t, SkolemCache* sc)
LT, t, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
}
- else if (tk == STRING_STRIDOF)
+ else if (tk == STRING_STRIDOF || tk == STRING_INDEXOF_RE)
{
- // (and (>= (str.indexof x y n) (- 1)) (<= (str.indexof x y n) (str.len
- // x)))
+ // (and (>= (f x y n) (- 1)) (<= (f x y n) (str.len x)))
+ //
+ // where f in { str.indexof, str.indexof_re }
Node l = nm->mkNode(STRING_LENGTH, t[0]);
lemma = nm->mkNode(AND,
nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))),
@@ -143,11 +144,12 @@ void TermRegistry::preRegisterTerm(TNode n)
Kind k = n.getKind();
if (!options::stringExp())
{
- if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
- || k == STRING_STRREPL || k == STRING_SUBSTR || k == STRING_STRREPLALL
- || k == SEQ_NTH || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
- || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER
- || k == STRING_TOUPPER || k == STRING_REV || k == STRING_UPDATE)
+ if (k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_ITOS
+ || k == STRING_STOI || k == STRING_STRREPL || k == STRING_SUBSTR
+ || k == STRING_STRREPLALL || k == SEQ_NTH || k == STRING_REPLACE_RE
+ || k == STRING_REPLACE_RE_ALL || k == STRING_STRCTN || k == STRING_LEQ
+ || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV
+ || k == STRING_UPDATE)
{
std::stringstream ss;
ss << "Term of kind " << k
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index 06a39ec64..b1f5a0765 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -130,6 +130,7 @@ void TheoryStrings::finishInit()
d_equalityEngine->addFunctionKind(kind::STRING_ITOS, eagerEval);
d_equalityEngine->addFunctionKind(kind::STRING_STOI, eagerEval);
d_equalityEngine->addFunctionKind(kind::STRING_STRIDOF, eagerEval);
+ d_equalityEngine->addFunctionKind(kind::STRING_INDEXOF_RE, eagerEval);
d_equalityEngine->addFunctionKind(kind::STRING_STRREPL, eagerEval);
d_equalityEngine->addFunctionKind(kind::STRING_STRREPLALL, eagerEval);
d_equalityEngine->addFunctionKind(kind::STRING_REPLACE_RE, eagerEval);
diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp
index 356ae28ac..83a4fa04a 100644
--- a/src/theory/strings/theory_strings_preprocess.cpp
+++ b/src/theory/strings/theory_strings_preprocess.cpp
@@ -253,6 +253,89 @@ Node StringsPreprocess::reduce(Node t,
// Thus, indexof( x, y, n ) = skk.
retNode = skk;
}
+ else if (t.getKind() == kind::STRING_INDEXOF_RE)
+ {
+ // processing term: indexof_re(s, r, n)
+ Node s = t[0];
+ Node r = t[1];
+ Node n = t[2];
+ Node skk = sc->mkTypedSkolemCached(
+ nm->integerType(), t, SkolemCache::SK_PURIFY, "iork");
+ Node sLen = nm->mkNode(STRING_LENGTH, s);
+
+ // n > len(x)
+ Node ub = nm->mkNode(GT, n, sLen);
+ // 0 > n
+ Node lb = nm->mkNode(GT, zero, n);
+ // n > len(x) OR 0 > n
+ Node condNegOne = nm->mkNode(OR, ub, lb);
+ // skk = -1
+ Node retNegOne = skk.eqNode(negOne);
+
+ Node emp = Word::mkEmptyWord(s.getType());
+ // in_re("", r)
+ Node matchEmptyStr = nm->mkNode(STRING_IN_REGEXP, emp, r);
+ // skk = n
+ Node retN = skk.eqNode(n);
+
+ Node i = SkolemCache::mkIndexVar(t);
+ Node l = SkolemCache::mkLengthVar(t);
+ Node bvl = nm->mkNode(BOUND_VAR_LIST, i, l);
+ Node bound =
+ nm->mkNode(AND,
+ {
+ nm->mkNode(GEQ, i, n),
+ nm->mkNode(LT, i, nm->mkNode(ITE, retNegOne, sLen, skk)),
+ nm->mkNode(GT, l, zero),
+ nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, i)),
+ });
+ Node body = nm->mkNode(
+ OR,
+ bound.negate(),
+ nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, i, l), r)
+ .negate());
+ // forall il.
+ // n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i =>
+ // ~in_re(substr(s, i, l), r)
+ Node firstMatch = mkForallInternal(bvl, body);
+ Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
+ Node validLen =
+ nm->mkNode(AND,
+ nm->mkNode(GEQ, l, n),
+ nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, skk)));
+ Node matchBody = nm->mkNode(
+ AND,
+ validLen,
+ nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, skk, l), r));
+ // skk != -1 =>
+ // exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r))
+ Node match = nm->mkNode(
+ OR, retNegOne, mkForallInternal(bvll, matchBody.negate()).negate());
+
+ // assert:
+ // IF: n > len(s) OR 0 > n
+ // THEN: skk = -1
+ // ELIF: in_re("", r)
+ // THEN: skk = n
+ // ELSE: (forall il.
+ // n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i =>
+ // ~in_re(substr(s, i, l), r)) ^
+ // (skk != -1 =>
+ // exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r))
+ //
+ // Note that this reduction relies on eager reduction lemmas being sent to
+ // properly limit the range of skk.
+ Node rr = nm->mkNode(
+ ITE,
+ condNegOne,
+ retNegOne,
+ nm->mkNode(
+ ITE, matchEmptyStr, retN, nm->mkNode(AND, firstMatch, match)));
+ asserts.push_back(rr);
+
+ // Thus, indexof_re(s, r, n) = skk.
+ retNode = skk;
+ }
else if (t.getKind() == STRING_ITOS)
{
// processing term: int.to.str( n )
@@ -596,17 +679,14 @@ Node StringsPreprocess::reduce(Node t,
}
else if (t.getKind() == STRING_REPLACE_RE)
{
+ // processing term: replace_re( x, y, z )
Node x = t[0];
Node y = t[1];
Node z = t[2];
Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k");
- std::vector<Node> emptyVec;
- Node sigmaStar =
- nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
- Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, y, sigmaStar);
- // in_re(x, re.++(_*, y, _*))
- Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re);
+ // indexof_re(x, y, 0)
+ Node idx = nm->mkNode(STRING_INDEXOF_RE, x, y, zero);
// in_re("", y)
Node matchesEmpty =
@@ -620,42 +700,40 @@ Node StringsPreprocess::reduce(Node t,
sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH, "rre_match");
Node k3 =
sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_POST, "rre_post");
+ Node k2Len = nm->mkNode(STRING_LENGTH, k2);
// x = k1 ++ k2 ++ k3
- Node splitX = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3));
- // ~in_re(k1 ++ str.substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*))
- Node k2len = nm->mkNode(STRING_LENGTH, k2);
- Node firstMatch =
- nm->mkNode(
- STRING_IN_REGEXP,
- nm->mkNode(
- STRING_CONCAT,
- k1,
- nm->mkNode(
- STRING_SUBSTR, k2, zero, nm->mkNode(MINUS, k2len, one))),
- re)
- .negate();
+ Node split = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3));
+ // len(k1) = indexof_re(x, y, 0)
+ Node k1Len = nm->mkNode(STRING_LENGTH, k1).eqNode(idx);
+ Node l = SkolemCache::mkLengthVar(t);
+ Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
+ Node bound =
+ nm->mkNode(AND, nm->mkNode(LEQ, zero, l), nm->mkNode(LT, l, k2Len));
+ Node body = nm->mkNode(
+ OR,
+ bound.negate(),
+ nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, k2, zero, l), y)
+ .negate());
+ // forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)
+ Node shortestMatch = mkForallInternal(bvll, body);
// in_re(k2, y)
- Node k2Match = nm->mkNode(STRING_IN_REGEXP, k2, y);
+ Node match = nm->mkNode(STRING_IN_REGEXP, k2, y);
// k = k1 ++ z ++ k3
- Node res2 = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3));
+ Node res = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3));
- // IF in_re(x, re.++(_*, y, _*))
- // THEN:
- // IF in_re("", y)
- // THEN: k = z ++ x
- // ELSE:
- // x = k1 ++ k2 ++ k3 ^
- // ~in_re(k1 ++ substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*)) ^
- // in_re(k2, y) ^ k = k1 ++ z ++ k3
- // ELSE: k = x
- asserts.push_back(nm->mkNode(
- ITE,
- hasMatch,
+ // IF: indexof_re(x, y, 0) = -1
+ // THEN: k = x
+ // ELSE:
+ // x = k1 ++ k2 ++ k3 ^
+ // len(k1) = indexof_re(x, y, 0) ^
+ // (forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)) ^
+ // in_re(k2, y) ^
+ // k = k1 ++ z ++ k3
+ asserts.push_back(
nm->mkNode(ITE,
- matchesEmpty,
- res1,
- nm->mkNode(AND, {splitX, firstMatch, k2Match, res2})),
- k.eqNode(x)));
+ idx.eqNode(negOne),
+ k.eqNode(x),
+ nm->mkNode(AND, {split, k1Len, shortestMatch, match, res})));
retNode = k;
}
else if (t.getKind() == STRING_REPLACE_RE_ALL)
@@ -679,13 +757,10 @@ Node StringsPreprocess::reduce(Node t,
Node emp = Word::mkEmptyWord(t.getType());
- std::vector<Node> emptyVec;
- Node sigmaStar =
- nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp));
- Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, yp, sigmaStar);
- // in_re(x, _* ++ y' ++ _*)
- Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re);
+ Node idx = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero);
+ // indexof_re(x, y', 0) = -1
+ Node noMatch = idx.eqNode(negOne);
Node ufno = nm->mkNode(APPLY_UF, uf, numOcc);
Node usno = nm->mkNode(APPLY_UF, us, numOcc);
@@ -700,8 +775,9 @@ Node StringsPreprocess::reduce(Node t,
lemmas.push_back(usno.eqNode(rem));
// Uf(0) = 0
lemmas.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero));
- // not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*)))
- lemmas.push_back(nm->mkNode(STRING_IN_REGEXP, rem, re).negate());
+ // indexof_re(substr(x, Uf(numOcc)), y', 0) = -1
+ lemmas.push_back(
+ nm->mkNode(STRING_INDEXOF_RE, rem, yp, zero).eqNode(negOne));
Node i = SkolemCache::mkIndexVar(t);
Node bvli = nm->mkNode(BOUND_VAR_LIST, i);
@@ -709,50 +785,57 @@ Node StringsPreprocess::reduce(Node t,
nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc));
Node ip1 = nm->mkNode(PLUS, i, one);
Node ufi = nm->mkNode(APPLY_UF, uf, i);
- Node uli = nm->mkNode(APPLY_UF, ul, i);
Node ufip1 = nm->mkNode(APPLY_UF, uf, ip1);
- Node ii = nm->mkNode(MINUS, ufip1, uli);
- Node match = nm->mkNode(STRING_SUBSTR, x, ii, uli);
- Node pfxMatch =
- nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi));
- Node nonMatch =
- nm->mkNode(STRING_SUBSTR,
- x,
- ufi,
- nm->mkNode(MINUS, nm->mkNode(MINUS, ufip1, one), ufi));
+ Node ulip1 = nm->mkNode(APPLY_UF, ul, ip1);
+ // ii = Uf(i + 1) - Ul(i + 1)
+ Node ii = nm->mkNode(MINUS, ufip1, ulip1);
std::vector<Node> flem;
// Ul(i) > 0
- flem.push_back(nm->mkNode(GT, uli, zero));
- // Uf(i + 1) >= Uf(i) + Ul(i)
- flem.push_back(nm->mkNode(GEQ, ufip1, nm->mkNode(PLUS, ufi, uli)));
- // in_re(substr(x, ii, Ul(i)), y')
- flem.push_back(nm->mkNode(STRING_IN_REGEXP, match, yp));
- // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*))
- flem.push_back(nm->mkNode(STRING_IN_REGEXP, nonMatch, re).negate());
+ flem.push_back(nm->mkNode(GT, ulip1, zero));
+ // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1)
+ flem.push_back(ufip1.eqNode(
+ nm->mkNode(PLUS, nm->mkNode(STRING_INDEXOF_RE, x, yp, ufi), ulip1)));
+ // in_re(substr(x, ii, Ul(i + 1)), y')
+ flem.push_back(nm->mkNode(
+ STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, ulip1), yp));
+ Node l = SkolemCache::mkLengthVar(t);
+ Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
+ Node lenBound =
+ nm->mkNode(AND, nm->mkNode(LT, zero, l), nm->mkNode(LT, l, ulip1));
+ Node shortestMatchBody = nm->mkNode(
+ OR,
+ lenBound.negate(),
+ nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, l), y)
+ .negate());
+ // forall l. 0 < l < Ul(i + 1) =>
+ // ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y')
+ flem.push_back(mkForallInternal(bvll, shortestMatchBody));
+ Node pfxMatch =
+ nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi));
// Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1)
flem.push_back(
nm->mkNode(APPLY_UF, us, i)
.eqNode(nm->mkNode(
STRING_CONCAT, pfxMatch, z, nm->mkNode(APPLY_UF, us, ip1))));
-
Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem));
Node forall = mkForallInternal(bvli, body);
lemmas.push_back(forall);
- // IF in_re(x, re.++(_*, y', _*))
- // THEN:
+ // IF indexof(x, y', 0) = -1
+ // THEN: k = x
+ // ELSE:
// numOcc > 0 ^
// k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^
- // Uf(0) = 0 ^ not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*)))
+ // Uf(0) = 0 ^ indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 ^
// forall i. 0 <= i < nummOcc =>
// Ul(i) > 0 ^
- // Uf(i + 1) >= Uf(i) + Ul(i) ^
- // in_re(substr(x, ii, Ul(i)), y') ^
- // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*)) ^
+ // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) ^
+ // in_re(substr(x, ii, Ul(i + 1)), y') ^
+ // forall l. 0 < l < Ul(i + 1) =>
+ // ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y')
// Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1)
// where ii = Uf(i + 1) - Ul(i)
- // ELSE: k = x
// where y' = re.diff(y, "")
//
// Conceptually, y' is the regex y but excluding the empty string (because
@@ -760,8 +843,17 @@ Node StringsPreprocess::reduce(Node t,
// matches of y' in x, Uf(i) is the end position of the i-th match, Ul(i)
// is the length of the i^th match, and Us(i) is the result of processing
// the remainder after processing the i^th occurrence of y in x.
+ //
+ // Visualization of Uf(i) and Ul(i):
+ //
+ // x = |------------| match 1 |-----------| match 2 |---|
+ // | | |
+ // Uf(0) Uf(1) Uf(2)
+ //
+ // |---------| |-----------|
+ // Ul(1) Ul(2)
asserts.push_back(
- nm->mkNode(ITE, hasMatch, nm->mkNode(AND, lemmas), k.eqNode(x)));
+ nm->mkNode(ITE, noMatch, k.eqNode(x), nm->mkNode(AND, lemmas)));
retNode = k;
}
else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER)
diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp
index 6897d5272..c763557ca 100644
--- a/src/theory/strings/theory_strings_utils.cpp
+++ b/src/theory/strings/theory_strings_utils.cpp
@@ -383,8 +383,9 @@ TypeNode getOwnerStringType(Node n)
{
TypeNode tn;
Kind k = n.getKind();
- if (k == STRING_STRIDOF || k == STRING_LENGTH || k == STRING_STRCTN
- || k == SEQ_NTH || k == STRING_PREFIX || k == STRING_SUFFIX)
+ if (k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_LENGTH
+ || k == STRING_STRCTN || k == SEQ_NTH || k == STRING_PREFIX
+ || k == STRING_SUFFIX)
{
// owning string type is the type of first argument
tn = n[0].getType();
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback