From c7ec792a2086c5b92c4a96d18f7cedb293712dfd Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Mon, 13 Jul 2020 00:43:45 -0500 Subject: Add support for string/sequence update (#4725) This adds basic support for string/sequence updating, which has a semantics that is length preserving. --- src/theory/strings/extf_solver.cpp | 12 +++-- src/theory/strings/kinds | 2 + src/theory/strings/rewrites.cpp | 5 ++ src/theory/strings/rewrites.h | 5 ++ src/theory/strings/sequences_rewriter.cpp | 52 ++++++++++++++++++++- src/theory/strings/sequences_rewriter.h | 6 +++ src/theory/strings/term_registry.cpp | 2 +- src/theory/strings/theory_strings.cpp | 1 + src/theory/strings/theory_strings_preprocess.cpp | 59 ++++++++++++++++++++++++ src/theory/strings/theory_strings_type_rules.h | 32 +++++++++++++ src/theory/strings/word.cpp | 22 +++++++++ src/theory/strings/word.h | 3 ++ 12 files changed, 194 insertions(+), 7 deletions(-) (limited to 'src/theory/strings') diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index 4ee2f4919..9b1b0e6dd 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -51,6 +51,7 @@ ExtfSolver::ExtfSolver(context::Context* c, d_reduced(u) { d_extt.addFunctionKind(kind::STRING_SUBSTR); + d_extt.addFunctionKind(kind::STRING_UPDATE); d_extt.addFunctionKind(kind::STRING_STRIDOF); d_extt.addFunctionKind(kind::STRING_ITOS); d_extt.addFunctionKind(kind::STRING_STOI); @@ -183,11 +184,12 @@ bool ExtfSolver::doReduction(int effort, Node n) else if (k != kind::STRING_TO_CODE) { NodeManager* nm = NodeManager::currentNM(); - Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF - || k == STRING_ITOS || k == STRING_STOI || k == STRING_STRREPL - || k == STRING_STRREPLALL || k == STRING_REPLACE_RE - || k == STRING_REPLACE_RE_ALL || k == STRING_LEQ - || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV); + Assert(k == STRING_SUBSTR || k == STRING_UPDATE || k == STRING_STRCTN + || k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI + || k == STRING_STRREPL || k == STRING_STRREPLALL + || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL + || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER + || k == STRING_REV); std::vector new_nodes; Node res = d_preproc.simplify(n, new_nodes); Assert(res != n); diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 21a435152..226dcbd17 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -16,6 +16,7 @@ operator STRING_CONCAT 2: "string concat (N-ary)" operator STRING_IN_REGEXP 2 "membership" operator STRING_LENGTH 1 "string length" operator STRING_SUBSTR 3 "string substr" +operator STRING_UPDATE 3 "string update" operator STRING_CHARAT 2 "string charat" operator STRING_STRCTN 2 "string contains" operator STRING_LT 2 "string less than" @@ -141,6 +142,7 @@ typerule REGEXP_SIGMA "SimpleTypeRule" typerule STRING_CONCAT ::CVC4::theory::strings::StringConcatTypeRule typerule STRING_LENGTH ::CVC4::theory::strings::StringStrToIntTypeRule typerule STRING_SUBSTR ::CVC4::theory::strings::StringSubstrTypeRule +typerule STRING_UPDATE ::CVC4::theory::strings::StringUpdateTypeRule typerule STRING_CHARAT ::CVC4::theory::strings::StringAtTypeRule typerule STRING_STRCTN ::CVC4::theory::strings::StringRelationTypeRule typerule STRING_STRIDOF ::CVC4::theory::strings::StringIndexOfTypeRule diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp index 68b510de6..6ea467ae9 100644 --- a/src/theory/strings/rewrites.cpp +++ b/src/theory/strings/rewrites.cpp @@ -139,6 +139,11 @@ const char* toString(Rewrite r) case Rewrite::SS_START_NEG: return "SS_START_NEG"; case Rewrite::SS_STRIP_END_PT: return "SS_STRIP_END_PT"; case Rewrite::SS_STRIP_START_PT: return "SS_STRIP_START_PT"; + case Rewrite::UPD_EVAL: return "UPD_EVAL"; + case Rewrite::UPD_EMPTYSTR: return "UPD_EMPTYSTR"; + case Rewrite::UPD_CONST_INDEX_MAX_OOB: return "UPD_CONST_INDEX_MAX_OOB"; + case Rewrite::UPD_CONST_INDEX_NEG: return "UPD_CONST_INDEX_NEG"; + case Rewrite::UPD_CONST_INDEX_OOB: return "UPD_CONST_INDEX_OOB"; case Rewrite::STOI_CONCAT_NONNUM: return "STOI_CONCAT_NONNUM"; case Rewrite::STOI_EVAL: return "STOI_EVAL"; case Rewrite::STR_CONV_CONST: return "STR_CONV_CONST"; diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h index ccbdbc0cd..bc5de3a8a 100644 --- a/src/theory/strings/rewrites.h +++ b/src/theory/strings/rewrites.h @@ -142,6 +142,11 @@ enum class Rewrite : uint32_t SS_START_NEG, SS_STRIP_END_PT, SS_STRIP_START_PT, + UPD_EVAL, + UPD_EMPTYSTR, + UPD_CONST_INDEX_MAX_OOB, + UPD_CONST_INDEX_NEG, + UPD_CONST_INDEX_OOB, STOI_CONCAT_NONNUM, STOI_EVAL, STR_CONV_CONST, diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index 110864c3b..292960e6a 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -570,9 +570,11 @@ Node SequencesRewriter::rewriteLength(Node node) return returnRewrite(node, retNode, Rewrite::LEN_REPL_INV); } } - else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER || nk0 == STRING_REV) + else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER || nk0 == STRING_REV + || nk0 == STRING_UPDATE) { // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev. + // len( update( x, n, y ) ) = len( x ) Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]); return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV); } @@ -1406,6 +1408,10 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) { retNode = rewriteSubstr(node); } + else if (nk == kind::STRING_UPDATE) + { + retNode = rewriteUpdate(node); + } else if (nk == kind::STRING_STRCTN) { retNode = rewriteContains(node); @@ -1800,6 +1806,50 @@ Node SequencesRewriter::rewriteSubstr(Node node) return node; } +Node SequencesRewriter::rewriteUpdate(Node node) +{ + Assert(node.getKind() == kind::STRING_UPDATE); + Node s = node[0]; + if (s.isConst()) + { + if (Word::isEmpty(s)) + { + return returnRewrite(node, s, Rewrite::UPD_EMPTYSTR); + } + // rewriting for constant arguments + if (node[1].isConst()) + { + CVC4::Rational rMaxInt(String::maxSize()); + if (node[1].getConst() > rMaxInt) + { + // start beyond the maximum size of strings + // thus, it must be beyond the end point of this string + return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_MAX_OOB); + } + else if (node[1].getConst().sgn() < 0) + { + // start before the beginning of the string + return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_NEG); + } + uint32_t start = + node[1].getConst().getNumerator().toUnsignedInt(); + size_t len = Word::getLength(s); + if (start >= len) + { + // start beyond the end of the string + return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_OOB); + } + if (node[2].isConst()) + { + Node ret = Word::update(s, start, node[2]); + return returnRewrite(node, ret, Rewrite::UPD_EVAL); + } + } + } + + return node; +} + Node SequencesRewriter::rewriteContains(Node node) { Assert(node.getKind() == kind::STRING_STRCTN); diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h index 8da672cb5..3251579ff 100644 --- a/src/theory/strings/sequences_rewriter.h +++ b/src/theory/strings/sequences_rewriter.h @@ -170,6 +170,12 @@ class SequencesRewriter : public TheoryRewriter * Returns the rewritten form of node. */ Node rewriteSubstr(Node node); + /** rewrite update + * This is the entry point for post-rewriting terms node of the form + * str.update( s, i1, i2 ) + * Returns the rewritten form of node. + */ + Node rewriteUpdate(Node node); /** rewrite contains * This is the entry point for post-rewriting terms node of the form * str.contains( t, s ) diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp index d21cf069d..0b80db2ee 100644 --- a/src/theory/strings/term_registry.cpp +++ b/src/theory/strings/term_registry.cpp @@ -140,7 +140,7 @@ void TermRegistry::preRegisterTerm(TNode n) || k == STRING_STRREPL || k == STRING_STRREPLALL || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER - || k == STRING_TOUPPER || k == STRING_REV) + || k == STRING_TOUPPER || k == STRING_REV || k == STRING_UPDATE) { std::stringstream ss; ss << "Term of kind " << k diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 942c8d216..150ea8977 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -86,6 +86,7 @@ TheoryStrings::TheoryStrings(context::Context* c, d_equalityEngine.addFunctionKind(kind::STRING_STRCTN); d_equalityEngine.addFunctionKind(kind::STRING_LEQ); d_equalityEngine.addFunctionKind(kind::STRING_SUBSTR); + d_equalityEngine.addFunctionKind(kind::STRING_UPDATE); d_equalityEngine.addFunctionKind(kind::STRING_ITOS); d_equalityEngine.addFunctionKind(kind::STRING_STOI); d_equalityEngine.addFunctionKind(kind::STRING_STRIDOF); diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index af946567b..a752958b2 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -114,6 +114,65 @@ Node StringsPreprocess::reduce(Node t, // Thus, substr( s, n, m ) = skt retNode = skt; } + else if (t.getKind() == kind::STRING_UPDATE) + { + // processing term: update( s, n, r ) + Node s = t[0]; + Node n = t[1]; + Node r = t[2]; + Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst"); + Node ls = nm->mkNode(STRING_LENGTH, s); + // start point is greater than or equal zero + Node c1 = nm->mkNode(GEQ, n, zero); + // start point is less than end of string + Node c2 = nm->mkNode(GT, ls, n); + Node cond = nm->mkNode(AND, c1, c2); + + // substr(r,0,|s|-n) + Node lens = nm->mkNode(STRING_LENGTH, s); + Node rs; + if (r.isConst() && Word::getLength(r) == 1) + { + // optimization: don't need to take substring for single characters, due + // to guard on where it is used in the reduction below. + rs = r; + } + else + { + rs = nm->mkNode(STRING_SUBSTR, r, zero, nm->mkNode(MINUS, lens, n)); + } + Node rslen = nm->mkNode(STRING_LENGTH, rs); + Node nsuf = nm->mkNode(PLUS, n, rslen); + // substr(s, n, len(substr(r,0,|s|-n))), which is used for formalizing the + // purification variable sk3 below. + Node ssubstr = nm->mkNode(STRING_SUBSTR, s, n, rslen); + + Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre"); + Node sk2 = + sc->mkSkolemCached(s, nsuf, SkolemCache::SK_SUFFIX_REM, "sssufr"); + Node sk3 = sc->mkSkolemCached(ssubstr, SkolemCache::SK_PURIFY, "ssubstr"); + Node a1 = skt.eqNode(nm->mkNode(STRING_CONCAT, sk1, rs, sk2)); + Node a2 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, sk3, sk2)); + // length of first skolem is second argument + Node a3 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n); + + Node b1 = nm->mkNode(AND, a1, a2, a3); + Node b2 = skt.eqNode(s); + Node lemma = nm->mkNode(ITE, cond, b1, b2); + + // assert: + // IF n >=0 AND n < len( s ) + // THEN: skt = sk1 ++ substr(r,0,len(s)-n) ++ sk2 AND + // s = sk1 ++ sk3 ++ sk2 AND + // len( sk1 ) = n + // ELSE: skt = s + // We use an optimization where r is used instead of substr(r,0,len(s)-n) + // if r is a constant of length one. + asserts.push_back(lemma); + + // Thus, str.update( s, n, r ) = skt + retNode = skt; + } else if (t.getKind() == kind::STRING_STRIDOF) { // processing term: indexof( x, y, n ) diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h index 78b925ebe..12ddb8a3d 100644 --- a/src/theory/strings/theory_strings_type_rules.h +++ b/src/theory/strings/theory_strings_type_rules.h @@ -95,6 +95,38 @@ class StringSubstrTypeRule } }; +class StringUpdateTypeRule +{ + public: + inline static TypeNode computeType(NodeManager* nodeManager, + TNode n, + bool check) + { + TypeNode t = n[0].getType(check); + if (check) + { + if (!t.isStringLike()) + { + throw TypeCheckingExceptionPrivate( + n, "expecting a string-like term in update"); + } + TypeNode t2 = n[1].getType(check); + if (!t2.isInteger()) + { + throw TypeCheckingExceptionPrivate( + n, "expecting an integer start term in update"); + } + t2 = n[2].getType(check); + if (!t2.isStringLike()) + { + throw TypeCheckingExceptionPrivate( + n, "expecting an string-like replace term in update"); + } + } + return t; + } +}; + class StringAtTypeRule { public: diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp index 49221409e..63e3f1dba 100644 --- a/src/theory/strings/word.cpp +++ b/src/theory/strings/word.cpp @@ -251,6 +251,28 @@ bool Word::hasSuffix(TNode x, TNode y) return false; } +Node Word::update(TNode x, std::size_t i, TNode t) +{ + NodeManager* nm = NodeManager::currentNM(); + Kind k = x.getKind(); + if (k == CONST_STRING) + { + Assert(t.getKind() == CONST_STRING); + String sx = x.getConst(); + String st = t.getConst(); + return nm->mkConst(String(sx.update(i, st))); + } + else if (k == CONST_SEQUENCE) + { + Assert(t.getKind() == CONST_SEQUENCE); + const Sequence& sx = x.getConst(); + const Sequence& st = t.getConst(); + Sequence res = sx.update(i, st); + return nm->mkConst(res); + } + Unimplemented(); + return Node::null(); +} Node Word::replace(TNode x, TNode y, TNode t) { NodeManager* nm = NodeManager::currentNM(); diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h index 3b15b763a..bace06bfb 100644 --- a/src/theory/strings/word.h +++ b/src/theory/strings/word.h @@ -80,6 +80,9 @@ class Word /** Returns true if y is a suffix of x */ static bool hasSuffix(TNode x, TNode y); + /** Replace the character at index n in x with t */ + static Node update(TNode x, std::size_t n, TNode t); + /** Replace the first occurrence of y in x with t */ static Node replace(TNode x, TNode y, TNode t); -- cgit v1.2.3