diff options
author | Andrew Reynolds <andrew.j.reynolds@gmail.com> | 2020-03-20 14:07:37 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-20 14:07:37 -0500 |
commit | 964760cf81eb7414a11bbd89ef3a16e8927d6947 (patch) | |
tree | 0c574e99433c722e69af6efeeefbe0901010f7b7 /src/theory/strings/sequences_rewriter.cpp | |
parent | aa44c35f035f1cab03de0c5fe7c0f16b20f44696 (diff) |
Split string-specific operators from TheoryStringsRewriter (#3920)
Organization towards theory of sequences.
The motivation of this PR is to ensure that string-specific operators in the rewriter are in their own file; thus the use of mkConst<String> / getConst<String> is allowable in rewriter_str.cpp.
Diffstat (limited to 'src/theory/strings/sequences_rewriter.cpp')
-rw-r--r-- | src/theory/strings/sequences_rewriter.cpp | 5597 |
1 files changed, 5597 insertions, 0 deletions
diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp new file mode 100644 index 000000000..f4a1cd411 --- /dev/null +++ b/src/theory/strings/sequences_rewriter.cpp @@ -0,0 +1,5597 @@ +/********************* */ +/*! \file sequences_rewriter.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of the theory of strings. + ** + ** Implementation of the theory of strings. + **/ + +#include "theory/strings/sequences_rewriter.h" + +#include <stdint.h> +#include <algorithm> + +#include "expr/node_builder.h" +#include "options/strings_options.h" +#include "smt/logic_exception.h" +#include "theory/arith/arith_msum.h" +#include "theory/strings/regexp_operation.h" +#include "theory/strings/strings_rewriter.h" +#include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" +#include "theory/theory.h" +#include "util/integer.h" +#include "util/rational.h" + +using namespace std; +using namespace CVC4; +using namespace CVC4::kind; +using namespace CVC4::theory; +using namespace CVC4::theory::strings; + +Node SequencesRewriter::simpleRegexpConsume(std::vector<Node>& mchildren, + std::vector<Node>& children, + int dir) +{ + Trace("regexp-ext-rewrite-debug") + << "Simple reg exp consume, dir=" << dir << ":" << std::endl; + Trace("regexp-ext-rewrite-debug") + << " mchildren : " << mchildren << std::endl; + Trace("regexp-ext-rewrite-debug") << " children : " << children << std::endl; + NodeManager* nm = NodeManager::currentNM(); + unsigned tmin = dir < 0 ? 0 : dir; + unsigned tmax = dir < 0 ? 1 : dir; + // try to remove off front and back + for (unsigned t = 0; t < 2; t++) + { + if (tmin <= t && t <= tmax) + { + bool do_next = true; + while (!children.empty() && !mchildren.empty() && do_next) + { + do_next = false; + Node xc = mchildren[mchildren.size() - 1]; + Node rc = children[children.size() - 1]; + Assert(rc.getKind() != kind::REGEXP_CONCAT); + Assert(xc.getKind() != kind::STRING_CONCAT); + if (rc.getKind() == kind::STRING_TO_REGEXP) + { + if (xc == rc[0]) + { + children.pop_back(); + mchildren.pop_back(); + do_next = true; + Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl; + } + else if (xc.isConst() && rc[0].isConst()) + { + // split the constant + int index; + Node s = splitConstant(xc, rc[0], index, t == 0); + Trace("regexp-ext-rewrite-debug") + << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> " + << s << " " << index << " " << t << std::endl; + if (s.isNull()) + { + Trace("regexp-ext-rewrite-debug") + << "...return false" << std::endl; + return NodeManager::currentNM()->mkConst(false); + } + else + { + Trace("regexp-ext-rewrite-debug") + << "...strip equal const" << std::endl; + children.pop_back(); + mchildren.pop_back(); + if (index == 0) + { + mchildren.push_back(s); + } + else + { + children.push_back(nm->mkNode(STRING_TO_REGEXP, s)); + } + } + do_next = true; + } + } + else if (xc.isConst()) + { + // check for constants + CVC4::String s = xc.getConst<String>(); + if (Word::isEmpty(xc)) + { + Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl; + // ignore and continue + mchildren.pop_back(); + do_next = true; + } + else if (rc.getKind() == kind::REGEXP_RANGE + || rc.getKind() == kind::REGEXP_SIGMA) + { + std::vector<unsigned> ssVec; + ssVec.push_back(t == 0 ? s.back() : s.front()); + CVC4::String ss(ssVec); + if (testConstStringInRegExp(ss, 0, rc)) + { + // strip off one character + mchildren.pop_back(); + if (s.size() > 1) + { + if (t == 0) + { + mchildren.push_back(NodeManager::currentNM()->mkConst( + s.substr(0, s.size() - 1))); + } + else + { + mchildren.push_back( + NodeManager::currentNM()->mkConst(s.substr(1))); + } + } + children.pop_back(); + do_next = true; + } + else + { + return NodeManager::currentNM()->mkConst(false); + } + } + else if (rc.getKind() == kind::REGEXP_INTER + || rc.getKind() == kind::REGEXP_UNION) + { + // see if any/each child does not work + bool result_valid = true; + Node result; + Node emp_s = NodeManager::currentNM()->mkConst(::CVC4::String("")); + for (unsigned i = 0; i < rc.getNumChildren(); i++) + { + std::vector<Node> mchildren_s; + std::vector<Node> children_s; + mchildren_s.push_back(xc); + utils::getConcat(rc[i], children_s); + Node ret = simpleRegexpConsume(mchildren_s, children_s, t); + if (!ret.isNull()) + { + // one conjunct cannot be satisfied, return false + if (rc.getKind() == kind::REGEXP_INTER) + { + return ret; + } + } + else + { + if (children_s.empty()) + { + // if we were able to fully consume, store the result + Assert(mchildren_s.size() <= 1); + if (mchildren_s.empty()) + { + mchildren_s.push_back(emp_s); + } + if (result.isNull()) + { + result = mchildren_s[0]; + } + else if (result != mchildren_s[0]) + { + result_valid = false; + } + } + else + { + result_valid = false; + } + } + } + if (result_valid) + { + if (result.isNull()) + { + // all disjuncts cannot be satisfied, return false + Assert(rc.getKind() == kind::REGEXP_UNION); + return NodeManager::currentNM()->mkConst(false); + } + else + { + // all branches led to the same result + children.pop_back(); + mchildren.pop_back(); + if (result != emp_s) + { + mchildren.push_back(result); + } + do_next = true; + } + } + } + else if (rc.getKind() == kind::REGEXP_STAR) + { + // check if there is no way that this star can be unrolled even once + std::vector<Node> mchildren_s; + mchildren_s.insert( + mchildren_s.end(), mchildren.begin(), mchildren.end()); + if (t == 1) + { + std::reverse(mchildren_s.begin(), mchildren_s.end()); + } + std::vector<Node> children_s; + utils::getConcat(rc[0], children_s); + Trace("regexp-ext-rewrite-debug") + << "...recursive call on body of star" << std::endl; + Node ret = simpleRegexpConsume(mchildren_s, children_s, t); + if (!ret.isNull()) + { + Trace("regexp-ext-rewrite-debug") + << "CRE : regexp star infeasable " << xc << " " << rc + << std::endl; + children.pop_back(); + if (!children.empty()) + { + Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl; + do_next = true; + } + } + else + { + if (children_s.empty()) + { + // check if beyond this, we can't do it or there is nothing + // left, if so, repeat + bool can_skip = false; + if (children.size() > 1) + { + std::vector<Node> mchildren_ss; + mchildren_ss.insert( + mchildren_ss.end(), mchildren.begin(), mchildren.end()); + std::vector<Node> children_ss; + children_ss.insert( + children_ss.end(), children.begin(), children.end() - 1); + if (t == 1) + { + std::reverse(mchildren_ss.begin(), mchildren_ss.end()); + std::reverse(children_ss.begin(), children_ss.end()); + } + if (simpleRegexpConsume(mchildren_ss, children_ss, t) + .isNull()) + { + can_skip = true; + } + } + if (!can_skip) + { + Trace("regexp-ext-rewrite-debug") + << "...can't skip" << std::endl; + // take the result of fully consuming once + if (t == 1) + { + std::reverse(mchildren_s.begin(), mchildren_s.end()); + } + mchildren.clear(); + mchildren.insert( + mchildren.end(), mchildren_s.begin(), mchildren_s.end()); + do_next = true; + } + else + { + Trace("regexp-ext-rewrite-debug") + << "...can skip " << rc << " from " << xc << std::endl; + } + } + } + } + } + if (!do_next) + { + Trace("regexp-ext-rewrite") + << "Cannot consume : " << xc << " " << rc << std::endl; + } + } + } + if (dir != 0) + { + std::reverse(children.begin(), children.end()); + std::reverse(mchildren.begin(), mchildren.end()); + } + } + return Node::null(); +} + +Node SequencesRewriter::rewriteEquality(Node node) +{ + Assert(node.getKind() == kind::EQUAL); + if (node[0] == node[1]) + { + return NodeManager::currentNM()->mkConst(true); + } + else if (node[0].isConst() && node[1].isConst()) + { + return NodeManager::currentNM()->mkConst(false); + } + + // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false ) + for (unsigned r = 0; r < 2; r++) + { + // must call rewrite contains directly to avoid infinite loop + // we do a fix point since we may rewrite contains terms to simpler + // contains terms. + Node ctn = checkEntailContains(node[r], node[1 - r], false); + if (!ctn.isNull()) + { + if (!ctn.getConst<bool>()) + { + return returnRewrite(node, ctn, "eq-nctn"); + } + else + { + // definitely contains but not syntactically equal + // We may be able to simplify, e.g. + // str.++( x, "a" ) == "a" ----> x = "" + } + } + } + + // ( len( s ) != len( t ) ) => ( s == t ---> false ) + // This covers cases like str.++( x, x ) == "a" ---> false + Node len0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); + Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); + Node len_eq = len0.eqNode(len1); + len_eq = Rewriter::rewrite(len_eq); + if (len_eq.isConst() && !len_eq.getConst<bool>()) + { + return returnRewrite(node, len_eq, "eq-len-deq"); + } + + std::vector<Node> c[2]; + for (unsigned i = 0; i < 2; i++) + { + utils::getConcat(node[i], c[i]); + } + + // check if the prefix, suffix mismatches + // For example, str.++( x, "a", y ) == str.++( x, "bc", z ) ---> false + unsigned minsize = std::min(c[0].size(), c[1].size()); + for (unsigned r = 0; r < 2; r++) + { + for (unsigned i = 0; i < minsize; i++) + { + unsigned index1 = r == 0 ? i : (c[0].size() - 1) - i; + unsigned index2 = r == 0 ? i : (c[1].size() - 1) - i; + if (c[0][index1].isConst() && c[1][index2].isConst()) + { + CVC4::String s = c[0][index1].getConst<String>(); + CVC4::String t = c[1][index2].getConst<String>(); + unsigned len_short = s.size() <= t.size() ? s.size() : t.size(); + bool isSameFix = + r == 1 ? s.rstrncmp(t, len_short) : s.strncmp(t, len_short); + if (!isSameFix) + { + Node ret = NodeManager::currentNM()->mkConst(false); + return returnRewrite(node, ret, "eq-nfix"); + } + } + if (c[0][index1] != c[1][index2]) + { + break; + } + } + } + + // standard ordering + if (node[0] > node[1]) + { + return NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]); + } + return node; +} + +Node SequencesRewriter::rewriteEqualityExt(Node node) +{ + Assert(node.getKind() == EQUAL); + if (node[0].getType().isInteger()) + { + return rewriteArithEqualityExt(node); + } + if (node[0].getType().isString()) + { + return rewriteStrEqualityExt(node); + } + return node; +} + +Node SequencesRewriter::rewriteStrEqualityExt(Node node) +{ + Assert(node.getKind() == EQUAL && node[0].getType().isString()); + + NodeManager* nm = NodeManager::currentNM(); + std::vector<Node> c[2]; + Node new_ret; + for (unsigned i = 0; i < 2; i++) + { + utils::getConcat(node[i], c[i]); + } + // ------- equality unification + bool changed = false; + for (unsigned i = 0; i < 2; i++) + { + while (!c[0].empty() && !c[1].empty() && c[0].back() == c[1].back()) + { + c[0].pop_back(); + c[1].pop_back(); + changed = true; + } + // splice constants + if (!c[0].empty() && !c[1].empty() && c[0].back().isConst() + && c[1].back().isConst()) + { + Node cs[2]; + size_t csl[2]; + for (unsigned j = 0; j < 2; j++) + { + cs[j] = c[j].back(); + csl[j] = Word::getLength(cs[j]); + } + size_t larger = csl[0] > csl[1] ? 0 : 1; + size_t smallerSize = csl[1 - larger]; + if (cs[1 - larger] + == (i == 0 ? Word::suffix(cs[larger], smallerSize) + : Word::prefix(cs[larger], smallerSize))) + { + size_t sizeDiff = csl[larger] - smallerSize; + c[larger][c[larger].size() - 1] = + i == 0 ? Word::prefix(cs[larger], sizeDiff) + : Word::suffix(cs[larger], sizeDiff); + c[1 - larger].pop_back(); + changed = true; + } + } + for (unsigned j = 0; j < 2; j++) + { + std::reverse(c[j].begin(), c[j].end()); + } + } + if (changed) + { + // e.g. x++y = x++z ---> y = z, "AB" ++ x = "A" ++ y --> "B" ++ x = y + Node s1 = utils::mkConcat(STRING_CONCAT, c[0]); + Node s2 = utils::mkConcat(STRING_CONCAT, c[1]); + new_ret = s1.eqNode(s2); + node = returnRewrite(node, new_ret, "str-eq-unify"); + } + + // ------- homogeneous constants + for (unsigned i = 0; i < 2; i++) + { + Node cn = checkEntailHomogeneousString(node[i]); + if (!cn.isNull() && !Word::isEmpty(cn)) + { + Assert(cn.isConst()); + Assert(Word::getLength(cn) == 1); + unsigned hchar = cn.getConst<String>().front(); + + // The operands of the concat on each side of the equality without + // constant strings + std::vector<Node> trimmed[2]; + // Counts the number of `hchar`s on each side + size_t numHChars[2] = {0, 0}; + for (size_t j = 0; j < 2; j++) + { + // Sort the operands of the concats on both sides of the equality + // (since both sides may only contain one char, the order does not + // matter) + std::sort(c[j].begin(), c[j].end()); + for (const Node& cc : c[j]) + { + if (cc.isConst()) + { + // Count the number of `hchar`s in the string constant and make + // sure that all chars are `hchar`s + std::vector<unsigned> veccc = cc.getConst<String>().getVec(); + for (size_t k = 0, size = veccc.size(); k < size; k++) + { + if (veccc[k] != hchar) + { + // This conflict case should mostly should be taken care of by + // multiset reasoning in the strings rewriter, but we recognize + // this conflict just in case. + new_ret = nm->mkConst(false); + return returnRewrite( + node, new_ret, "string-eq-const-conflict-non-homog"); + } + numHChars[j]++; + } + } + else + { + trimmed[j].push_back(cc); + } + } + } + + // We have to remove the same number of `hchar`s from both sides, so the + // side with less `hchar`s determines how many we can remove + size_t trimmedConst = std::min(numHChars[0], numHChars[1]); + for (size_t j = 0; j < 2; j++) + { + size_t diff = numHChars[j] - trimmedConst; + if (diff != 0) + { + // Add a constant string to the side with more `hchar`s to restore + // the difference in number of `hchar`s + std::vector<unsigned> vec(diff, hchar); + trimmed[j].push_back(nm->mkConst(String(vec))); + } + } + + Node lhs = utils::mkConcat(STRING_CONCAT, trimmed[i]); + Node ss = utils::mkConcat(STRING_CONCAT, trimmed[1 - i]); + if (lhs != node[i] || ss != node[1 - i]) + { + // e.g. + // "AA" = y ++ x ---> "AA" = x ++ y if x < y + // "AAA" = y ++ "A" ++ z ---> "AA" = y ++ z + new_ret = lhs.eqNode(ss); + node = returnRewrite(node, new_ret, "str-eq-homog-const"); + } + } + } + + // ------- rewrites for (= "" _) + Node empty = nm->mkConst(::CVC4::String("")); + for (size_t i = 0; i < 2; i++) + { + if (node[i] == empty) + { + Node ne = node[1 - i]; + if (ne.getKind() == STRING_STRREPL) + { + // (= "" (str.replace x y x)) ---> (= x "") + if (ne[0] == ne[2]) + { + Node ret = nm->mkNode(EQUAL, ne[0], empty); + return returnRewrite(node, ret, "str-emp-repl-x-y-x"); + } + + // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y ""))) + if (checkEntailNonEmpty(ne[2])) + { + Node ret = + nm->mkNode(AND, + nm->mkNode(EQUAL, ne[0], empty), + nm->mkNode(NOT, nm->mkNode(EQUAL, ne[1], empty))); + return returnRewrite(node, ret, "str-emp-repl-emp"); + } + + // (= "" (str.replace x "A" "")) ---> (str.prefix x "A") + if (checkEntailLengthOne(ne[1]) && ne[2] == empty) + { + Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]); + return returnRewrite(node, ret, "str-emp-repl-emp"); + } + } + else if (ne.getKind() == STRING_SUBSTR) + { + Node zero = nm->mkConst(Rational(0)); + + if (checkEntailArith(ne[1], false) && checkEntailArith(ne[2], true)) + { + // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0 + if (ne[1] == zero) + { + Node ret = nm->mkNode(EQUAL, ne[0], empty); + return returnRewrite(node, ret, "str-emp-substr-leq-len"); + } + + // (= "" (str.substr x n m)) ---> (<= (str.len x) n) + // if n >= 0 and m > 0 + Node ret = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, ne[0]), ne[1]); + return returnRewrite(node, ret, "str-emp-substr-leq-len"); + } + + // (= "" (str.substr "A" 0 z)) ---> (<= z 0) + if (checkEntailNonEmpty(ne[0]) && ne[1] == zero) + { + Node ret = nm->mkNode(LEQ, ne[2], zero); + return returnRewrite(node, ret, "str-emp-substr-leq-z"); + } + } + } + } + + // ------- rewrites for (= (str.replace _ _ _) _) + for (size_t i = 0; i < 2; i++) + { + if (node[i].getKind() == STRING_STRREPL) + { + Node repl = node[i]; + Node x = node[1 - i]; + + // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x)) + if (checkEntailNonEmpty(x) && repl[0] == empty) + { + Node ret = nm->mkNode( + EQUAL, empty, nm->mkNode(STRING_STRREPL, x, repl[2], repl[1])); + return returnRewrite(node, ret, "str-eq-repl-emp"); + } + + // (= x (str.replace y x y)) ---> (= x y) + if (repl[0] == repl[2] && x == repl[1]) + { + Node ret = nm->mkNode(EQUAL, x, repl[0]); + return returnRewrite(node, ret, "str-eq-repl-to-eq"); + } + + // (= x (str.replace x "A" "B")) ---> (not (str.contains x "A")) + if (x == repl[0]) + { + Node eq = Rewriter::rewrite(nm->mkNode(EQUAL, repl[1], repl[2])); + if (eq.isConst() && !eq.getConst<bool>()) + { + Node ret = nm->mkNode(NOT, nm->mkNode(STRING_STRCTN, x, repl[1])); + return returnRewrite(node, ret, "str-eq-repl-not-ctn"); + } + } + + // (= (str.replace x y z) z) --> (or (= x y) (= x z)) + // if (str.len y) = (str.len z) + if (repl[2] == x) + { + Node lenY = nm->mkNode(STRING_LENGTH, repl[1]); + Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]); + if (checkEntailArithEq(lenY, lenZ)) + { + Node ret = nm->mkNode(OR, + nm->mkNode(EQUAL, repl[0], repl[1]), + nm->mkNode(EQUAL, repl[0], repl[2])); + return returnRewrite(node, ret, "str-eq-repl-to-dis"); + } + } + } + } + + // Try to rewrite (= x y) into a conjunction of equalities based on length + // entailment. + // + // (<= (str.len x) (str.++ y1 ... yn)) AND (= x (str.++ y1 ... yn)) ---> + // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) + // + // where yi' and yi'' correspond to some yj and + // (<= (str.len x) (str.++ y1' ... ym')) + for (unsigned i = 0; i < 2; i++) + { + if (node[1 - i].getKind() == STRING_CONCAT) + { + new_ret = inferEqsFromContains(node[i], node[1 - i]); + if (!new_ret.isNull()) + { + return returnRewrite(node, new_ret, "str-eq-conj-len-entail"); + } + } + } + + if (node[0].getKind() == STRING_CONCAT && node[1].getKind() == STRING_CONCAT) + { + // (= (str.++ x_1 ... x_i x_{i + 1} ... x_n) + // (str.++ y_1 ... y_j y_{j + 1} ... y_m)) ---> + // (and (= (str.++ x_1 ... x_i) (str.++ y_1 ... y_j)) + // (= (str.++ x_{i + 1} ... x_n) (str.++ y_{j + 1} ... y_m))) + // + // if (str.len (str.++ x_1 ... x_i)) = (str.len (str.++ y_1 ... y_j)) + // + // This rewrite performs length-based equality splitting: If we can show + // that two prefixes have the same length, we can split an equality into + // two equalities, one over the prefixes and another over the suffixes. + std::vector<Node> v0, v1; + utils::getConcat(node[0], v0); + utils::getConcat(node[1], v1); + size_t startRhs = 0; + for (size_t i = 0, size0 = v0.size(); i <= size0; i++) + { + std::vector<Node> pfxv0(v0.begin(), v0.begin() + i); + Node pfx0 = utils::mkConcat(STRING_CONCAT, pfxv0); + for (size_t j = startRhs, size1 = v1.size(); j <= size1; j++) + { + if (!(i == 0 && j == 0) && !(i == v0.size() && j == v1.size())) + { + std::vector<Node> pfxv1(v1.begin(), v1.begin() + j); + Node pfx1 = utils::mkConcat(STRING_CONCAT, pfxv1); + Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0); + Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1); + + if (checkEntailArithEq(lenPfx0, lenPfx1)) + { + std::vector<Node> sfxv0(v0.begin() + i, v0.end()); + std::vector<Node> sfxv1(v1.begin() + j, v1.end()); + Node ret = + nm->mkNode(kind::AND, + pfx0.eqNode(pfx1), + utils::mkConcat(STRING_CONCAT, sfxv0) + .eqNode(utils::mkConcat(STRING_CONCAT, sfxv1))); + return returnRewrite(node, ret, "split-eq"); + } + else if (checkEntailArith(lenPfx1, lenPfx0, true)) + { + // The prefix on the right-hand side is strictly longer than the + // prefix on the left-hand side, so we try to strip the right-hand + // prefix by the length of the left-hand prefix + // + // Example: + // (= (str.++ "A" x y) (str.++ x "AB" z)) ---> + // (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z))) + std::vector<Node> rpfxv1; + if (stripSymbolicLength(pfxv1, rpfxv1, 1, lenPfx0)) + { + std::vector<Node> sfxv0(v0.begin() + i, v0.end()); + pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end()); + Node ret = nm->mkNode( + kind::AND, + pfx0.eqNode(utils::mkConcat(STRING_CONCAT, rpfxv1)), + utils::mkConcat(STRING_CONCAT, sfxv0) + .eqNode(utils::mkConcat(STRING_CONCAT, pfxv1))); + return returnRewrite(node, ret, "split-eq-strip-r"); + } + + // If the prefix of the right-hand side is (strictly) longer than + // the prefix of the left-hand side, we can advance the left-hand + // side (since the length of the right-hand side is only increasing + // in the inner loop) + break; + } + else if (checkEntailArith(lenPfx0, lenPfx1, true)) + { + // The prefix on the left-hand side is strictly longer than the + // prefix on the right-hand side, so we try to strip the left-hand + // prefix by the length of the right-hand prefix + // + // Example: + // (= (str.++ x "AB" z) (str.++ "A" x y)) ---> + // (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y)) + std::vector<Node> rpfxv0; + if (stripSymbolicLength(pfxv0, rpfxv0, 1, lenPfx1)) + { + pfxv0.insert(pfxv0.end(), v0.begin() + i, v0.end()); + std::vector<Node> sfxv1(v1.begin() + j, v1.end()); + Node ret = nm->mkNode( + kind::AND, + utils::mkConcat(STRING_CONCAT, rpfxv0).eqNode(pfx1), + utils::mkConcat(STRING_CONCAT, pfxv0) + .eqNode(utils::mkConcat(STRING_CONCAT, sfxv1))); + return returnRewrite(node, ret, "split-eq-strip-l"); + } + + // If the prefix of the left-hand side is (strictly) longer than + // the prefix of the right-hand side, then we don't need to check + // that right-hand prefix for future left-hand prefixes anymore + // (since they are increasing in length) + startRhs = j + 1; + } + } + } + } + } + + return node; +} + +Node SequencesRewriter::rewriteArithEqualityExt(Node node) +{ + Assert(node.getKind() == EQUAL && node[0].getType().isInteger()); + + // cases where we can solve the equality + + // notice we cannot rewrite str.to.int(x)=n to x="n" due to leading zeroes. + + return node; +} + +// TODO (#1180) add rewrite +// str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) ---> +// str.substr( x, n1, n2+n3 ) +Node SequencesRewriter::rewriteConcat(Node node) +{ + Assert(node.getKind() == kind::STRING_CONCAT); + Trace("strings-rewrite-debug") + << "Strings::rewriteConcat start " << node << std::endl; + NodeManager* nm = NodeManager::currentNM(); + Node retNode = node; + std::vector<Node> node_vec; + Node preNode = Node::null(); + for (Node tmpNode : node) + { + if (tmpNode.getKind() == STRING_CONCAT) + { + unsigned j = 0; + // combine the first term with the previous constant if applicable + if (!preNode.isNull()) + { + if (tmpNode[0].isConst()) + { + preNode = nm->mkConst( + preNode.getConst<String>().concat(tmpNode[0].getConst<String>())); + node_vec.push_back(preNode); + } + else + { + node_vec.push_back(preNode); + node_vec.push_back(tmpNode[0]); + } + preNode = Node::null(); + ++j; + } + // insert the middle terms to node_vec + if (j <= tmpNode.getNumChildren() - 1) + { + node_vec.insert(node_vec.end(), tmpNode.begin() + j, tmpNode.end() - 1); + } + // take the last term as the current + tmpNode = tmpNode[tmpNode.getNumChildren() - 1]; + } + if (!tmpNode.isConst()) + { + if (!preNode.isNull()) + { + if (preNode.isConst() && !Word::isEmpty(preNode)) + { + node_vec.push_back(preNode); + } + preNode = Node::null(); + } + node_vec.push_back(tmpNode); + } + else + { + if (preNode.isNull()) + { + preNode = tmpNode; + } + else + { + std::vector<Node> vec; + vec.push_back(preNode); + vec.push_back(tmpNode); + preNode = Word::mkWord(vec); + } + } + } + if (!preNode.isNull() && (!preNode.isConst() || !Word::isEmpty(preNode))) + { + node_vec.push_back(preNode); + } + + // Sort adjacent operands in str.++ that all result in the same string or the + // empty string. + // + // E.g.: (str.++ ... (str.replace "A" x "") "A" (str.substr "A" 0 z) ...) --> + // (str.++ ... [sort those 3 arguments] ... ) + size_t lastIdx = 0; + Node lastX; + for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++) + { + Node s = getStringOrEmpty(node_vec[i]); + bool nextX = false; + if (s != lastX) + { + nextX = true; + } + + if (nextX) + { + std::sort(node_vec.begin() + lastIdx, node_vec.begin() + i); + lastX = s; + lastIdx = i; + } + } + std::sort(node_vec.begin() + lastIdx, node_vec.end()); + + retNode = utils::mkConcat(STRING_CONCAT, node_vec); + Trace("strings-rewrite-debug") + << "Strings::rewriteConcat end " << retNode << std::endl; + return retNode; +} + +Node SequencesRewriter::rewriteConcatRegExp(TNode node) +{ + Assert(node.getKind() == kind::REGEXP_CONCAT); + NodeManager* nm = NodeManager::currentNM(); + Trace("strings-rewrite-debug") + << "Strings::rewriteConcatRegExp flatten " << node << std::endl; + Node retNode = node; + std::vector<Node> vec; + bool changed = false; + Node emptyRe; + for (const Node& c : node) + { + if (c.getKind() == REGEXP_CONCAT) + { + changed = true; + for (const Node& cc : c) + { + vec.push_back(cc); + } + } + else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst() + && Word::isEmpty(c[0])) + { + changed = true; + emptyRe = c; + } + else if (c.getKind() == REGEXP_EMPTY) + { + // re.++( ..., empty, ... ) ---> empty + std::vector<Node> nvec; + return nm->mkNode(REGEXP_EMPTY, nvec); + } + else + { + vec.push_back(c); + } + } + if (changed) + { + // flatten + // this handles nested re.++ and elimination or str.to.re(""), e.g.: + // re.++( re.++( R1, R2 ), str.to.re(""), R3 ) ---> re.++( R1, R2, R3 ) + if (vec.empty()) + { + Assert(!emptyRe.isNull()); + retNode = emptyRe; + } + else + { + retNode = vec.size() == 1 ? vec[0] : nm->mkNode(REGEXP_CONCAT, vec); + } + return returnRewrite(node, retNode, "re.concat-flatten"); + } + Trace("strings-rewrite-debug") + << "Strings::rewriteConcatRegExp start " << node << std::endl; + std::vector<Node> cvec; + // the current accumulation of constant strings + std::vector<Node> preReStr; + // whether the last component was (_)* + bool lastAllStar = false; + String emptyStr = String(""); + // this loop checks to see if components can be combined or dropped + for (unsigned i = 0, size = vec.size(); i <= size; i++) + { + Node curr; + if (i < size) + { + curr = vec[i]; + Assert(curr.getKind() != REGEXP_CONCAT); + } + // update preReStr + if (!curr.isNull() && curr.getKind() == STRING_TO_REGEXP) + { + lastAllStar = false; + preReStr.push_back(curr[0]); + curr = Node::null(); + } + else if (!preReStr.empty()) + { + Assert(!lastAllStar); + // this groups consecutive strings a++b ---> ab + Node acc = nm->mkNode(STRING_TO_REGEXP, + utils::mkConcat(STRING_CONCAT, preReStr)); + cvec.push_back(acc); + preReStr.clear(); + } + else if (!curr.isNull() && lastAllStar) + { + // if empty, drop it + // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)* + if (isConstRegExp(curr) && testConstStringInRegExp(emptyStr, 0, curr)) + { + curr = Node::null(); + } + } + if (!curr.isNull()) + { + lastAllStar = false; + if (curr.getKind() == REGEXP_STAR) + { + // we can group stars (a)* ++ (a)* ---> (a)* + if (!cvec.empty() && cvec.back() == curr) + { + curr = Node::null(); + } + else if (curr[0].getKind() == REGEXP_SIGMA) + { + Assert(!lastAllStar); + lastAllStar = true; + // go back and remove empty ones from back of cvec + // e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)* + while (!cvec.empty() && isConstRegExp(cvec.back()) + && testConstStringInRegExp(emptyStr, 0, cvec.back())) + { + cvec.pop_back(); + } + } + } + } + if (!curr.isNull()) + { + cvec.push_back(curr); + } + } + Assert(!cvec.empty()); + retNode = utils::mkConcat(REGEXP_CONCAT, cvec); + if (retNode != node) + { + // handles all cases where consecutive re constants are combined or dropped + // as described in the loop above. + return returnRewrite(node, retNode, "re.concat"); + } + + // flipping adjacent star arguments + changed = false; + for (size_t i = 0, size = cvec.size() - 1; i < size; i++) + { + if (cvec[i].getKind() == REGEXP_STAR && cvec[i][0] == cvec[i + 1]) + { + // by convention, flip the order (a*)++a ---> a++(a*) + std::swap(cvec[i], cvec[i + 1]); + changed = true; + } + } + if (changed) + { + retNode = utils::mkConcat(REGEXP_CONCAT, cvec); + return returnRewrite(node, retNode, "re.concat.opt"); + } + return node; +} + +Node SequencesRewriter::rewriteStarRegExp(TNode node) +{ + Assert(node.getKind() == REGEXP_STAR); + NodeManager* nm = NodeManager::currentNM(); + Node retNode = node; + if (node[0].getKind() == REGEXP_STAR) + { + // ((R)*)* ---> R* + return returnRewrite(node, node[0], "re-star-nested-star"); + } + else if (node[0].getKind() == STRING_TO_REGEXP && node[0][0].isConst() + && Word::isEmpty(node[0][0])) + { + // ("")* ---> "" + return returnRewrite(node, node[0], "re-star-empty-string"); + } + else if (node[0].getKind() == REGEXP_EMPTY) + { + // (empty)* ---> "" + retNode = nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))); + return returnRewrite(node, retNode, "re-star-empty"); + } + else if (node[0].getKind() == REGEXP_UNION) + { + // simplification of unions under star + if (hasEpsilonNode(node[0])) + { + bool changed = false; + std::vector<Node> node_vec; + for (const Node& nc : node[0]) + { + if (nc.getKind() == STRING_TO_REGEXP && nc[0].isConst() + && Word::isEmpty(nc[0])) + { + // can be removed + changed = true; + } + else + { + node_vec.push_back(nc); + } + } + if (changed) + { + retNode = node_vec.size() == 1 ? node_vec[0] + : nm->mkNode(REGEXP_UNION, node_vec); + retNode = nm->mkNode(REGEXP_STAR, retNode); + // simplification of union beneath star based on loop above + // for example, ( "" | "a" )* ---> ("a")* + return returnRewrite(node, retNode, "re-star-union"); + } + } + } + return node; +} + +Node SequencesRewriter::rewriteAndOrRegExp(TNode node) +{ + Kind nk = node.getKind(); + Assert(nk == REGEXP_UNION || nk == REGEXP_INTER); + Trace("strings-rewrite-debug") + << "Strings::rewriteAndOrRegExp start " << node << std::endl; + std::vector<Node> node_vec; + for (const Node& ni : node) + { + if (ni.getKind() == nk) + { + for (const Node& nic : ni) + { + if (std::find(node_vec.begin(), node_vec.end(), nic) == node_vec.end()) + { + node_vec.push_back(nic); + } + } + } + else if (ni.getKind() == REGEXP_EMPTY) + { + if (nk == REGEXP_INTER) + { + return returnRewrite(node, ni, "re.and-empty"); + } + // otherwise, can ignore + } + else if (ni.getKind() == REGEXP_STAR && ni[0].getKind() == REGEXP_SIGMA) + { + if (nk == REGEXP_UNION) + { + return returnRewrite(node, ni, "re.or-all"); + } + // otherwise, can ignore + } + else if (std::find(node_vec.begin(), node_vec.end(), ni) == node_vec.end()) + { + node_vec.push_back(ni); + } + } + NodeManager* nm = NodeManager::currentNM(); + std::vector<Node> nvec; + Node retNode; + if (node_vec.empty()) + { + if (nk == REGEXP_INTER) + { + retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, nvec)); + } + else + { + retNode = nm->mkNode(kind::REGEXP_EMPTY, nvec); + } + } + else + { + retNode = node_vec.size() == 1 ? node_vec[0] : nm->mkNode(nk, node_vec); + } + if (retNode != node) + { + // flattening and removing children, based on loop above + return returnRewrite(node, retNode, "re.andor-flatten"); + } + return node; +} + +Node SequencesRewriter::rewriteLoopRegExp(TNode node) +{ + Assert(node.getKind() == REGEXP_LOOP); + Node retNode = node; + Node r = node[0]; + if (r.getKind() == REGEXP_STAR) + { + return returnRewrite(node, r, "re.loop-star"); + } + TNode n1 = node[1]; + NodeManager* nm = NodeManager::currentNM(); + CVC4::Rational rMaxInt(String::maxSize()); + AlwaysAssert(n1.isConst()) << "re.loop contains non-constant integer (1)."; + AlwaysAssert(n1.getConst<Rational>().sgn() >= 0) + << "Negative integer in string REGEXP_LOOP (1)"; + Assert(n1.getConst<Rational>() <= rMaxInt) + << "Exceeded UINT32_MAX in string REGEXP_LOOP (1)"; + uint32_t l = n1.getConst<Rational>().getNumerator().toUnsignedInt(); + std::vector<Node> vec_nodes; + for (unsigned i = 0; i < l; i++) + { + vec_nodes.push_back(r); + } + if (node.getNumChildren() == 3) + { + TNode n2 = Rewriter::rewrite(node[2]); + Node n = + vec_nodes.size() == 0 + ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))) + : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes); + AlwaysAssert(n2.isConst()) << "re.loop contains non-constant integer (2)."; + AlwaysAssert(n2.getConst<Rational>().sgn() >= 0) + << "Negative integer in string REGEXP_LOOP (2)"; + Assert(n2.getConst<Rational>() <= rMaxInt) + << "Exceeded UINT32_MAX in string REGEXP_LOOP (2)"; + uint32_t u = n2.getConst<Rational>().getNumerator().toUnsignedInt(); + if (u <= l) + { + retNode = n; + } + else + { + std::vector<Node> vec2; + vec2.push_back(n); + for (unsigned j = l; j < u; j++) + { + vec_nodes.push_back(r); + n = utils::mkConcat(REGEXP_CONCAT, vec_nodes); + vec2.push_back(n); + } + retNode = nm->mkNode(REGEXP_UNION, vec2); + } + } + else + { + Node rest = nm->mkNode(REGEXP_STAR, r); + retNode = vec_nodes.size() == 0 + ? rest + : vec_nodes.size() == 1 + ? nm->mkNode(REGEXP_CONCAT, r, rest) + : nm->mkNode(REGEXP_CONCAT, + nm->mkNode(REGEXP_CONCAT, vec_nodes), + rest); + } + Trace("strings-lp") << "Strings::lp " << node << " => " << retNode + << std::endl; + if (retNode != node) + { + return returnRewrite(node, retNode, "re.loop"); + } + return node; +} + +bool SequencesRewriter::isConstRegExp(TNode t) +{ + if (t.getKind() == kind::STRING_TO_REGEXP) + { + return t[0].isConst(); + } + else if (t.isVar()) + { + return false; + } + else + { + for (unsigned i = 0; i < t.getNumChildren(); ++i) + { + if (!isConstRegExp(t[i])) + { + return false; + } + } + return true; + } +} + +bool SequencesRewriter::testConstStringInRegExp(CVC4::String& s, + unsigned int index_start, + TNode r) +{ + Assert(index_start <= s.size()); + Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at " + << index_start << std::endl; + Assert(!r.isVar()); + Kind k = r.getKind(); + switch (k) + { + case kind::STRING_TO_REGEXP: + { + CVC4::String s2 = s.substr(index_start, s.size() - index_start); + if (r[0].isConst()) + { + return (s2 == r[0].getConst<String>()); + } + else + { + Assert(false) << "RegExp contains variables"; + return false; + } + } + case kind::REGEXP_CONCAT: + { + if (s.size() != index_start) + { + std::vector<int> vec_k(r.getNumChildren(), -1); + int start = 0; + int left = (int)s.size() - index_start; + int i = 0; + while (i < (int)r.getNumChildren()) + { + bool flag = true; + if (i == (int)r.getNumChildren() - 1) + { + if (testConstStringInRegExp(s, index_start + start, r[i])) + { + return true; + } + } + else if (i == -1) + { + return false; + } + else + { + for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i]) + { + CVC4::String t = s.substr(index_start + start, vec_k[i]); + if (testConstStringInRegExp(t, 0, r[i])) + { + start += vec_k[i]; + left -= vec_k[i]; + flag = false; + ++i; + vec_k[i] = -1; + break; + } + } + } + + if (flag) + { + --i; + if (i >= 0) + { + start -= vec_k[i]; + left += vec_k[i]; + } + } + } + return false; + } + else + { + for (unsigned i = 0; i < r.getNumChildren(); ++i) + { + if (!testConstStringInRegExp(s, index_start, r[i])) return false; + } + return true; + } + } + case kind::REGEXP_UNION: + { + for (unsigned i = 0; i < r.getNumChildren(); ++i) + { + if (testConstStringInRegExp(s, index_start, r[i])) return true; + } + return false; + } + case kind::REGEXP_INTER: + { + for (unsigned i = 0; i < r.getNumChildren(); ++i) + { + if (!testConstStringInRegExp(s, index_start, r[i])) return false; + } + return true; + } + case kind::REGEXP_STAR: + { + if (s.size() != index_start) + { + for (unsigned i = s.size() - index_start; i > 0; --i) + { + CVC4::String t = s.substr(index_start, i); + if (testConstStringInRegExp(t, 0, r[0])) + { + if (index_start + i == s.size() + || testConstStringInRegExp(s, index_start + i, r)) + { + return true; + } + } + } + return false; + } + else + { + return true; + } + } + case kind::REGEXP_EMPTY: { return false; + } + case kind::REGEXP_SIGMA: + { + if (s.size() == index_start + 1) + { + return true; + } + else + { + return false; + } + } + case kind::REGEXP_RANGE: + { + if (s.size() == index_start + 1) + { + unsigned a = r[0].getConst<String>().front(); + a = String::convertUnsignedIntToCode(a); + unsigned b = r[1].getConst<String>().front(); + b = String::convertUnsignedIntToCode(b); + unsigned c = s.back(); + c = String::convertUnsignedIntToCode(c); + return (a <= c && c <= b); + } + else + { + return false; + } + } + case kind::REGEXP_LOOP: + { + uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt(); + if (s.size() == index_start) + { + return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]); + } + else if (l == 0 && r[1] == r[2]) + { + return false; + } + else + { + Assert(r.getNumChildren() == 3) + << "String rewriter error: LOOP has 2 children"; + if (l == 0) + { + // R{0,u} + uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt(); + for (unsigned len = s.size() - index_start; len >= 1; len--) + { + CVC4::String t = s.substr(index_start, len); + if (testConstStringInRegExp(t, 0, r[0])) + { + if (len + index_start == s.size()) + { + return true; + } + else + { + Node num2 = + NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1)); + Node r2 = NodeManager::currentNM()->mkNode( + kind::REGEXP_LOOP, r[0], r[1], num2); + if (testConstStringInRegExp(s, index_start + len, r2)) + { + return true; + } + } + } + } + return false; + } + else + { + // R{l,l} + Assert(r[1] == r[2]) + << "String rewriter error: LOOP nums are not equal"; + if (l > s.size() - index_start) + { + if (testConstStringInRegExp(s, s.size(), r[0])) + { + l = s.size() - index_start; + } + else + { + return false; + } + } + for (unsigned len = 1; len <= s.size() - index_start; len++) + { + CVC4::String t = s.substr(index_start, len); + if (testConstStringInRegExp(t, 0, r[0])) + { + Node num2 = + NodeManager::currentNM()->mkConst(CVC4::Rational(l - 1)); + Node r2 = NodeManager::currentNM()->mkNode( + kind::REGEXP_LOOP, r[0], num2, num2); + if (testConstStringInRegExp(s, index_start + len, r2)) + { + return true; + } + } + } + return false; + } + } + } + case REGEXP_COMPLEMENT: + { + return !testConstStringInRegExp(s, index_start, r[0]); + break; + } + default: + { + Assert(!RegExpOpr::isRegExpKind(k)); + return false; + } + } +} + +Node SequencesRewriter::rewriteMembership(TNode node) +{ + NodeManager* nm = NodeManager::currentNM(); + Node retNode = node; + Node x = node[0]; + Node r = node[1]; + + if (r.getKind() == kind::REGEXP_EMPTY) + { + retNode = NodeManager::currentNM()->mkConst(false); + } + else if (x.isConst() && isConstRegExp(r)) + { + // test whether x in node[1] + CVC4::String s = x.getConst<String>(); + retNode = + NodeManager::currentNM()->mkConst(testConstStringInRegExp(s, 0, r)); + } + else if (r.getKind() == kind::REGEXP_SIGMA) + { + Node one = nm->mkConst(Rational(1)); + retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x)); + } + else if (r.getKind() == kind::REGEXP_STAR) + { + if (x.isConst()) + { + String s = x.getConst<String>(); + if (s.size() == 0) + { + retNode = nm->mkConst(true); + // e.g. (str.in.re "" (re.* (str.to.re x))) ----> true + return returnRewrite(node, retNode, "re-empty-in-str-star"); + } + else if (s.size() == 1) + { + if (r[0].getKind() == STRING_TO_REGEXP) + { + retNode = r[0][0].eqNode(x); + // e.g. (str.in.re "A" (re.* (str.to.re x))) ----> "A" = x + return returnRewrite(node, retNode, "re-char-in-str-star"); + } + } + } + else if (x.getKind() == STRING_CONCAT) + { + // (str.in.re (str.++ x1 ... xn) (re.* R)) --> + // (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R)) + // if the length of all strings in R is one. + Node flr = getFixedLengthForRegexp(r[0]); + if (!flr.isNull()) + { + Node one = nm->mkConst(Rational(1)); + if (flr == one) + { + NodeBuilder<> nb(AND); + for (const Node& xc : x) + { + nb << nm->mkNode(STRING_IN_REGEXP, xc, r); + } + return returnRewrite( + node, nb.constructNode(), "re-in-dist-char-star"); + } + } + } + if (r[0].getKind() == kind::REGEXP_SIGMA) + { + retNode = NodeManager::currentNM()->mkConst(true); + return returnRewrite(node, retNode, "re-in-sigma-star"); + } + } + else if (r.getKind() == kind::REGEXP_CONCAT) + { + bool allSigma = true; + bool allSigmaStrict = true; + unsigned allSigmaMinSize = 0; + Node constStr; + size_t constIdx = 0; + size_t nchildren = r.getNumChildren(); + for (size_t i = 0; i < nchildren; i++) + { + Node rc = r[i]; + Assert(rc.getKind() != kind::REGEXP_EMPTY); + if (rc.getKind() == kind::REGEXP_SIGMA) + { + allSigmaMinSize++; + } + else if (rc.getKind() == REGEXP_STAR && rc[0].getKind() == REGEXP_SIGMA) + { + allSigmaStrict = false; + } + else if (rc.getKind() == STRING_TO_REGEXP) + { + if (constStr.isNull()) + { + constStr = rc[0]; + constIdx = i; + } + else + { + allSigma = false; + break; + } + } + else + { + allSigma = false; + break; + } + } + if (allSigma) + { + if (constStr.isNull()) + { + // x in re.++(_*, _, _) ---> str.len(x) >= 2 + Node num = nm->mkConst(Rational(allSigmaMinSize)); + Node lenx = nm->mkNode(STRING_LENGTH, x); + retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num); + return returnRewrite(node, retNode, "re-concat-pure-allchar"); + } + else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0 + && constIdx != nchildren - 1) + { + // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc") + retNode = nm->mkNode(STRING_STRCTN, x, constStr); + return returnRewrite(node, retNode, "re-concat-to-contains"); + } + } + } + else if (r.getKind() == kind::REGEXP_INTER + || r.getKind() == kind::REGEXP_UNION) + { + std::vector<Node> mvec; + for (unsigned i = 0; i < r.getNumChildren(); i++) + { + mvec.push_back( + NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r[i])); + } + retNode = NodeManager::currentNM()->mkNode( + r.getKind() == kind::REGEXP_INTER ? kind::AND : kind::OR, mvec); + } + else if (r.getKind() == kind::STRING_TO_REGEXP) + { + retNode = x.eqNode(r[0]); + } + else if (r.getKind() == REGEXP_RANGE) + { + // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j + Node xcode = nm->mkNode(STRING_TO_CODE, x); + retNode = + nm->mkNode(AND, + nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode), + nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1]))); + } + else if (r.getKind() == REGEXP_COMPLEMENT) + { + retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate(); + } + else if (x != node[0] || r != node[1]) + { + retNode = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r); + } + + // do simple consumes + if (retNode == node) + { + if (r.getKind() == kind::REGEXP_STAR) + { + for (unsigned dir = 0; dir <= 1; dir++) + { + std::vector<Node> mchildren; + utils::getConcat(x, mchildren); + bool success = true; + while (success) + { + success = false; + std::vector<Node> children; + utils::getConcat(r[0], children); + Node scn = simpleRegexpConsume(mchildren, children, dir); + if (!scn.isNull()) + { + Trace("regexp-ext-rewrite") + << "Regexp star : const conflict : " << node << std::endl; + return scn; + } + else if (children.empty()) + { + // fully consumed one copy of the STAR + if (mchildren.empty()) + { + Trace("regexp-ext-rewrite") + << "Regexp star : full consume : " << node << std::endl; + return NodeManager::currentNM()->mkConst(true); + } + else + { + retNode = nm->mkNode(STRING_IN_REGEXP, + utils::mkConcat(STRING_CONCAT, mchildren), + r); + success = true; + } + } + } + if (retNode != node) + { + Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node + << " -> " << retNode << std::endl; + break; + } + } + } + else + { + std::vector<Node> children; + utils::getConcat(r, children); + std::vector<Node> mchildren; + utils::getConcat(x, mchildren); + unsigned prevSize = children.size() + mchildren.size(); + Node scn = simpleRegexpConsume(mchildren, children); + if (!scn.isNull()) + { + Trace("regexp-ext-rewrite") + << "Regexp : const conflict : " << node << std::endl; + return scn; + } + else + { + if ((children.size() + mchildren.size()) != prevSize) + { + // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm), + // above, we strip components to construct an equivalent membership: + // (str.++ xi .. xj) in (re.++ rk ... rl). + Node xn = utils::mkConcat(STRING_CONCAT, mchildren); + Node emptyStr = nm->mkConst(String("")); + if (children.empty()) + { + // If we stripped all components on the right, then the left is + // equal to the empty string. + // e.g. (str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "") + retNode = xn.eqNode(emptyStr); + } + else + { + // otherwise, construct the updated regular expression + retNode = nm->mkNode( + STRING_IN_REGEXP, xn, utils::mkConcat(REGEXP_CONCAT, children)); + } + Trace("regexp-ext-rewrite") << "Regexp : rewrite : " << node << " -> " + << retNode << std::endl; + return returnRewrite(node, retNode, "re-simple-consume"); + } + } + } + } + return retNode; +} + +RewriteResponse SequencesRewriter::postRewrite(TNode node) +{ + Trace("strings-postrewrite") + << "Strings::postRewrite start " << node << std::endl; + NodeManager* nm = NodeManager::currentNM(); + Node retNode = node; + Node orig = retNode; + Kind nk = node.getKind(); + if (nk == kind::STRING_CONCAT) + { + retNode = rewriteConcat(node); + } + else if (nk == kind::EQUAL) + { + retNode = rewriteEquality(node); + } + else if (nk == kind::STRING_LENGTH) + { + Kind nk0 = node[0].getKind(); + if (node[0].isConst()) + { + retNode = nm->mkConst(Rational(Word::getLength(node[0]))); + } + else if (nk0 == kind::STRING_CONCAT) + { + Node tmpNode = node[0]; + if (tmpNode.isConst()) + { + retNode = nm->mkConst(Rational(Word::getLength(tmpNode))); + } + else if (tmpNode.getKind() == kind::STRING_CONCAT) + { + std::vector<Node> node_vec; + for (unsigned int i = 0; i < tmpNode.getNumChildren(); ++i) + { + if (tmpNode[i].isConst()) + { + node_vec.push_back( + nm->mkConst(Rational(Word::getLength(tmpNode[i])))); + } + else + { + node_vec.push_back(NodeManager::currentNM()->mkNode( + kind::STRING_LENGTH, tmpNode[i])); + } + } + retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec); + } + } + else if (nk0 == STRING_STRREPL || nk0 == STRING_STRREPLALL) + { + Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1])); + Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2])); + if (len1 == len2) + { + // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x ) + retNode = nm->mkNode(STRING_LENGTH, node[0][0]); + } + } + else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER + || nk0 == STRING_REV) + { + // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev. + retNode = nm->mkNode(STRING_LENGTH, node[0][0]); + } + } + else if (nk == kind::STRING_CHARAT) + { + Node one = NodeManager::currentNM()->mkConst(Rational(1)); + retNode = NodeManager::currentNM()->mkNode( + kind::STRING_SUBSTR, node[0], node[1], one); + } + else if (nk == kind::STRING_SUBSTR) + { + retNode = rewriteSubstr(node); + } + else if (nk == kind::STRING_STRCTN) + { + retNode = rewriteContains(node); + } + else if (nk == kind::STRING_LT) + { + // eliminate s < t ---> s != t AND s <= t + retNode = nm->mkNode(AND, + node[0].eqNode(node[1]).negate(), + nm->mkNode(STRING_LEQ, node[0], node[1])); + } + else if (nk == kind::STRING_LEQ) + { + retNode = StringsRewriter::rewriteStringLeq(node); + } + else if (nk == kind::STRING_STRIDOF) + { + retNode = rewriteIndexof(node); + } + else if (nk == kind::STRING_STRREPL) + { + retNode = rewriteReplace(node); + } + else if (nk == kind::STRING_STRREPLALL) + { + retNode = rewriteReplaceAll(node); + } + else if (nk == STRING_TOLOWER || nk == STRING_TOUPPER) + { + retNode = StringsRewriter::rewriteStrConvert(node); + } + else if (nk == STRING_REV) + { + retNode = rewriteStrReverse(node); + } + else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX) + { + retNode = rewritePrefixSuffix(node); + } + else if (nk == STRING_IS_DIGIT) + { + // eliminate str.is_digit(s) ----> 48 <= str.to_code(s) <= 57 + Node t = nm->mkNode(STRING_TO_CODE, node[0]); + retNode = nm->mkNode(AND, + nm->mkNode(LEQ, nm->mkConst(Rational(48)), t), + nm->mkNode(LEQ, t, nm->mkConst(Rational(57)))); + } + else if (nk == kind::STRING_ITOS) + { + retNode = StringsRewriter::rewriteIntToStr(node); + } + else if (nk == kind::STRING_STOI) + { + retNode = StringsRewriter::rewriteStrToInt(node); + } + else if (nk == kind::STRING_IN_REGEXP) + { + retNode = rewriteMembership(node); + } + else if (nk == STRING_TO_CODE) + { + retNode = StringsRewriter::rewriteStringToCode(node); + } + else if (nk == STRING_FROM_CODE) + { + retNode = StringsRewriter::rewriteStringFromCode(node); + } + else if (nk == REGEXP_CONCAT) + { + retNode = rewriteConcatRegExp(node); + } + else if (nk == REGEXP_UNION || nk == REGEXP_INTER) + { + retNode = rewriteAndOrRegExp(node); + } + else if (nk == REGEXP_DIFF) + { + retNode = nm->mkNode( + REGEXP_INTER, node[0], nm->mkNode(REGEXP_COMPLEMENT, node[1])); + } + else if (nk == REGEXP_STAR) + { + retNode = rewriteStarRegExp(node); + } + else if (nk == REGEXP_PLUS) + { + retNode = + nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0])); + } + else if (nk == REGEXP_OPT) + { + retNode = nm->mkNode(REGEXP_UNION, + nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))), + node[0]); + } + else if (nk == REGEXP_RANGE) + { + if (node[0] == node[1]) + { + retNode = nm->mkNode(STRING_TO_REGEXP, node[0]); + } + } + else if (nk == REGEXP_LOOP) + { + retNode = rewriteLoopRegExp(node); + } + + Trace("strings-postrewrite") + << "Strings::postRewrite returning " << retNode << std::endl; + if (orig != retNode) + { + Trace("strings-rewrite-debug") + << "Strings: post-rewrite " << orig << " to " << retNode << std::endl; + } + return RewriteResponse(orig == retNode ? REWRITE_DONE : REWRITE_AGAIN_FULL, + retNode); +} + +bool SequencesRewriter::hasEpsilonNode(TNode node) +{ + for (unsigned int i = 0; i < node.getNumChildren(); i++) + { + if (node[i].getKind() == kind::STRING_TO_REGEXP && node[i][0].isConst() + && Word::isEmpty(node[i][0])) + { + return true; + } + } + return false; +} + +RewriteResponse SequencesRewriter::preRewrite(TNode node) +{ + return RewriteResponse(REWRITE_DONE, node); +} + +Node SequencesRewriter::rewriteSubstr(Node node) +{ + Assert(node.getKind() == kind::STRING_SUBSTR); + + NodeManager* nm = NodeManager::currentNM(); + if (node[0].isConst()) + { + if (Word::isEmpty(node[0])) + { + Node ret = node[0]; + return returnRewrite(node, ret, "ss-emptystr"); + } + // rewriting for constant arguments + if (node[1].isConst() && node[2].isConst()) + { + Node s = node[0]; + CVC4::Rational rMaxInt(String::maxSize()); + uint32_t start; + if (node[1].getConst<Rational>() > rMaxInt) + { + // start beyond the maximum size of strings + // thus, it must be beyond the end point of this string + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-const-start-max-oob"); + } + else if (node[1].getConst<Rational>().sgn() < 0) + { + // start before the beginning of the string + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-const-start-neg"); + } + else + { + start = node[1].getConst<Rational>().getNumerator().toUnsignedInt(); + if (start >= Word::getLength(node[0])) + { + // start beyond the end of the string + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-const-start-oob"); + } + } + if (node[2].getConst<Rational>() > rMaxInt) + { + // take up to the end of the string + size_t lenS = Word::getLength(s); + Node ret = Word::suffix(s, lenS - start); + return returnRewrite(node, ret, "ss-const-len-max-oob"); + } + else if (node[2].getConst<Rational>().sgn() <= 0) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-const-len-non-pos"); + } + else + { + uint32_t len = + node[2].getConst<Rational>().getNumerator().toUnsignedInt(); + if (start + len > Word::getLength(node[0])) + { + // take up to the end of the string + size_t lenS = Word::getLength(s); + Node ret = Word::suffix(s, lenS - start); + return returnRewrite(node, ret, "ss-const-end-oob"); + } + else + { + // compute the substr using the constant string + Node ret = Word::substr(s, start, len); + return returnRewrite(node, ret, "ss-const-ss"); + } + } + } + } + Node zero = nm->mkConst(CVC4::Rational(0)); + + // if entailed non-positive length or negative start point + if (checkEntailArith(zero, node[1], true)) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-start-neg"); + } + else if (checkEntailArith(zero, node[2])) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-len-non-pos"); + } + + if (node[0].getKind() == STRING_SUBSTR) + { + // (str.substr (str.substr x a b) c d) ---> "" if c >= b + // + // Note that this rewrite can be generalized to: + // + // (str.substr x a b) ---> "" if a >= (str.len x) + // + // This can be done when we generalize our entailment methods to + // accept an optional context. Then we could conjecture that + // (str.substr x a b) rewrites to "" and do a case analysis: + // + // - a < 0 or b < 0 (the result is trivially empty in these cases) + // - a >= (str.len x) assuming that { a >= 0, b >= 0 } + // + // For example, for (str.substr (str.substr x a a) a a), we could + // then deduce that under those assumptions, "a" is an + // over-approximation of the length of (str.substr x a a), which + // then allows us to reason that the result of the whole term must + // be empty. + if (checkEntailArith(node[1], node[0][2])) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-start-geq-len"); + } + } + else if (node[0].getKind() == STRING_STRREPL) + { + // (str.substr (str.replace x y z) 0 n) + // ---> (str.replace (str.substr x 0 n) y z) + // if (str.len y) = 1 and (str.len z) = 1 + if (node[1] == zero) + { + if (checkEntailLengthOne(node[0][1], true) + && checkEntailLengthOne(node[0][2], true)) + { + Node ret = nm->mkNode( + kind::STRING_STRREPL, + nm->mkNode(kind::STRING_SUBSTR, node[0][0], node[1], node[2]), + node[0][1], + node[0][2]); + return returnRewrite(node, ret, "substr-repl-swap"); + } + } + } + + std::vector<Node> n1; + utils::getConcat(node[0], n1); + + // definite inclusion + if (node[1] == zero) + { + Node curr = node[2]; + std::vector<Node> childrenr; + if (stripSymbolicLength(n1, childrenr, 1, curr)) + { + if (curr != zero && !n1.empty()) + { + childrenr.push_back(nm->mkNode(kind::STRING_SUBSTR, + utils::mkConcat(STRING_CONCAT, n1), + node[1], + curr)); + } + Node ret = utils::mkConcat(STRING_CONCAT, childrenr); + return returnRewrite(node, ret, "ss-len-include"); + } + } + + // symbolic length analysis + for (unsigned r = 0; r < 2; r++) + { + // the amount of characters we can strip + Node curr; + if (r == 0) + { + if (node[1] != zero) + { + // strip up to start point off the start of the string + curr = node[1]; + } + } + else if (r == 1) + { + Node tot_len = + Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, node[0])); + Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2])); + if (node[2] != tot_len) + { + if (checkEntailArith(node[2], tot_len)) + { + // end point beyond end point of string, map to tot_len + Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len); + return returnRewrite(node, ret, "ss-end-pt-norm"); + } + else + { + // strip up to ( str.len(node[0]) - end_pt ) off the end of the string + curr = Rewriter::rewrite(nm->mkNode(kind::MINUS, tot_len, end_pt)); + } + } + + // (str.substr s x y) --> "" if x < len(s) |= 0 >= y + Node n1_lt_tot_len = + Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len)); + if (checkEntailArithWithAssumption(n1_lt_tot_len, zero, node[2], false)) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-start-entails-zero-len"); + } + + // (str.substr s x y) --> "" if 0 < y |= x >= str.len(s) + Node non_zero_len = + Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2])); + if (checkEntailArithWithAssumption(non_zero_len, node[1], tot_len, false)) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-non-zero-len-entails-oob"); + } + + // (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s) + Node geq_zero_start = + Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero)); + if (checkEntailArithWithAssumption(geq_zero_start, zero, tot_len, false)) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-geq-zero-start-entails-emp-s"); + } + + // (str.substr s x x) ---> "" if (str.len s) <= 1 + if (node[1] == node[2] && checkEntailLengthOne(node[0])) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-len-one-z-z"); + } + } + if (!curr.isNull()) + { + // strip off components while quantity is entailed positive + int dir = r == 0 ? 1 : -1; + std::vector<Node> childrenr; + if (stripSymbolicLength(n1, childrenr, dir, curr)) + { + if (r == 0) + { + Node ret = nm->mkNode(kind::STRING_SUBSTR, + utils::mkConcat(STRING_CONCAT, n1), + curr, + node[2]); + return returnRewrite(node, ret, "ss-strip-start-pt"); + } + else + { + Node ret = nm->mkNode(kind::STRING_SUBSTR, + utils::mkConcat(STRING_CONCAT, n1), + node[1], + node[2]); + return returnRewrite(node, ret, "ss-strip-end-pt"); + } + } + } + } + // combine substr + if (node[0].getKind() == kind::STRING_SUBSTR) + { + Node start_inner = node[0][1]; + Node start_outer = node[1]; + if (checkEntailArith(start_outer) && checkEntailArith(start_inner)) + { + // both are positive + // thus, start point is definitely start_inner+start_outer. + // We can rewrite if it for certain what the length is + + // the length of a string from the inner substr subtracts the start point + // of the outer substr + Node len_from_inner = + Rewriter::rewrite(nm->mkNode(kind::MINUS, node[0][2], start_outer)); + Node len_from_outer = node[2]; + Node new_len; + // take quantity that is for sure smaller than the other + if (len_from_inner == len_from_outer) + { + new_len = len_from_inner; + } + else if (checkEntailArith(len_from_inner, len_from_outer)) + { + new_len = len_from_outer; + } + else if (checkEntailArith(len_from_outer, len_from_inner)) + { + new_len = len_from_inner; + } + if (!new_len.isNull()) + { + Node new_start = nm->mkNode(kind::PLUS, start_inner, start_outer); + Node ret = + nm->mkNode(kind::STRING_SUBSTR, node[0][0], new_start, new_len); + return returnRewrite(node, ret, "ss-combine"); + } + } + } + Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; + return node; +} + +Node SequencesRewriter::rewriteContains(Node node) +{ + Assert(node.getKind() == kind::STRING_STRCTN); + NodeManager* nm = NodeManager::currentNM(); + + if (node[0] == node[1]) + { + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(node, ret, "ctn-eq"); + } + if (node[0].isConst()) + { + CVC4::String s = node[0].getConst<String>(); + if (node[1].isConst()) + { + Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos); + return returnRewrite(node, ret, "ctn-const"); + } + else + { + Node t = node[1]; + if (Word::isEmpty(node[0])) + { + Node len1 = + NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); + if (checkEntailArith(len1, true)) + { + // we handle the false case here since the rewrite for equality + // uses this function, hence we want to conclude false if possible. + // len(x)>0 => contains( "", x ) ---> false + Node ret = NodeManager::currentNM()->mkConst(false); + return returnRewrite(node, ret, "ctn-lhs-emptystr"); + } + } + else if (checkEntailLengthOne(t)) + { + const std::vector<unsigned>& vec = s.getVec(); + + NodeBuilder<> nb(OR); + nb << nm->mkConst(String("")).eqNode(t); + for (unsigned c : vec) + { + std::vector<unsigned> sv = {c}; + nb << nm->mkConst(String(sv)).eqNode(t); + } + + // str.contains("ABCabc", t) ---> + // t = "" v t = "A" v t = "B" v t = "C" v t = "a" v t = "b" v t = "c" + // if len(t) <= 1 + Node ret = nb; + return returnRewrite(node, ret, "ctn-split"); + } + else if (node[1].getKind() == kind::STRING_CONCAT) + { + int firstc, lastc; + if (!canConstantContainConcat(node[0], node[1], firstc, lastc)) + { + Node ret = NodeManager::currentNM()->mkConst(false); + return returnRewrite(node, ret, "ctn-nconst-ctn-concat"); + } + } + } + } + if (node[1].isConst()) + { + size_t len = Word::getLength(node[1]); + if (len == 0) + { + // contains( x, "" ) ---> true + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(node, ret, "ctn-rhs-emptystr"); + } + else if (len == 1) + { + // The following rewrites are specific to a single character second + // argument of contains, where we can reason that this character is + // not split over multiple components in the first argument. + if (node[0].getKind() == STRING_CONCAT) + { + std::vector<Node> nc1; + utils::getConcat(node[0], nc1); + NodeBuilder<> nb(OR); + for (const Node& ncc : nc1) + { + nb << nm->mkNode(STRING_STRCTN, ncc, node[1]); + } + Node ret = nb.constructNode(); + // str.contains( x ++ y, "A" ) ---> + // str.contains( x, "A" ) OR str.contains( y, "A" ) + return returnRewrite(node, ret, "ctn-concat-char"); + } + else if (node[0].getKind() == STRING_STRREPL) + { + Node rplDomain = checkEntailContains(node[0][1], node[1]); + if (!rplDomain.isNull() && !rplDomain.getConst<bool>()) + { + Node d1 = nm->mkNode(STRING_STRCTN, node[0][0], node[1]); + Node d2 = + nm->mkNode(AND, + nm->mkNode(STRING_STRCTN, node[0][0], node[0][1]), + nm->mkNode(STRING_STRCTN, node[0][2], node[1])); + Node ret = nm->mkNode(OR, d1, d2); + // If str.contains( y, "A" ) ---> false, then: + // str.contains( str.replace( x, y, z ), "A" ) ---> + // str.contains( x, "A" ) OR + // ( str.contains( x, y ) AND str.contains( z, "A" ) ) + return returnRewrite(node, ret, "ctn-repl-char"); + } + } + } + } + std::vector<Node> nc1; + utils::getConcat(node[0], nc1); + std::vector<Node> nc2; + utils::getConcat(node[1], nc2); + + // component-wise containment + std::vector<Node> nc1rb; + std::vector<Node> nc1re; + if (componentContains(nc1, nc2, nc1rb, nc1re) != -1) + { + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(node, ret, "ctn-component"); + } + + // strip endpoints + std::vector<Node> nb; + std::vector<Node> ne; + if (stripConstantEndpoints(nc1, nc2, nb, ne)) + { + Node ret = NodeManager::currentNM()->mkNode( + kind::STRING_STRCTN, utils::mkConcat(STRING_CONCAT, nc1), node[1]); + return returnRewrite(node, ret, "ctn-strip-endpt"); + } + + for (const Node& n : nc2) + { + if (n.getKind() == kind::STRING_STRREPL) + { + // (str.contains x (str.replace y z w)) --> false + // if (str.contains x y) = false and (str.contains x w) = false + // + // Reasoning: (str.contains x y) checks that x does not contain y if the + // replacement does not change y. (str.contains x w) checks that if the + // replacement changes anything in y, the w makes it impossible for it to + // occur in x. + Node ctnConst = checkEntailContains(node[0], n[0]); + if (!ctnConst.isNull() && !ctnConst.getConst<bool>()) + { + Node ctnConst2 = checkEntailContains(node[0], n[2]); + if (!ctnConst2.isNull() && !ctnConst2.getConst<bool>()) + { + Node res = nm->mkConst(false); + return returnRewrite(node, res, "ctn-rpl-non-ctn"); + } + } + + // (str.contains x (str.++ w (str.replace x y x) z)) ---> + // (and (= w "") (= x (str.replace x y x)) (= z "")) + // + // TODO: Remove with under-/over-approximation + if (node[0] == n[0] && node[0] == n[2]) + { + Node ret; + if (nc2.size() > 1) + { + Node emp = nm->mkConst(CVC4::String("")); + NodeBuilder<> nb2(kind::AND); + for (const Node& n2 : nc2) + { + if (n2 == n) + { + nb2 << nm->mkNode(kind::EQUAL, node[0], node[1]); + } + else + { + nb2 << nm->mkNode(kind::EQUAL, emp, n2); + } + } + ret = nb2.constructNode(); + } + else + { + ret = nm->mkNode(kind::EQUAL, node[0], node[1]); + } + return returnRewrite(node, ret, "ctn-repl-self"); + } + } + } + + // length entailment + Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); + Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); + if (checkEntailArith(len_n2, len_n1, true)) + { + // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false + Node ret = NodeManager::currentNM()->mkConst(false); + return returnRewrite(node, ret, "ctn-len-ineq"); + } + + // multi-set reasoning + // For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false + // since the number of a's in the second argument is greater than the number + // of a's in the first argument + if (checkEntailMultisetSubset(node[0], node[1])) + { + Node ret = nm->mkConst(false); + return returnRewrite(node, ret, "ctn-mset-nss"); + } + + if (checkEntailArith(len_n2, len_n1, false)) + { + // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2 + Node ret = node[0].eqNode(node[1]); + return returnRewrite(node, ret, "ctn-len-ineq-nstrict"); + } + + // splitting + if (node[0].getKind() == kind::STRING_CONCAT) + { + if (node[1].isConst()) + { + CVC4::String t = node[1].getConst<String>(); + // Below, we are looking for a constant component of node[0] + // has no overlap with node[1], which means we can split. + // Notice that if the first or last components had no + // overlap, these would have been removed by strip + // constant endpoints above. + // Hence, we consider only the inner children. + for (unsigned i = 1; i < (node[0].getNumChildren() - 1); i++) + { + // constant contains + if (node[0][i].isConst()) + { + CVC4::String s = node[0][i].getConst<String>(); + // if no overlap, we can split into disjunction + if (s.noOverlapWith(t)) + { + std::vector<Node> nc0; + utils::getConcat(node[0], nc0); + std::vector<Node> spl[2]; + spl[0].insert(spl[0].end(), nc0.begin(), nc0.begin() + i); + Assert(i < nc0.size() - 1); + spl[1].insert(spl[1].end(), nc0.begin() + i + 1, nc0.end()); + Node ret = NodeManager::currentNM()->mkNode( + kind::OR, + NodeManager::currentNM()->mkNode( + kind::STRING_STRCTN, + utils::mkConcat(STRING_CONCAT, spl[0]), + node[1]), + NodeManager::currentNM()->mkNode( + kind::STRING_STRCTN, + utils::mkConcat(STRING_CONCAT, spl[1]), + node[1])); + return returnRewrite(node, ret, "ctn-split"); + } + } + } + } + } + else if (node[0].getKind() == kind::STRING_SUBSTR) + { + // (str.contains (str.substr x n (str.len y)) y) ---> + // (= (str.substr x n (str.len y)) y) + // + // TODO: Remove with under-/over-approximation + if (node[0][2] == nm->mkNode(kind::STRING_LENGTH, node[1])) + { + Node ret = nm->mkNode(kind::EQUAL, node[0], node[1]); + return returnRewrite(node, ret, "ctn-substr"); + } + } + else if (node[0].getKind() == kind::STRING_STRREPL) + { + if (node[1].isConst() && node[0][1].isConst() && node[0][2].isConst()) + { + if (Word::noOverlapWith(node[1], node[0][1]) + && Word::noOverlapWith(node[1], node[0][2])) + { + // (str.contains (str.replace x c1 c2) c3) ---> (str.contains x c3) + // if there is no overlap between c1 and c3 and none between c2 and c3 + Node ret = nm->mkNode(STRING_STRCTN, node[0][0], node[1]); + return returnRewrite(node, ret, "ctn-repl-cnsts-to-ctn"); + } + } + + if (node[0][0] == node[0][2]) + { + // (str.contains (str.replace x y x) y) ---> (str.contains x y) + if (node[0][1] == node[1]) + { + Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]); + return returnRewrite(node, ret, "ctn-repl-to-ctn"); + } + + // (str.contains (str.replace x y x) z) ---> (str.contains x z) + // if (str.len z) <= 1 + if (checkEntailLengthOne(node[1])) + { + Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]); + return returnRewrite(node, ret, "ctn-repl-len-one-to-ctn"); + } + } + + // (str.contains (str.replace x y z) z) ---> + // (or (str.contains x y) (str.contains x z)) + if (node[0][2] == node[1]) + { + Node ret = nm->mkNode(OR, + nm->mkNode(STRING_STRCTN, node[0][0], node[0][1]), + nm->mkNode(STRING_STRCTN, node[0][0], node[0][2])); + return returnRewrite(node, ret, "ctn-repl-to-ctn-disj"); + } + + // (str.contains (str.replace x y z) w) ---> + // (str.contains (str.replace x y "") w) + // if (str.contains z w) ---> false and (str.len w) = 1 + if (checkEntailLengthOne(node[1])) + { + Node ctn = checkEntailContains(node[1], node[0][2]); + if (!ctn.isNull() && !ctn.getConst<bool>()) + { + Node empty = nm->mkConst(String("")); + Node ret = nm->mkNode( + kind::STRING_STRCTN, + nm->mkNode(kind::STRING_STRREPL, node[0][0], node[0][1], empty), + node[1]); + return returnRewrite(node, ret, "ctn-repl-simp-repl"); + } + } + } + + if (node[1].getKind() == kind::STRING_STRREPL) + { + // (str.contains x (str.replace y x y)) ---> + // (str.contains x y) + if (node[0] == node[1][1] && node[1][0] == node[1][2]) + { + Node ret = nm->mkNode(kind::STRING_STRCTN, node[0], node[1][0]); + return returnRewrite(node, ret, "ctn-repl"); + } + + // (str.contains x (str.replace "" x y)) ---> + // (= "" (str.replace "" x y)) + // + // Note: Length-based reasoning is not sufficient to get this rewrite. We + // can neither show that str.len(str.replace("", x, y)) - str.len(x) >= 0 + // nor str.len(x) - str.len(str.replace("", x, y)) >= 0 + Node emp = nm->mkConst(CVC4::String("")); + if (node[0] == node[1][1] && node[1][0] == emp) + { + Node ret = nm->mkNode(kind::EQUAL, emp, node[1]); + return returnRewrite(node, ret, "ctn-repl-empty"); + } + } + + Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; + return node; +} + +Node SequencesRewriter::rewriteIndexof(Node node) +{ + Assert(node.getKind() == kind::STRING_STRIDOF); + NodeManager* nm = NodeManager::currentNM(); + + if (node[2].isConst() && node[2].getConst<Rational>().sgn() < 0) + { + // z<0 implies str.indexof( x, y, z ) --> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-neg"); + } + + // evaluation and simple cases + std::vector<Node> children0; + utils::getConcat(node[0], children0); + if (children0[0].isConst() && node[1].isConst() && node[2].isConst()) + { + CVC4::Rational rMaxInt(CVC4::String::maxSize()); + if (node[2].getConst<Rational>() > rMaxInt) + { + // We know that, due to limitations on the size of string constants + // in our implementation, that accessing a position greater than + // rMaxInt is guaranteed to be out of bounds. + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-max"); + } + Assert(node[2].getConst<Rational>().sgn() >= 0); + Node s = children0[0]; + Node t = node[1]; + uint32_t start = + node[2].getConst<Rational>().getNumerator().toUnsignedInt(); + std::size_t ret = Word::find(s, t, start); + if (ret != std::string::npos) + { + Node retv = nm->mkConst(Rational(static_cast<unsigned>(ret))); + return returnRewrite(node, retv, "idof-find"); + } + else if (children0.size() == 1) + { + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-nfind"); + } + } + + if (node[0] == node[1]) + { + if (node[2].isConst()) + { + if (node[2].getConst<Rational>().sgn() == 0) + { + // indexof( x, x, 0 ) --> 0 + Node zero = nm->mkConst(Rational(0)); + return returnRewrite(node, zero, "idof-eq-cst-start"); + } + } + if (checkEntailArith(node[2], true)) + { + // y>0 implies indexof( x, x, y ) --> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-eq-nstart"); + } + Node emp = nm->mkConst(CVC4::String("")); + if (node[0] != emp) + { + // indexof( x, x, z ) ---> indexof( "", "", z ) + Node ret = nm->mkNode(STRING_STRIDOF, emp, emp, node[2]); + return returnRewrite(node, ret, "idof-eq-norm"); + } + } + + Node len0 = nm->mkNode(STRING_LENGTH, node[0]); + Node len1 = nm->mkNode(STRING_LENGTH, node[1]); + Node len0m2 = nm->mkNode(MINUS, len0, node[2]); + + if (node[1].isConst()) + { + if (Word::isEmpty(node[1])) + { + if (checkEntailArith(len0, node[2]) && checkEntailArith(node[2])) + { + // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z + return returnRewrite(node, node[2], "idof-emp-idof"); + } + } + } + + if (checkEntailArith(len1, len0m2, true)) + { + // len(x)-z < len(y) implies indexof( x, y, z ) ----> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-len"); + } + + Node fstr = node[0]; + if (!node[2].isConst() || node[2].getConst<Rational>().sgn() != 0) + { + fstr = nm->mkNode(kind::STRING_SUBSTR, node[0], node[2], len0); + fstr = Rewriter::rewrite(fstr); + } + + Node cmp_conr = checkEntailContains(fstr, node[1]); + Trace("strings-rewrite-debug") << "For " << node << ", check contains(" + << fstr << ", " << node[1] << ")" << std::endl; + Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl; + std::vector<Node> children1; + utils::getConcat(node[1], children1); + if (!cmp_conr.isNull()) + { + if (cmp_conr.getConst<bool>()) + { + if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0) + { + // past the first position in node[0] that contains node[1], we can drop + std::vector<Node> nb; + std::vector<Node> ne; + int cc = componentContains(children0, children1, nb, ne, true, 1); + if (cc != -1 && !ne.empty()) + { + // For example: + // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0) + Node nn = utils::mkConcat(STRING_CONCAT, children0); + Node ret = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]); + return returnRewrite(node, ret, "idof-def-ctn"); + } + + // Strip components from the beginning that are guaranteed not to match + if (stripConstantEndpoints(children0, children1, nb, ne, 1)) + { + // str.indexof(str.++("AB", x, "C"), "C", 0) ---> + // 2 + str.indexof(str.++(x, "C"), "C", 0) + Node ret = + nm->mkNode(kind::PLUS, + nm->mkNode(kind::STRING_LENGTH, + utils::mkConcat(STRING_CONCAT, nb)), + nm->mkNode(kind::STRING_STRIDOF, + utils::mkConcat(STRING_CONCAT, children0), + node[1], + node[2])); + return returnRewrite(node, ret, "idof-strip-cnst-endpts"); + } + } + + // strip symbolic length + Node new_len = node[2]; + std::vector<Node> nr; + if (stripSymbolicLength(children0, nr, 1, new_len)) + { + // For example: + // z>str.len( x1 ) and str.contains( x2, y )-->true + // implies + // str.indexof( str.++( x1, x2 ), y, z ) ---> + // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) ) + Node nn = utils::mkConcat(STRING_CONCAT, children0); + Node ret = + nm->mkNode(kind::PLUS, + nm->mkNode(kind::MINUS, node[2], new_len), + nm->mkNode(kind::STRING_STRIDOF, nn, node[1], new_len)); + return returnRewrite(node, ret, "idof-strip-sym-len"); + } + } + else + { + // str.contains( x, y ) --> false implies str.indexof(x,y,z) --> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-nctn"); + } + } + else + { + Node new_len = node[2]; + std::vector<Node> nr; + if (stripSymbolicLength(children0, nr, 1, new_len)) + { + // Normalize the string before the start index. + // + // For example: + // str.indexof(str.++("ABCD", x), y, 3) ---> + // str.indexof(str.++("AAAD", x), y, 3) + Node nodeNr = utils::mkConcat(STRING_CONCAT, nr); + Node normNr = lengthPreserveRewrite(nodeNr); + if (normNr != nodeNr) + { + std::vector<Node> normNrChildren; + utils::getConcat(normNr, normNrChildren); + std::vector<Node> children(normNrChildren); + children.insert(children.end(), children0.begin(), children0.end()); + Node nn = utils::mkConcat(STRING_CONCAT, children); + Node res = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]); + return returnRewrite(node, res, "idof-norm-prefix"); + } + } + } + + if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0) + { + std::vector<Node> cb; + std::vector<Node> ce; + if (stripConstantEndpoints(children0, children1, cb, ce, -1)) + { + Node ret = utils::mkConcat(STRING_CONCAT, children0); + ret = nm->mkNode(STRING_STRIDOF, ret, node[1], node[2]); + // For example: + // str.indexof( str.++( x, "A" ), "B", 0 ) ---> str.indexof( x, "B", 0 ) + return returnRewrite(node, ret, "rpl-pull-endpt"); + } + } + + Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; + return node; +} + +Node SequencesRewriter::rewriteReplace(Node node) +{ + Assert(node.getKind() == kind::STRING_STRREPL); + NodeManager* nm = NodeManager::currentNM(); + + if (node[1].isConst() && Word::isEmpty(node[1])) + { + Node ret = nm->mkNode(STRING_CONCAT, node[2], node[0]); + return returnRewrite(node, ret, "rpl-rpl-empty"); + } + + std::vector<Node> children0; + utils::getConcat(node[0], children0); + + if (node[1].isConst() && children0[0].isConst()) + { + Node s = children0[0]; + Node t = node[1]; + std::size_t p = Word::find(s, t); + if (p == std::string::npos) + { + if (children0.size() == 1) + { + return returnRewrite(node, node[0], "rpl-const-nfind"); + } + } + else + { + Node s1 = Word::substr(s, 0, p); + Node s3 = Word::substr(s, p + Word::getLength(t)); + std::vector<Node> children; + if (!Word::isEmpty(s1)) + { + children.push_back(s1); + } + children.push_back(node[2]); + if (!Word::isEmpty(s3)) + { + children.push_back(s3); + } + children.insert(children.end(), children0.begin() + 1, children0.end()); + Node ret = utils::mkConcat(STRING_CONCAT, children); + return returnRewrite(node, ret, "rpl-const-find"); + } + } + + // rewrites that apply to both replace and replaceall + Node rri = rewriteReplaceInternal(node); + if (!rri.isNull()) + { + // printing of the rewrite managed by the call above + return rri; + } + + if (node[0] == node[2]) + { + // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x + Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); + Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); + if (checkEntailArith(l1, l0)) + { + return returnRewrite(node, node[0], "rpl-rpl-len-id"); + } + + // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x) + // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "") + if (checkEntailLengthOne(node[0])) + { + Node empty = nm->mkConst(String("")); + Node rn1 = Rewriter::rewrite( + rewriteEqualityExt(nm->mkNode(EQUAL, node[1], empty))); + if (rn1 != node[1]) + { + std::vector<Node> emptyNodes; + bool allEmptyEqs; + std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(rn1); + + if (allEmptyEqs) + { + Node nn1 = utils::mkConcat(STRING_CONCAT, emptyNodes); + if (node[1] != nn1) + { + Node ret = nm->mkNode(STRING_STRREPL, node[0], nn1, node[2]); + return returnRewrite(node, ret, "rpl-x-y-x-simp"); + } + } + } + } + } + + std::vector<Node> children1; + utils::getConcat(node[1], children1); + + // check if contains definitely does (or does not) hold + Node cmp_con = nm->mkNode(kind::STRING_STRCTN, node[0], node[1]); + Node cmp_conr = Rewriter::rewrite(cmp_con); + if (!checkEntailContains(node[0], node[1]).isNull()) + { + if (cmp_conr.getConst<bool>()) + { + // component-wise containment + std::vector<Node> cb; + std::vector<Node> ce; + int cc = componentContains(children0, children1, cb, ce, true, 1); + if (cc != -1) + { + if (cc == 0 && children0[0] == children1[0]) + { + // definitely a prefix, can do the replace + // for example, + // str.replace( str.++( x, "ab" ), str.++( x, "a" ), y ) ---> + // str.++( y, "b" ) + std::vector<Node> cres; + cres.push_back(node[2]); + cres.insert(cres.end(), ce.begin(), ce.end()); + Node ret = utils::mkConcat(STRING_CONCAT, cres); + return returnRewrite(node, ret, "rpl-cctn-rpl"); + } + else if (!ce.empty()) + { + // we can pull remainder past first definite containment + // for example, + // str.replace( str.++( x, "ab" ), "a", y ) ---> + // str.++( str.replace( str.++( x, "a" ), "a", y ), "b" ) + // this is independent of whether the second argument may be empty + std::vector<Node> scc; + scc.push_back(NodeManager::currentNM()->mkNode( + kind::STRING_STRREPL, + utils::mkConcat(STRING_CONCAT, children0), + node[1], + node[2])); + scc.insert(scc.end(), ce.begin(), ce.end()); + Node ret = utils::mkConcat(STRING_CONCAT, scc); + return returnRewrite(node, ret, "rpl-cctn"); + } + } + } + else + { + // ~contains( t, s ) => ( replace( t, s, r ) ----> t ) + return returnRewrite(node, node[0], "rpl-nctn"); + } + } + else if (cmp_conr.getKind() == kind::EQUAL || cmp_conr.getKind() == kind::AND) + { + // Rewriting the str.contains may return equalities of the form (= x ""). + // In that case, we can substitute the variables appearing in those + // equalities with the empty string in the third argument of the + // str.replace. For example: + // + // (str.replace x (str.++ x y) y) --> (str.replace x (str.++ x y) "") + // + // This can be done because str.replace changes x iff (str.++ x y) is in x + // but that means that y must be empty in that case. Thus, we can + // substitute y with "" in the third argument. Note that the third argument + // does not matter when the str.replace does not apply. + // + Node empty = nm->mkConst(::CVC4::String("")); + + std::vector<Node> emptyNodes; + bool allEmptyEqs; + std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(cmp_conr); + + if (emptyNodes.size() > 0) + { + // Perform the substitutions + std::vector<TNode> substs(emptyNodes.size(), TNode(empty)); + Node nn2 = node[2].substitute( + emptyNodes.begin(), emptyNodes.end(), substs.begin(), substs.end()); + + // If the contains rewrites to a conjunction of empty-string equalities + // and we are doing the replacement in an empty string, we can rewrite + // the string-to-replace with a concatenation of all the terms that must + // be empty: + // + // (str.replace "" y z) ---> (str.replace "" (str.++ y1 ... yn) z) + // if (str.contains "" y) ---> (and (= y1 "") ... (= yn "")) + if (node[0] == empty && allEmptyEqs) + { + std::vector<Node> emptyNodesList(emptyNodes.begin(), emptyNodes.end()); + Node nn1 = utils::mkConcat(STRING_CONCAT, emptyNodesList); + if (nn1 != node[1] || nn2 != node[2]) + { + Node res = nm->mkNode(kind::STRING_STRREPL, node[0], nn1, nn2); + return returnRewrite(node, res, "rpl-emp-cnts-substs"); + } + } + + if (nn2 != node[2]) + { + Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1], nn2); + return returnRewrite(node, res, "rpl-cnts-substs"); + } + } + } + + if (cmp_conr != cmp_con) + { + if (checkEntailNonEmpty(node[1])) + { + // pull endpoints that can be stripped + // for example, + // str.replace( str.++( "b", x, "b" ), "a", y ) ---> + // str.++( "b", str.replace( x, "a", y ), "b" ) + std::vector<Node> cb; + std::vector<Node> ce; + if (stripConstantEndpoints(children0, children1, cb, ce)) + { + std::vector<Node> cc; + cc.insert(cc.end(), cb.begin(), cb.end()); + cc.push_back(NodeManager::currentNM()->mkNode( + kind::STRING_STRREPL, + utils::mkConcat(STRING_CONCAT, children0), + node[1], + node[2])); + cc.insert(cc.end(), ce.begin(), ce.end()); + Node ret = utils::mkConcat(STRING_CONCAT, cc); + return returnRewrite(node, ret, "rpl-pull-endpt"); + } + } + } + + children1.clear(); + utils::getConcat(node[1], children1); + Node lastChild1 = children1[children1.size() - 1]; + if (lastChild1.getKind() == kind::STRING_SUBSTR) + { + // (str.replace x (str.++ t (str.substr y i j)) z) ---> + // (str.replace x (str.++ t + // (str.substr y i (+ (str.len x) 1 (- (str.len t))))) z) + // if j > len(x) + // + // Reasoning: If the string to be replaced is longer than x, then it does + // not matter how much longer it is, the result is always x. Thus, it is + // fine to only look at the prefix of length len(x) + 1 - len(t). + + children1.pop_back(); + // Length of the non-substr components in the second argument + Node partLen1 = nm->mkNode(kind::STRING_LENGTH, + utils::mkConcat(STRING_CONCAT, children1)); + Node maxLen1 = nm->mkNode(kind::PLUS, partLen1, lastChild1[2]); + + Node zero = nm->mkConst(Rational(0)); + Node one = nm->mkConst(Rational(1)); + Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]); + Node len0_1 = nm->mkNode(kind::PLUS, len0, one); + // Check len(t) + j > len(x) + 1 + if (checkEntailArith(maxLen1, len0_1, true)) + { + children1.push_back(nm->mkNode( + kind::STRING_SUBSTR, + lastChild1[0], + lastChild1[1], + nm->mkNode( + kind::PLUS, len0, one, nm->mkNode(kind::UMINUS, partLen1)))); + Node res = nm->mkNode(kind::STRING_STRREPL, + node[0], + utils::mkConcat(STRING_CONCAT, children1), + node[2]); + return returnRewrite(node, res, "repl-subst-idx"); + } + } + + if (node[0].getKind() == STRING_STRREPL) + { + Node x = node[0]; + Node y = node[1]; + Node z = node[2]; + if (x[0] == x[2] && x[0] == y) + { + // (str.replace (str.replace y w y) y z) --> + // (str.replace (str.replace y w z) y z) + // if (str.len w) >= (str.len z) and w != z + // + // Reasoning: There are two cases: (1) w does not appear in y and (2) w + // does appear in y. + // + // Case (1): In this case, the reasoning is trivial. The + // inner replace does not do anything, so we can just replace its third + // argument with any string. + // + // Case (2): After the inner replace, we are guaranteed to have a string + // that contains y at the index of w in the original string y. The outer + // replace then replaces that y with z, so we can short-circuit that + // replace by directly replacing w with z in the inner replace. We can + // only do that if the result of the new inner replace does not contain + // y, otherwise we end up doing two replaces that are different from the + // original expression. We enforce that by requiring that the length of w + // has to be greater or equal to the length of z and that w and z have to + // be different. This makes sure that an inner replace changes a string + // to a string that is shorter than y, making it impossible for the outer + // replace to match. + Node w = x[1]; + + // (str.len w) >= (str.len z) + Node wlen = nm->mkNode(kind::STRING_LENGTH, w); + Node zlen = nm->mkNode(kind::STRING_LENGTH, z); + if (checkEntailArith(wlen, zlen)) + { + // w != z + Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z)); + if (wEqZ.isConst() && !wEqZ.getConst<bool>()) + { + Node ret = nm->mkNode(kind::STRING_STRREPL, + nm->mkNode(kind::STRING_STRREPL, y, w, z), + y, + z); + return returnRewrite(node, ret, "repl-repl-short-circuit"); + } + } + } + } + + if (node[1].getKind() == STRING_STRREPL) + { + if (node[1][0] == node[0]) + { + if (node[1][0] == node[1][2] && node[1][0] == node[2]) + { + // str.replace( x, str.replace( x, y, x ), x ) ---> x + return returnRewrite(node, node[0], "repl-repl2-inv-id"); + } + bool dualReplIteSuccess = false; + Node cmp_con2 = checkEntailContains(node[1][0], node[1][2]); + if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>()) + { + // str.contains( x, z ) ---> false + // implies + // str.replace( x, str.replace( x, y, z ), w ) ---> + // ite( str.contains( x, y ), x, w ) + dualReplIteSuccess = true; + } + else + { + // str.contains( y, z ) ---> false and str.contains( z, y ) ---> false + // implies + // str.replace( x, str.replace( x, y, z ), w ) ---> + // ite( str.contains( x, y ), x, w ) + cmp_con2 = checkEntailContains(node[1][1], node[1][2]); + if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>()) + { + cmp_con2 = checkEntailContains(node[1][2], node[1][1]); + if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>()) + { + dualReplIteSuccess = true; + } + } + } + if (dualReplIteSuccess) + { + Node res = nm->mkNode(ITE, + nm->mkNode(STRING_STRCTN, node[0], node[1][1]), + node[0], + node[2]); + return returnRewrite(node, res, "repl-dual-repl-ite"); + } + } + + bool invSuccess = false; + if (node[1][1] == node[0]) + { + if (node[1][0] == node[1][2]) + { + // str.replace(x, str.replace(y, x, y), w) ---> str.replace(x, y, w) + invSuccess = true; + } + else if (node[1][1] == node[2] || node[1][0] == node[2]) + { + // str.contains(y, z) ----> false and ( y == w or x == w ) implies + // implies + // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w) + Node cmp_con2 = checkEntailContains(node[1][0], node[1][2]); + invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>(); + } + } + else + { + // str.contains(x, z) ----> false and str.contains(x, w) ----> false + // implies + // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u) + Node cmp_con2 = checkEntailContains(node[0], node[1][1]); + if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>()) + { + cmp_con2 = checkEntailContains(node[0], node[1][2]); + invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>(); + } + } + if (invSuccess) + { + Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1][0], node[2]); + return returnRewrite(node, res, "repl-repl2-inv"); + } + } + if (node[2].getKind() == STRING_STRREPL) + { + if (node[2][1] == node[0]) + { + // str.contains( z, w ) ----> false implies + // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z ) + Node cmp_con2 = checkEntailContains(node[1], node[2][0]); + if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>()) + { + Node res = + nm->mkNode(kind::STRING_STRREPL, node[0], node[1], node[2][0]); + return returnRewrite(node, res, "repl-repl3-inv"); + } + } + if (node[2][0] == node[1]) + { + bool success = false; + if (node[2][0] == node[2][2] && node[2][1] == node[0]) + { + // str.replace( x, y, str.replace( y, x, y ) ) ---> x + success = true; + } + else + { + // str.contains( x, z ) ----> false implies + // str.replace( x, y, str.replace( y, z, w ) ) ---> x + cmp_con = checkEntailContains(node[0], node[2][1]); + success = !cmp_con.isNull() && !cmp_con.getConst<bool>(); + } + if (success) + { + return returnRewrite(node, node[0], "repl-repl3-inv-id"); + } + } + } + // miniscope based on components that do not contribute to contains + // for example, + // str.replace( x ++ y ++ x ++ y, "A", z ) --> + // str.replace( x ++ y, "A", z ) ++ x ++ y + // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y. + if (checkEntailLengthOne(node[1])) + { + Node lastLhs; + unsigned lastCheckIndex = 0; + for (unsigned i = 1, iend = children0.size(); i < iend; i++) + { + unsigned checkIndex = children0.size() - i; + std::vector<Node> checkLhs; + checkLhs.insert( + checkLhs.end(), children0.begin(), children0.begin() + checkIndex); + Node lhs = utils::mkConcat(STRING_CONCAT, checkLhs); + Node rhs = children0[checkIndex]; + Node ctn = checkEntailContains(lhs, rhs); + if (!ctn.isNull() && ctn.getConst<bool>()) + { + lastLhs = lhs; + lastCheckIndex = checkIndex; + } + else + { + break; + } + } + if (!lastLhs.isNull()) + { + std::vector<Node> remc(children0.begin() + lastCheckIndex, + children0.end()); + Node rem = utils::mkConcat(STRING_CONCAT, remc); + Node ret = + nm->mkNode(STRING_CONCAT, + nm->mkNode(STRING_STRREPL, lastLhs, node[1], node[2]), + rem); + // for example: + // str.replace( x ++ x, "A", y ) ---> str.replace( x, "A", y ) ++ x + // Since we know that the first occurrence of "A" cannot be in the + // second occurrence of x. Notice this is specific to single characters + // due to complications with finds that span multiple components for + // non-characters. + return returnRewrite(node, ret, "repl-char-ncontrib-find"); + } + } + + // TODO (#1180) incorporate these? + // contains( t, s ) => + // replace( replace( x, t, s ), s, r ) ----> replace( x, t, r ) + // contains( t, s ) => + // contains( replace( t, s, r ), r ) ----> true + + Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; + return node; +} + +Node SequencesRewriter::rewriteReplaceAll(Node node) +{ + Assert(node.getKind() == STRING_STRREPLALL); + + if (node[0].isConst() && node[1].isConst()) + { + std::vector<Node> children; + Node s = node[0]; + Node t = node[1]; + if (Word::isEmpty(s) || Word::isEmpty(t)) + { + return returnRewrite(node, node[0], "replall-empty-find"); + } + std::size_t sizeS = Word::getLength(s); + std::size_t sizeT = Word::getLength(t); + std::size_t index = 0; + std::size_t curr = 0; + do + { + curr = Word::find(s, t, index); + if (curr != std::string::npos) + { + if (curr > index) + { + children.push_back(Word::substr(s, index, curr - index)); + } + children.push_back(node[2]); + index = curr + sizeT; + } + else + { + children.push_back(Word::substr(s, index, sizeS - index)); + } + } while (curr != std::string::npos && curr < sizeS); + // constant evaluation + Node res = utils::mkConcat(STRING_CONCAT, children); + return returnRewrite(node, res, "replall-const"); + } + + // rewrites that apply to both replace and replaceall + Node rri = rewriteReplaceInternal(node); + if (!rri.isNull()) + { + // printing of the rewrite managed by the call above + return rri; + } + + Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; + return node; +} + +Node SequencesRewriter::rewriteReplaceInternal(Node node) +{ + Kind nk = node.getKind(); + Assert(nk == STRING_STRREPL || nk == STRING_STRREPLALL); + + if (node[1] == node[2]) + { + return returnRewrite(node, node[0], "rpl-id"); + } + + if (node[0] == node[1]) + { + // only holds for replaceall if non-empty + if (nk == STRING_STRREPL || checkEntailNonEmpty(node[1])) + { + return returnRewrite(node, node[2], "rpl-replace"); + } + } + + return Node::null(); +} + +Node SequencesRewriter::rewriteStrReverse(Node node) +{ + Assert(node.getKind() == STRING_REV); + NodeManager* nm = NodeManager::currentNM(); + Node x = node[0]; + if (x.isConst()) + { + std::vector<unsigned> nvec = node[0].getConst<String>().getVec(); + std::reverse(nvec.begin(), nvec.end()); + Node retNode = nm->mkConst(String(nvec)); + return returnRewrite(node, retNode, "str-conv-const"); + } + else if (x.getKind() == STRING_CONCAT) + { + std::vector<Node> children; + for (const Node& nc : x) + { + children.push_back(nm->mkNode(STRING_REV, nc)); + } + std::reverse(children.begin(), children.end()); + // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 ) + Node retNode = nm->mkNode(STRING_CONCAT, children); + return returnRewrite(node, retNode, "str-rev-minscope-concat"); + } + else if (x.getKind() == STRING_REV) + { + // rev( rev( x ) ) --> x + Node retNode = x[0]; + return returnRewrite(node, retNode, "str-rev-idem"); + } + return node; +} + +Node SequencesRewriter::rewritePrefixSuffix(Node n) +{ + Assert(n.getKind() == kind::STRING_PREFIX + || n.getKind() == kind::STRING_SUFFIX); + bool isPrefix = n.getKind() == kind::STRING_PREFIX; + if (n[0] == n[1]) + { + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(n, ret, "suf/prefix-eq"); + } + if (n[0].isConst()) + { + CVC4::String t = n[0].getConst<String>(); + if (t.isEmptyString()) + { + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(n, ret, "suf/prefix-empty-const"); + } + } + if (n[1].isConst()) + { + Node s = n[1]; + size_t lenS = Word::getLength(s); + if (n[0].isConst()) + { + Node ret = NodeManager::currentNM()->mkConst(false); + Node t = n[0]; + size_t lenT = Word::getLength(t); + if (lenS >= lenT) + { + if ((isPrefix && t == Word::prefix(s, lenT)) + || (!isPrefix && t == Word::suffix(s, lenT))) + { + ret = NodeManager::currentNM()->mkConst(true); + } + } + return returnRewrite(n, ret, "suf/prefix-const"); + } + else if (lenS == 0) + { + Node ret = n[0].eqNode(n[1]); + return returnRewrite(n, ret, "suf/prefix-empty"); + } + else if (lenS == 1) + { + // (str.prefix x "A") and (str.suffix x "A") are equivalent to + // (str.contains "A" x ) + Node ret = + NodeManager::currentNM()->mkNode(kind::STRING_STRCTN, n[1], n[0]); + return returnRewrite(n, ret, "suf/prefix-ctn"); + } + } + Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[0]); + Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[1]); + Node val; + if (isPrefix) + { + val = NodeManager::currentNM()->mkConst(::CVC4::Rational(0)); + } + else + { + val = NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens); + } + + // Check if we can turn the prefix/suffix into equalities by showing that the + // prefix/suffix is at least as long as the string + Node eqs = inferEqsFromContains(n[1], n[0]); + if (!eqs.isNull()) + { + return returnRewrite(n, eqs, "suf/prefix-to-eqs"); + } + + // general reduction to equality + substr + Node retNode = n[0].eqNode( + NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens)); + + return retNode; +} + +Node SequencesRewriter::splitConstant(Node a, Node b, int& index, bool isRev) +{ + Assert(a.isConst() && b.isConst()); + size_t lenA = Word::getLength(a); + size_t lenB = Word::getLength(b); + index = lenA <= lenB ? 1 : 0; + size_t len_short = index == 1 ? lenA : lenB; + bool cmp = + isRev ? a.getConst<String>().rstrncmp(b.getConst<String>(), len_short) + : a.getConst<String>().strncmp(b.getConst<String>(), len_short); + if (cmp) + { + Node l = index == 0 ? a : b; + if (isRev) + { + int new_len = l.getConst<String>().size() - len_short; + return Word::substr(l, 0, new_len); + } + else + { + return Word::substr(l, len_short); + } + } + // not the same prefix/suffix + return Node::null(); +} + +bool SequencesRewriter::canConstantContainConcat(Node c, + Node n, + int& firstc, + int& lastc) +{ + Assert(c.isConst()); + CVC4::String t = c.getConst<String>(); + const std::vector<unsigned>& tvec = t.getVec(); + Assert(n.getKind() == kind::STRING_CONCAT); + // must find constant components in order + size_t pos = 0; + firstc = -1; + lastc = -1; + for (unsigned i = 0; i < n.getNumChildren(); i++) + { + if (n[i].isConst()) + { + firstc = firstc == -1 ? i : firstc; + lastc = i; + CVC4::String s = n[i].getConst<String>(); + size_t new_pos = t.find(s, pos); + if (new_pos == std::string::npos) + { + return false; + } + else + { + pos = new_pos + s.size(); + } + } + else if (n[i].getKind() == kind::STRING_ITOS && checkEntailArith(n[i][0])) + { + // find the first occurrence of a digit starting at pos + while (pos < tvec.size() && !String::isDigit(tvec[pos])) + { + pos++; + } + if (pos == tvec.size()) + { + return false; + } + // must consume at least one digit here + pos++; + } + } + return true; +} + +bool SequencesRewriter::canConstantContainList(Node c, + std::vector<Node>& l, + int& firstc, + int& lastc) +{ + Assert(c.isConst()); + // must find constant components in order + size_t pos = 0; + firstc = -1; + lastc = -1; + for (unsigned i = 0; i < l.size(); i++) + { + if (l[i].isConst()) + { + firstc = firstc == -1 ? i : firstc; + lastc = i; + size_t new_pos = Word::find(c, l[i], pos); + if (new_pos == std::string::npos) + { + return false; + } + else + { + pos = new_pos + Word::getLength(l[i]); + } + } + } + return true; +} + +bool SequencesRewriter::stripSymbolicLength(std::vector<Node>& n1, + std::vector<Node>& nr, + int dir, + Node& curr) +{ + Assert(dir == 1 || dir == -1); + Assert(nr.empty()); + Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0)); + bool ret = false; + bool success; + unsigned sindex = 0; + do + { + Assert(!curr.isNull()); + success = false; + if (curr != zero && sindex < n1.size()) + { + unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex); + if (n1[sindex_use].isConst()) + { + // could strip part of a constant + Node lowerBound = getConstantArithBound(Rewriter::rewrite(curr)); + if (!lowerBound.isNull()) + { + Assert(lowerBound.isConst()); + Rational lbr = lowerBound.getConst<Rational>(); + if (lbr.sgn() > 0) + { + Assert(checkEntailArith(curr, true)); + CVC4::String s = n1[sindex_use].getConst<String>(); + Node ncl = + NodeManager::currentNM()->mkConst(CVC4::Rational(s.size())); + Node next_s = + NodeManager::currentNM()->mkNode(kind::MINUS, lowerBound, ncl); + next_s = Rewriter::rewrite(next_s); + Assert(next_s.isConst()); + // we can remove the entire constant + if (next_s.getConst<Rational>().sgn() >= 0) + { + curr = Rewriter::rewrite( + NodeManager::currentNM()->mkNode(kind::MINUS, curr, ncl)); + success = true; + sindex++; + } + else + { + // we can remove part of the constant + // lower bound minus the length of a concrete string is negative, + // hence lowerBound cannot be larger than long max + Assert(lbr < Rational(String::maxSize())); + curr = Rewriter::rewrite(NodeManager::currentNM()->mkNode( + kind::MINUS, curr, lowerBound)); + uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); + Assert(lbsize < s.size()); + if (dir == 1) + { + // strip partially from the front + nr.push_back( + NodeManager::currentNM()->mkConst(s.prefix(lbsize))); + n1[sindex_use] = NodeManager::currentNM()->mkConst( + s.suffix(s.size() - lbsize)); + } + else + { + // strip partially from the back + nr.push_back( + NodeManager::currentNM()->mkConst(s.suffix(lbsize))); + n1[sindex_use] = NodeManager::currentNM()->mkConst( + s.prefix(s.size() - lbsize)); + } + ret = true; + } + Assert(checkEntailArith(curr)); + } + else + { + // we cannot remove the constant + } + } + } + else + { + Node next_s = NodeManager::currentNM()->mkNode( + kind::MINUS, + curr, + NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, + n1[sindex_use])); + next_s = Rewriter::rewrite(next_s); + if (checkEntailArith(next_s)) + { + success = true; + curr = next_s; + sindex++; + } + } + } + } while (success); + if (sindex > 0) + { + if (dir == 1) + { + nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex); + n1.erase(n1.begin(), n1.begin() + sindex); + } + else + { + nr.insert(nr.end(), n1.end() - sindex, n1.end()); + n1.erase(n1.end() - sindex, n1.end()); + } + ret = true; + } + return ret; +} + +int SequencesRewriter::componentContains(std::vector<Node>& n1, + std::vector<Node>& n2, + std::vector<Node>& nb, + std::vector<Node>& ne, + bool computeRemainder, + int remainderDir) +{ + Assert(nb.empty()); + Assert(ne.empty()); + // if n2 is a singleton, we can do optimized version here + if (n2.size() == 1) + { + for (unsigned i = 0; i < n1.size(); i++) + { + Node n1rb; + Node n1re; + if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder)) + { + if (computeRemainder) + { + n1[i] = n2[0]; + if (remainderDir != -1) + { + if (!n1re.isNull()) + { + ne.push_back(n1re); + } + ne.insert(ne.end(), n1.begin() + i + 1, n1.end()); + n1.erase(n1.begin() + i + 1, n1.end()); + } + else if (!n1re.isNull()) + { + n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode( + kind::STRING_CONCAT, n1[i], n1re)); + } + if (remainderDir != 1) + { + nb.insert(nb.end(), n1.begin(), n1.begin() + i); + n1.erase(n1.begin(), n1.begin() + i); + if (!n1rb.isNull()) + { + nb.push_back(n1rb); + } + } + else if (!n1rb.isNull()) + { + n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode( + kind::STRING_CONCAT, n1rb, n1[i])); + } + } + return i; + } + } + } + else if (n1.size() >= n2.size()) + { + unsigned diff = n1.size() - n2.size(); + for (unsigned i = 0; i <= diff; i++) + { + Node n1rb_first; + Node n1re_first; + // first component of n2 must be a suffix + if (componentContainsBase(n1[i], + n2[0], + n1rb_first, + n1re_first, + 1, + computeRemainder && remainderDir != 1)) + { + Assert(n1re_first.isNull()); + for (unsigned j = 1; j < n2.size(); j++) + { + // are we in the last component? + if (j + 1 == n2.size()) + { + Node n1rb_last; + Node n1re_last; + // last component of n2 must be a prefix + if (componentContainsBase(n1[i + j], + n2[j], + n1rb_last, + n1re_last, + -1, + computeRemainder && remainderDir != -1)) + { + Assert(n1rb_last.isNull()); + if (computeRemainder) + { + if (remainderDir != -1) + { + if (!n1re_last.isNull()) + { + ne.push_back(n1re_last); + } + ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end()); + n1.erase(n1.begin() + i + j + 1, n1.end()); + n1[i + j] = n2[j]; + } + if (remainderDir != 1) + { + n1[i] = n2[0]; + nb.insert(nb.end(), n1.begin(), n1.begin() + i); + n1.erase(n1.begin(), n1.begin() + i); + if (!n1rb_first.isNull()) + { + nb.push_back(n1rb_first); + } + } + } + return i; + } + else + { + break; + } + } + else if (n1[i + j] != n2[j]) + { + break; + } + } + } + } + } + return -1; +} + +bool SequencesRewriter::componentContainsBase( + Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder) +{ + Assert(n1rb.isNull()); + Assert(n1re.isNull()); + + NodeManager* nm = NodeManager::currentNM(); + + if (n1 == n2) + { + return true; + } + else + { + if (n1.isConst() && n2.isConst()) + { + size_t len1 = Word::getLength(n1); + size_t len2 = Word::getLength(n2); + if (len2 < len1) + { + if (dir == 1) + { + if (Word::suffix(n1, len2) == n2) + { + if (computeRemainder) + { + n1rb = Word::prefix(n1, len1 - len2); + } + return true; + } + } + else if (dir == -1) + { + if (Word::prefix(n1, len2) == n2) + { + if (computeRemainder) + { + n1re = Word::suffix(n1, len1 - len2); + } + return true; + } + } + else + { + size_t f = Word::find(n1, n2); + if (f != std::string::npos) + { + if (computeRemainder) + { + if (f > 0) + { + n1rb = Word::prefix(n1, f); + } + if (len1 > f + len2) + { + n1re = Word::suffix(n1, len1 - (f + len2)); + } + } + return true; + } + } + } + } + else + { + // cases for: + // n1 = x containing n2 = substr( x, n2[1], n2[2] ) + if (n2.getKind() == kind::STRING_SUBSTR) + { + if (n2[0] == n1) + { + bool success = true; + Node start_pos = n2[1]; + Node end_pos = nm->mkNode(kind::PLUS, n2[1], n2[2]); + Node len_n2s = nm->mkNode(kind::STRING_LENGTH, n2[0]); + if (dir == 1) + { + // To be a suffix, start + length must be greater than + // or equal to the length of the string. + success = checkEntailArith(end_pos, len_n2s); + } + else if (dir == -1) + { + // To be a prefix, must literally start at 0, since + // if we knew it started at <0, it should be rewritten to "", + // if we knew it started at 0, then n2[1] should be rewritten to + // 0. + success = start_pos.isConst() + && start_pos.getConst<Rational>().sgn() == 0; + } + if (success) + { + if (computeRemainder) + { + // we can only compute the remainder if start_pos and end_pos + // are known to be non-negative. + if (!checkEntailArith(start_pos) || !checkEntailArith(end_pos)) + { + return false; + } + if (dir != 1) + { + n1rb = nm->mkNode(kind::STRING_SUBSTR, + n2[0], + nm->mkConst(Rational(0)), + start_pos); + } + if (dir != -1) + { + n1re = nm->mkNode(kind::STRING_SUBSTR, n2[0], end_pos, len_n2s); + } + } + return true; + } + } + } + + if (!computeRemainder && dir == 0) + { + if (n1.getKind() == STRING_STRREPL) + { + // (str.contains (str.replace x y z) w) ---> true + // if (str.contains x w) --> true and (str.contains z w) ---> true + Node xCtnW = checkEntailContains(n1[0], n2); + if (!xCtnW.isNull() && xCtnW.getConst<bool>()) + { + Node zCtnW = checkEntailContains(n1[2], n2); + if (!zCtnW.isNull() && zCtnW.getConst<bool>()) + { + return true; + } + } + } + } + } + } + return false; +} + +bool SequencesRewriter::stripConstantEndpoints(std::vector<Node>& n1, + std::vector<Node>& n2, + std::vector<Node>& nb, + std::vector<Node>& ne, + int dir) +{ + Assert(nb.empty()); + Assert(ne.empty()); + + NodeManager* nm = NodeManager::currentNM(); + bool changed = false; + // for ( forwards, backwards ) direction + for (unsigned r = 0; r < 2; r++) + { + if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1)) + { + unsigned index0 = r == 0 ? 0 : n1.size() - 1; + unsigned index1 = r == 0 ? 0 : n2.size() - 1; + bool removeComponent = false; + Node n1cmp = n1[index0]; + + if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0) + { + return false; + } + + std::vector<Node> sss; + std::vector<Node> sls; + n1cmp = decomposeSubstrChain(n1cmp, sss, sls); + Trace("strings-rewrite-debug2") + << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1] + << ", dir = " << dir << std::endl; + if (n1cmp.isConst()) + { + CVC4::String s = n1cmp.getConst<String>(); + // overlap is an overapproximation of the number of characters + // n2[index1] can match in s + unsigned overlap = s.size(); + if (n2[index1].isConst()) + { + CVC4::String t = n2[index1].getConst<String>(); + std::size_t ret = r == 0 ? s.find(t) : s.rfind(t); + if (ret == std::string::npos) + { + if (n1.size() == 1) + { + // can remove everything + // e.g. str.contains( "abc", str.++( "ba", x ) ) --> + // str.contains( "", str.++( "ba", x ) ) + removeComponent = true; + } + else if (sss.empty()) // only if not substr + { + // check how much overlap there is + // This is used to partially strip off the endpoint + // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) --> + // str.contains( str.++( "c", x ), str.++( "cd", y ) ) + overlap = r == 0 ? s.overlap(t) : t.overlap(s); + } + else + { + // if we are looking at a substring, we can remove the component + // if there is no overlap + // e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" ) + // --> str.contains( x, "a" ) + removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0); + } + } + else if (sss.empty()) // only if not substr + { + Assert(ret < s.size()); + // can strip off up to the find position, e.g. + // str.contains( str.++( "abc", x ), str.++( "b", y ) ) --> + // str.contains( str.++( "bc", x ), str.++( "b", y ) ), + // and + // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) --> + // str.contains( str.++( x, "abb" ), str.++( y, "b" ) ) + overlap = s.size() - ret; + } + } + else + { + // inconclusive + } + // process the overlap + if (overlap < s.size()) + { + changed = true; + if (overlap == 0) + { + removeComponent = true; + } + else + { + // can drop the prefix (resp. suffix) from the first (resp. last) + // component + if (r == 0) + { + nb.push_back(nm->mkConst(s.prefix(s.size() - overlap))); + n1[index0] = nm->mkConst(s.suffix(overlap)); + } + else + { + ne.push_back(nm->mkConst(s.suffix(s.size() - overlap))); + n1[index0] = nm->mkConst(s.prefix(overlap)); + } + } + } + } + else if (n1cmp.getKind() == kind::STRING_ITOS) + { + if (n2[index1].isConst()) + { + CVC4::String t = n2[index1].getConst<String>(); + + if (n1.size() == 1) + { + // if n1.size()==1, then if n2[index1] is not a number, we can drop + // the entire component + // e.g. str.contains( int.to.str(x), "123a45") --> false + if (!t.isNumber()) + { + removeComponent = true; + } + } + else + { + const std::vector<unsigned>& tvec = t.getVec(); + Assert(tvec.size() > 0); + + // if n1.size()>1, then if the first (resp. last) character of + // n2[index1] + // is not a digit, we can drop the entire component, e.g.: + // str.contains( str.++( int.to.str(x), y ), "a12") --> + // str.contains( y, "a12" ) + // str.contains( str.++( y, int.to.str(x) ), "a0b") --> + // str.contains( y, "a0b" ) + unsigned i = r == 0 ? 0 : (tvec.size() - 1); + if (!String::isDigit(tvec[i])) + { + removeComponent = true; + } + } + } + } + if (removeComponent) + { + // can drop entire first (resp. last) component + if (r == 0) + { + nb.push_back(n1[index0]); + n1.erase(n1.begin(), n1.begin() + 1); + } + else + { + ne.push_back(n1[index0]); + n1.pop_back(); + } + if (n1.empty()) + { + // if we've removed everything, just return (we will rewrite to false) + return true; + } + else + { + changed = true; + } + } + } + } + // TODO (#1180) : computing the maximal overlap in this function may be + // important. + // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) ---> + // false + // ...since str.to.int(x) can contain at most 1 character from "2aaaa", + // leaving 4 characters + // which is larger that the upper bound for length of str.substr(y,0,3), + // which is 3. + return changed; +} + +Node SequencesRewriter::canonicalStrForSymbolicLength(Node len) +{ + NodeManager* nm = NodeManager::currentNM(); + + Node res; + if (len.getKind() == kind::CONST_RATIONAL) + { + // c -> "A" repeated c times + Rational ratLen = len.getConst<Rational>(); + Assert(ratLen.getDenominator() == 1); + Integer intLen = ratLen.getNumerator(); + res = nm->mkConst(String(std::string(intLen.getUnsignedInt(), 'A'))); + } + else if (len.getKind() == kind::PLUS) + { + // x + y -> norm(x) + norm(y) + NodeBuilder<> concatBuilder(kind::STRING_CONCAT); + for (const auto& n : len) + { + Node sn = canonicalStrForSymbolicLength(n); + if (sn.isNull()) + { + return Node::null(); + } + std::vector<Node> snChildren; + utils::getConcat(sn, snChildren); + concatBuilder.append(snChildren); + } + res = concatBuilder.constructNode(); + } + else if (len.getKind() == kind::MULT && len.getNumChildren() == 2 + && len[0].isConst()) + { + // c * x -> norm(x) repeated c times + Rational ratReps = len[0].getConst<Rational>(); + Assert(ratReps.getDenominator() == 1); + Integer intReps = ratReps.getNumerator(); + + Node nRep = canonicalStrForSymbolicLength(len[1]); + std::vector<Node> nRepChildren; + utils::getConcat(nRep, nRepChildren); + NodeBuilder<> concatBuilder(kind::STRING_CONCAT); + for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++) + { + concatBuilder.append(nRepChildren); + } + res = concatBuilder.constructNode(); + } + else if (len.getKind() == kind::STRING_LENGTH) + { + // len(x) -> x + res = len[0]; + } + return res; +} + +Node SequencesRewriter::lengthPreserveRewrite(Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n)); + Node res = canonicalStrForSymbolicLength(len); + return res.isNull() ? n : res; +} + +Node SequencesRewriter::checkEntailContains(Node a, Node b, bool fullRewriter) +{ + NodeManager* nm = NodeManager::currentNM(); + Node ctn = nm->mkNode(kind::STRING_STRCTN, a, b); + + if (fullRewriter) + { + ctn = Rewriter::rewrite(ctn); + } + else + { + Node prev; + do + { + prev = ctn; + ctn = rewriteContains(ctn); + } while (prev != ctn && ctn.getKind() == kind::STRING_STRCTN); + } + + Assert(ctn.getType().isBoolean()); + return ctn.isConst() ? ctn : Node::null(); +} + +bool SequencesRewriter::checkEntailNonEmpty(Node a) +{ + Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a); + len = Rewriter::rewrite(len); + return checkEntailArith(len, true); +} + +bool SequencesRewriter::checkEntailLengthOne(Node s, bool strict) +{ + NodeManager* nm = NodeManager::currentNM(); + Node one = nm->mkConst(Rational(1)); + Node len = nm->mkNode(STRING_LENGTH, s); + len = Rewriter::rewrite(len); + return checkEntailArith(one, len) && (!strict || checkEntailArith(len, true)); +} + +bool SequencesRewriter::checkEntailArithEq(Node a, Node b) +{ + if (a == b) + { + return true; + } + else + { + Node ar = Rewriter::rewrite(a); + Node br = Rewriter::rewrite(b); + return ar == br; + } +} + +bool SequencesRewriter::checkEntailArith(Node a, Node b, bool strict) +{ + if (a == b) + { + return !strict; + } + else + { + Node diff = NodeManager::currentNM()->mkNode(kind::MINUS, a, b); + return checkEntailArith(diff, strict); + } +} + +struct StrCheckEntailArithTag +{ +}; +struct StrCheckEntailArithComputedTag +{ +}; +/** Attribute true for expressions for which checkEntailArith returned true */ +typedef expr::Attribute<StrCheckEntailArithTag, bool> StrCheckEntailArithAttr; +typedef expr::Attribute<StrCheckEntailArithComputedTag, bool> + StrCheckEntailArithComputedAttr; + +bool SequencesRewriter::checkEntailArith(Node a, bool strict) +{ + if (a.isConst()) + { + return a.getConst<Rational>().sgn() >= (strict ? 1 : 0); + } + + Node ar = + strict + ? NodeManager::currentNM()->mkNode( + kind::MINUS, a, NodeManager::currentNM()->mkConst(Rational(1))) + : a; + ar = Rewriter::rewrite(ar); + + if (ar.getAttribute(StrCheckEntailArithComputedAttr())) + { + return ar.getAttribute(StrCheckEntailArithAttr()); + } + + bool ret = checkEntailArithInternal(ar); + if (!ret) + { + // try with approximations + ret = checkEntailArithApprox(ar); + } + // cache the result + ar.setAttribute(StrCheckEntailArithAttr(), ret); + ar.setAttribute(StrCheckEntailArithComputedAttr(), true); + return ret; +} + +bool SequencesRewriter::checkEntailArithApprox(Node ar) +{ + Assert(Rewriter::rewrite(ar) == ar); + NodeManager* nm = NodeManager::currentNM(); + std::map<Node, Node> msum; + Trace("strings-ent-approx-debug") + << "Setup arithmetic approximations for " << ar << std::endl; + if (!ArithMSum::getMonomialSum(ar, msum)) + { + Trace("strings-ent-approx-debug") + << "...failed to get monomial sum!" << std::endl; + return false; + } + // for each monomial v*c, mApprox[v] a list of + // possibilities for how the term can be soundly approximated, that is, + // if mApprox[v] contains av, then v*c > av*c. Notice that if c + // is positive, then v > av, otherwise if c is negative, then v < av. + // In other words, av is an under-approximation if c is positive, and an + // over-approximation if c is negative. + bool changed = false; + std::map<Node, std::vector<Node> > mApprox; + // map from approximations to their monomial sums + std::map<Node, std::map<Node, Node> > approxMsums; + // aarSum stores each monomial that does not have multiple approximations + std::vector<Node> aarSum; + for (std::pair<const Node, Node>& m : msum) + { + Node v = m.first; + Node c = m.second; + Trace("strings-ent-approx-debug") + << "Get approximations " << v << "..." << std::endl; + if (v.isNull()) + { + Node mn = c.isNull() ? nm->mkConst(Rational(1)) : c; + aarSum.push_back(mn); + } + else + { + // c.isNull() means c = 1 + bool isOverApprox = !c.isNull() && c.getConst<Rational>().sgn() == -1; + std::vector<Node>& approx = mApprox[v]; + std::unordered_set<Node, NodeHashFunction> visited; + std::vector<Node> toProcess; + toProcess.push_back(v); + do + { + Node curr = toProcess.back(); + Trace("strings-ent-approx-debug") << " process " << curr << std::endl; + curr = Rewriter::rewrite(curr); + toProcess.pop_back(); + if (visited.find(curr) == visited.end()) + { + visited.insert(curr); + std::vector<Node> currApprox; + getArithApproximations(curr, currApprox, isOverApprox); + if (currApprox.empty()) + { + Trace("strings-ent-approx-debug") + << "...approximation: " << curr << std::endl; + // no approximations, thus curr is a possibility + approx.push_back(curr); + } + else + { + toProcess.insert( + toProcess.end(), currApprox.begin(), currApprox.end()); + } + } + } while (!toProcess.empty()); + Assert(!approx.empty()); + // if we have only one approximation, move it to final + if (approx.size() == 1) + { + changed = v != approx[0]; + Node mn = ArithMSum::mkCoeffTerm(c, approx[0]); + aarSum.push_back(mn); + mApprox.erase(v); + } + else + { + // compute monomial sum form for each approximation, used below + for (const Node& aa : approx) + { + if (approxMsums.find(aa) == approxMsums.end()) + { + CVC4_UNUSED bool ret = + ArithMSum::getMonomialSum(aa, approxMsums[aa]); + Assert(ret); + } + } + changed = true; + } + } + } + if (!changed) + { + // approximations had no effect, return + Trace("strings-ent-approx-debug") << "...no approximations" << std::endl; + return false; + } + // get the current "fixed" sum for the abstraction of ar + Node aar = aarSum.empty() + ? nm->mkConst(Rational(0)) + : (aarSum.size() == 1 ? aarSum[0] : nm->mkNode(PLUS, aarSum)); + aar = Rewriter::rewrite(aar); + Trace("strings-ent-approx-debug") + << "...processed fixed sum " << aar << " with " << mApprox.size() + << " approximated monomials." << std::endl; + // if we have a choice of how to approximate + if (!mApprox.empty()) + { + // convert aar back to monomial sum + std::map<Node, Node> msumAar; + if (!ArithMSum::getMonomialSum(aar, msumAar)) + { + return false; + } + if (Trace.isOn("strings-ent-approx")) + { + Trace("strings-ent-approx") + << "---- Check arithmetic entailment by under-approximation " << ar + << " >= 0" << std::endl; + Trace("strings-ent-approx") << "FIXED:" << std::endl; + ArithMSum::debugPrintMonomialSum(msumAar, "strings-ent-approx"); + Trace("strings-ent-approx") << "APPROX:" << std::endl; + for (std::pair<const Node, std::vector<Node> >& a : mApprox) + { + Node c = msum[a.first]; + Trace("strings-ent-approx") << " "; + if (!c.isNull()) + { + Trace("strings-ent-approx") << c << " * "; + } + Trace("strings-ent-approx") + << a.second << " ...from " << a.first << std::endl; + } + Trace("strings-ent-approx") << std::endl; + } + Rational one(1); + // incorporate monomials one at a time that have a choice of approximations + while (!mApprox.empty()) + { + Node v; + Node vapprox; + int maxScore = -1; + // Look at each approximation, take the one with the best score. + // Notice that we are in the process of trying to prove + // ( c1*t1 + .. + cn*tn ) + ( approx_1 | ... | approx_m ) >= 0, + // where c1*t1 + .. + cn*tn is the "fixed" component of our sum (aar) + // and approx_1 ... approx_m are possible approximations. The + // intution here is that we want coefficients c1...cn to be positive. + // This is because arithmetic string terms t1...tn (which may be + // applications of len, indexof, str.to.int) are never entailed to be + // negative. Hence, we add the approx_i that contributes the "most" + // towards making all constants c1...cn positive and cancelling negative + // monomials in approx_i itself. + for (std::pair<const Node, std::vector<Node> >& nam : mApprox) + { + Node cr = msum[nam.first]; + for (const Node& aa : nam.second) + { + unsigned helpsCancelCount = 0; + unsigned addsObligationCount = 0; + std::map<Node, Node>::iterator it; + // we are processing an approximation cr*( c1*t1 + ... + cn*tn ) + for (std::pair<const Node, Node>& aam : approxMsums[aa]) + { + // Say aar is of the form t + c*ti, and aam is the monomial ci*ti + // where ci != 0. We say aam: + // (1) helps cancel if c != 0 and c>0 != ci>0 + // (2) adds obligation if c>=0 and c+ci<0 + Node ti = aam.first; + Node ci = aam.second; + if (!cr.isNull()) + { + ci = ci.isNull() ? cr + : Rewriter::rewrite(nm->mkNode(MULT, ci, cr)); + } + Trace("strings-ent-approx-debug") << ci << "*" << ti << " "; + int ciSgn = ci.isNull() ? 1 : ci.getConst<Rational>().sgn(); + it = msumAar.find(ti); + if (it != msumAar.end()) + { + Node c = it->second; + int cSgn = c.isNull() ? 1 : c.getConst<Rational>().sgn(); + if (cSgn == 0) + { + addsObligationCount += (ciSgn == -1 ? 1 : 0); + } + else if (cSgn != ciSgn) + { + helpsCancelCount++; + Rational r1 = c.isNull() ? one : c.getConst<Rational>(); + Rational r2 = ci.isNull() ? one : ci.getConst<Rational>(); + Rational r12 = r1 + r2; + if (r12.sgn() == -1) + { + addsObligationCount++; + } + } + } + else + { + addsObligationCount += (ciSgn == -1 ? 1 : 0); + } + } + Trace("strings-ent-approx-debug") + << "counts=" << helpsCancelCount << "," << addsObligationCount + << " for " << aa << " into " << aar << std::endl; + int score = (addsObligationCount > 0 ? 0 : 2) + + (helpsCancelCount > 0 ? 1 : 0); + // if its the best, update v and vapprox + if (v.isNull() || score > maxScore) + { + v = nam.first; + vapprox = aa; + maxScore = score; + } + } + if (!v.isNull()) + { + break; + } + } + Trace("strings-ent-approx") + << "- Decide " << v << " = " << vapprox << std::endl; + // we incorporate v approximated by vapprox into the overall approximation + // for ar + Assert(!v.isNull() && !vapprox.isNull()); + Assert(msum.find(v) != msum.end()); + Node mn = ArithMSum::mkCoeffTerm(msum[v], vapprox); + aar = nm->mkNode(PLUS, aar, mn); + // update the msumAar map + aar = Rewriter::rewrite(aar); + msumAar.clear(); + if (!ArithMSum::getMonomialSum(aar, msumAar)) + { + Assert(false); + Trace("strings-ent-approx") + << "...failed to get monomial sum!" << std::endl; + return false; + } + // we have processed the approximation for v + mApprox.erase(v); + } + Trace("strings-ent-approx") << "-----------------" << std::endl; + } + if (aar == ar) + { + Trace("strings-ent-approx-debug") + << "...approximation had no effect" << std::endl; + // this should never happen, but we avoid the infinite loop for sanity here + Assert(false); + return false; + } + // Check entailment on the approximation of ar. + // Notice that this may trigger further reasoning by approximation. For + // example, len( replace( x ++ y, substr( x, 0, n ), z ) ) may be + // under-approximated as len( x ) + len( y ) - len( substr( x, 0, n ) ) on + // this call, where in the recursive call we may over-approximate + // len( substr( x, 0, n ) ) as len( x ). In this example, we can infer + // that len( replace( x ++ y, substr( x, 0, n ), z ) ) >= len( y ) in two + // steps. + if (checkEntailArith(aar)) + { + Trace("strings-ent-approx") + << "*** StrArithApprox: showed " << ar + << " >= 0 using under-approximation!" << std::endl; + Trace("strings-ent-approx") + << "*** StrArithApprox: under-approximation was " << aar << std::endl; + return true; + } + return false; +} + +void SequencesRewriter::getArithApproximations(Node a, + std::vector<Node>& approx, + bool isOverApprox) +{ + NodeManager* nm = NodeManager::currentNM(); + // We do not handle PLUS here since this leads to exponential behavior. + // Instead, this is managed, e.g. during checkEntailArithApprox, where + // PLUS terms are expanded "on-demand" during the reasoning. + Trace("strings-ent-approx-debug") + << "Get arith approximations " << a << std::endl; + Kind ak = a.getKind(); + if (ak == MULT) + { + Node c; + Node v; + if (ArithMSum::getMonomial(a, c, v)) + { + bool isNeg = c.getConst<Rational>().sgn() == -1; + getArithApproximations(v, approx, isNeg ? !isOverApprox : isOverApprox); + for (unsigned i = 0, size = approx.size(); i < size; i++) + { + approx[i] = nm->mkNode(MULT, c, approx[i]); + } + } + } + else if (ak == STRING_LENGTH) + { + Kind aak = a[0].getKind(); + if (aak == STRING_SUBSTR) + { + // over,under-approximations for len( substr( x, n, m ) ) + Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]); + if (isOverApprox) + { + // m >= 0 implies + // m >= len( substr( x, n, m ) ) + if (checkEntailArith(a[0][2])) + { + approx.push_back(a[0][2]); + } + if (checkEntailArith(lenx, a[0][1])) + { + // n <= len( x ) implies + // len( x ) - n >= len( substr( x, n, m ) ) + approx.push_back(nm->mkNode(MINUS, lenx, a[0][1])); + } + else + { + // len( x ) >= len( substr( x, n, m ) ) + approx.push_back(lenx); + } + } + else + { + // 0 <= n and n+m <= len( x ) implies + // m <= len( substr( x, n, m ) ) + Node npm = nm->mkNode(PLUS, a[0][1], a[0][2]); + if (checkEntailArith(a[0][1]) && checkEntailArith(lenx, npm)) + { + approx.push_back(a[0][2]); + } + // 0 <= n and n+m >= len( x ) implies + // len(x)-n <= len( substr( x, n, m ) ) + if (checkEntailArith(a[0][1]) && checkEntailArith(npm, lenx)) + { + approx.push_back(nm->mkNode(MINUS, lenx, a[0][1])); + } + } + } + else if (aak == STRING_STRREPL) + { + // over,under-approximations for len( replace( x, y, z ) ) + // notice this is either len( x ) or ( len( x ) + len( z ) - len( y ) ) + Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]); + Node leny = nm->mkNode(STRING_LENGTH, a[0][1]); + Node lenz = nm->mkNode(STRING_LENGTH, a[0][2]); + if (isOverApprox) + { + if (checkEntailArith(leny, lenz)) + { + // len( y ) >= len( z ) implies + // len( x ) >= len( replace( x, y, z ) ) + approx.push_back(lenx); + } + else + { + // len( x ) + len( z ) >= len( replace( x, y, z ) ) + approx.push_back(nm->mkNode(PLUS, lenx, lenz)); + } + } + else + { + if (checkEntailArith(lenz, leny) || checkEntailArith(lenz, lenx)) + { + // len( y ) <= len( z ) or len( x ) <= len( z ) implies + // len( x ) <= len( replace( x, y, z ) ) + approx.push_back(lenx); + } + else + { + // len( x ) - len( y ) <= len( replace( x, y, z ) ) + approx.push_back(nm->mkNode(MINUS, lenx, leny)); + } + } + } + else if (aak == STRING_ITOS) + { + // over,under-approximations for len( int.to.str( x ) ) + if (isOverApprox) + { + if (checkEntailArith(a[0][0], false)) + { + if (checkEntailArith(a[0][0], true)) + { + // x > 0 implies + // x >= len( int.to.str( x ) ) + approx.push_back(a[0][0]); + } + else + { + // x >= 0 implies + // x+1 >= len( int.to.str( x ) ) + approx.push_back( + nm->mkNode(PLUS, nm->mkConst(Rational(1)), a[0][0])); + } + } + } + else + { + if (checkEntailArith(a[0][0])) + { + // x >= 0 implies + // len( int.to.str( x ) ) >= 1 + approx.push_back(nm->mkConst(Rational(1))); + } + // other crazy things are possible here, e.g. + // len( int.to.str( len( y ) + 10 ) ) >= 2 + } + } + } + else if (ak == STRING_STRIDOF) + { + // over,under-approximations for indexof( x, y, n ) + if (isOverApprox) + { + Node lenx = nm->mkNode(STRING_LENGTH, a[0]); + Node leny = nm->mkNode(STRING_LENGTH, a[1]); + if (checkEntailArith(lenx, leny)) + { + // len( x ) >= len( y ) implies + // len( x ) - len( y ) >= indexof( x, y, n ) + approx.push_back(nm->mkNode(MINUS, lenx, leny)); + } + else + { + // len( x ) >= indexof( x, y, n ) + approx.push_back(lenx); + } + } + else + { + // TODO?: + // contains( substr( x, n, len( x ) ), y ) implies + // n <= indexof( x, y, n ) + // ...hard to test, runs risk of non-termination + + // -1 <= indexof( x, y, n ) + approx.push_back(nm->mkConst(Rational(-1))); + } + } + else if (ak == STRING_STOI) + { + // over,under-approximations for str.to.int( x ) + if (isOverApprox) + { + // TODO?: + // y >= 0 implies + // y >= str.to.int( int.to.str( y ) ) + } + else + { + // -1 <= str.to.int( x ) + approx.push_back(nm->mkConst(Rational(-1))); + } + } + Trace("strings-ent-approx-debug") << "Return " << approx.size() << std::endl; +} + +bool SequencesRewriter::checkEntailMultisetSubset(Node a, Node b) +{ + NodeManager* nm = NodeManager::currentNM(); + + std::vector<Node> avec; + utils::getConcat(getMultisetApproximation(a), avec); + std::vector<Node> bvec; + utils::getConcat(b, bvec); + + std::map<Node, unsigned> num_nconst[2]; + std::map<Node, unsigned> num_const[2]; + for (unsigned j = 0; j < 2; j++) + { + std::vector<Node>& jvec = j == 0 ? avec : bvec; + for (const Node& cc : jvec) + { + if (cc.isConst()) + { + num_const[j][cc]++; + } + else + { + num_nconst[j][cc]++; + } + } + } + bool ms_success = true; + for (std::pair<const Node, unsigned>& nncp : num_nconst[0]) + { + if (nncp.second > num_nconst[1][nncp.first]) + { + ms_success = false; + break; + } + } + if (ms_success) + { + // count the number of constant characters in the first argument + std::map<Node, unsigned> count_const[2]; + std::vector<Node> chars; + for (unsigned j = 0; j < 2; j++) + { + for (std::pair<const Node, unsigned>& ncp : num_const[j]) + { + Node cn = ncp.first; + Assert(cn.isConst()); + std::vector<unsigned> cc_vec; + const std::vector<unsigned>& cvec = cn.getConst<String>().getVec(); + for (unsigned i = 0, size = cvec.size(); i < size; i++) + { + // make the character + cc_vec.clear(); + cc_vec.insert(cc_vec.end(), cvec.begin() + i, cvec.begin() + i + 1); + Node ch = nm->mkConst(String(cc_vec)); + count_const[j][ch] += ncp.second; + if (std::find(chars.begin(), chars.end(), ch) == chars.end()) + { + chars.push_back(ch); + } + } + } + } + Trace("strings-entail-ms-ss") + << "For " << a << " and " << b << " : " << std::endl; + for (const Node& ch : chars) + { + Trace("strings-entail-ms-ss") << " # occurrences of substring "; + Trace("strings-entail-ms-ss") << ch << " in arguments is "; + Trace("strings-entail-ms-ss") + << count_const[0][ch] << " / " << count_const[1][ch] << std::endl; + if (count_const[0][ch] < count_const[1][ch]) + { + return true; + } + } + + // TODO (#1180): count the number of 2,3,4,.. character substrings + // for example: + // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false + // since the second argument contains more occurrences of "bb". + // note this is orthogonal reasoning to inductive reasoning + // via regular membership reduction in Liang et al CAV 2015. + } + return false; +} + +Node SequencesRewriter::checkEntailHomogeneousString(Node a) +{ + NodeManager* nm = NodeManager::currentNM(); + + std::vector<Node> avec; + utils::getConcat(getMultisetApproximation(a), avec); + + bool cValid = false; + unsigned c = 0; + for (const Node& ac : avec) + { + if (ac.isConst()) + { + std::vector<unsigned> acv = ac.getConst<String>().getVec(); + for (unsigned cc : acv) + { + if (!cValid) + { + cValid = true; + c = cc; + } + else if (c != cc) + { + // Found a different character + return Node::null(); + } + } + } + else + { + // Could produce a different character + return Node::null(); + } + } + + if (!cValid) + { + return nm->mkConst(String("")); + } + + std::vector<unsigned> cv = {c}; + return nm->mkConst(String(cv)); +} + +Node SequencesRewriter::getMultisetApproximation(Node a) +{ + NodeManager* nm = NodeManager::currentNM(); + if (a.getKind() == STRING_SUBSTR) + { + return a[0]; + } + else if (a.getKind() == STRING_STRREPL) + { + return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2])); + } + else if (a.getKind() == STRING_CONCAT) + { + NodeBuilder<> nb(STRING_CONCAT); + for (const Node& ac : a) + { + nb << getMultisetApproximation(ac); + } + return nb.constructNode(); + } + else + { + return a; + } +} + +bool SequencesRewriter::checkEntailArithWithEqAssumption(Node assumption, + Node a, + bool strict) +{ + Assert(assumption.getKind() == kind::EQUAL); + Assert(Rewriter::rewrite(assumption) == assumption); + + // Find candidates variables to compute substitutions for + std::unordered_set<Node, NodeHashFunction> candVars; + std::vector<Node> toVisit = {assumption}; + while (!toVisit.empty()) + { + Node curr = toVisit.back(); + toVisit.pop_back(); + + if (curr.getKind() == kind::PLUS || curr.getKind() == kind::MULT + || curr.getKind() == kind::MINUS || curr.getKind() == kind::EQUAL) + { + for (const auto& currChild : curr) + { + toVisit.push_back(currChild); + } + } + else if (curr.isVar() && Theory::theoryOf(curr) == THEORY_ARITH) + { + candVars.insert(curr); + } + else if (curr.getKind() == kind::STRING_LENGTH) + { + candVars.insert(curr); + } + } + + // Check if any of the candidate variables are in n + Node v; + Assert(toVisit.empty()); + toVisit.push_back(a); + while (!toVisit.empty()) + { + Node curr = toVisit.back(); + toVisit.pop_back(); + + for (const auto& currChild : curr) + { + toVisit.push_back(currChild); + } + + if (candVars.find(curr) != candVars.end()) + { + v = curr; + break; + } + } + + if (v.isNull()) + { + // No suitable candidate found + return false; + } + + Node solution = ArithMSum::solveEqualityFor(assumption, v); + if (solution.isNull()) + { + // Could not solve for v + return false; + } + + a = a.substitute(TNode(v), TNode(solution)); + return checkEntailArith(a, strict); +} + +bool SequencesRewriter::checkEntailArithWithAssumption(Node assumption, + Node a, + Node b, + bool strict) +{ + Assert(Rewriter::rewrite(assumption) == assumption); + + NodeManager* nm = NodeManager::currentNM(); + + if (!assumption.isConst() && assumption.getKind() != kind::EQUAL) + { + // We rewrite inequality assumptions from x <= y to x + (str.len s) = y + // where s is some fresh string variable. We use (str.len s) because + // (str.len s) must be non-negative for the equation to hold. + Node x, y; + if (assumption.getKind() == kind::GEQ) + { + x = assumption[0]; + y = assumption[1]; + } + else + { + // (not (>= s t)) --> (>= (t - 1) s) + Assert(assumption.getKind() == kind::NOT + && assumption[0].getKind() == kind::GEQ); + x = nm->mkNode(kind::MINUS, assumption[0][1], nm->mkConst(Rational(1))); + y = assumption[0][0]; + } + + Node s = nm->mkBoundVar("slackVal", nm->stringType()); + Node slen = nm->mkNode(kind::STRING_LENGTH, s); + assumption = Rewriter::rewrite( + nm->mkNode(kind::EQUAL, x, nm->mkNode(kind::PLUS, y, slen))); + } + + Node diff = nm->mkNode(kind::MINUS, a, b); + bool res = false; + if (assumption.isConst()) + { + bool assumptionBool = assumption.getConst<bool>(); + if (assumptionBool) + { + res = checkEntailArith(diff, strict); + } + else + { + res = true; + } + } + else + { + res = checkEntailArithWithEqAssumption(assumption, diff, strict); + } + return res; +} + +bool SequencesRewriter::checkEntailArithWithAssumptions( + std::vector<Node> assumptions, Node a, Node b, bool strict) +{ + // TODO: We currently try to show the entailment with each assumption + // independently. In the future, we should make better use of multiple + // assumptions. + bool res = false; + for (const auto& assumption : assumptions) + { + Assert(Rewriter::rewrite(assumption) == assumption); + + if (checkEntailArithWithAssumption(assumption, a, b, strict)) + { + res = true; + break; + } + } + return res; +} + +Node SequencesRewriter::getConstantArithBound(Node a, bool isLower) +{ + Assert(Rewriter::rewrite(a) == a); + Node ret; + if (a.isConst()) + { + ret = a; + } + else if (a.getKind() == kind::STRING_LENGTH) + { + if (isLower) + { + ret = NodeManager::currentNM()->mkConst(Rational(0)); + } + } + else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT) + { + std::vector<Node> children; + bool success = true; + for (unsigned i = 0; i < a.getNumChildren(); i++) + { + Node ac = getConstantArithBound(a[i], isLower); + if (ac.isNull()) + { + ret = ac; + success = false; + break; + } + else + { + if (ac.getConst<Rational>().sgn() == 0) + { + if (a.getKind() == kind::MULT) + { + ret = ac; + success = false; + break; + } + } + else + { + if (a.getKind() == kind::MULT) + { + if ((ac.getConst<Rational>().sgn() > 0) != isLower) + { + ret = Node::null(); + success = false; + break; + } + } + children.push_back(ac); + } + } + } + if (success) + { + if (children.empty()) + { + ret = NodeManager::currentNM()->mkConst(Rational(0)); + } + else if (children.size() == 1) + { + ret = children[0]; + } + else + { + ret = NodeManager::currentNM()->mkNode(a.getKind(), children); + ret = Rewriter::rewrite(ret); + } + } + } + Trace("strings-rewrite-cbound") + << "Constant " << (isLower ? "lower" : "upper") << " bound for " << a + << " is " << ret << std::endl; + Assert(ret.isNull() || ret.isConst()); + // entailment check should be at least as powerful as computing a lower bound + Assert(!isLower || ret.isNull() || ret.getConst<Rational>().sgn() < 0 + || checkEntailArith(a, false)); + Assert(!isLower || ret.isNull() || ret.getConst<Rational>().sgn() <= 0 + || checkEntailArith(a, true)); + return ret; +} + +Node SequencesRewriter::getFixedLengthForRegexp(Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + if (n.getKind() == STRING_TO_REGEXP) + { + Node ret = nm->mkNode(STRING_LENGTH, n[0]); + ret = Rewriter::rewrite(ret); + if (ret.isConst()) + { + return ret; + } + } + else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE) + { + return nm->mkConst(Rational(1)); + } + else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER) + { + Node ret; + for (const Node& nc : n) + { + Node flc = getFixedLengthForRegexp(nc); + if (flc.isNull() || (!ret.isNull() && ret != flc)) + { + return Node::null(); + } + else if (ret.isNull()) + { + // first time + ret = flc; + } + } + return ret; + } + else if (n.getKind() == REGEXP_CONCAT) + { + NodeBuilder<> nb(PLUS); + for (const Node& nc : n) + { + Node flc = getFixedLengthForRegexp(nc); + if (flc.isNull()) + { + return flc; + } + nb << flc; + } + Node ret = nb.constructNode(); + ret = Rewriter::rewrite(ret); + return ret; + } + return Node::null(); +} + +bool SequencesRewriter::checkEntailArithInternal(Node a) +{ + Assert(Rewriter::rewrite(a) == a); + // check whether a >= 0 + if (a.isConst()) + { + return a.getConst<Rational>().sgn() >= 0; + } + else if (a.getKind() == kind::STRING_LENGTH) + { + // str.len( t ) >= 0 + return true; + } + else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT) + { + for (unsigned i = 0; i < a.getNumChildren(); i++) + { + if (!checkEntailArithInternal(a[i])) + { + return false; + } + } + // t1 >= 0 ^ ... ^ tn >= 0 => t1 op ... op tn >= 0 + return true; + } + + return false; +} + +Node SequencesRewriter::decomposeSubstrChain(Node s, + std::vector<Node>& ss, + std::vector<Node>& ls) +{ + Assert(ss.empty()); + Assert(ls.empty()); + while (s.getKind() == STRING_SUBSTR) + { + ss.push_back(s[1]); + ls.push_back(s[2]); + s = s[0]; + } + std::reverse(ss.begin(), ss.end()); + std::reverse(ls.begin(), ls.end()); + return s; +} + +Node SequencesRewriter::mkSubstrChain(Node base, + const std::vector<Node>& ss, + const std::vector<Node>& ls) +{ + NodeManager* nm = NodeManager::currentNM(); + for (unsigned i = 0, size = ss.size(); i < size; i++) + { + base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]); + } + return base; +} + +Node SequencesRewriter::getStringOrEmpty(Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + Node res; + while (res.isNull()) + { + switch (n.getKind()) + { + case kind::STRING_STRREPL: + { + Node empty = nm->mkConst(::CVC4::String("")); + if (n[0] == empty) + { + // (str.replace "" x y) --> y + n = n[2]; + break; + } + + if (checkEntailLengthOne(n[0]) && n[2] == empty) + { + // (str.replace "A" x "") --> "A" + res = n[0]; + break; + } + + res = n; + break; + } + case kind::STRING_SUBSTR: + { + if (checkEntailLengthOne(n[0])) + { + // (str.substr "A" x y) --> "A" + res = n[0]; + break; + } + res = n; + break; + } + default: + { + res = n; + break; + } + } + } + return res; +} + +bool SequencesRewriter::inferZerosInSumGeq(Node x, + std::vector<Node>& ys, + std::vector<Node>& zeroYs) +{ + Assert(zeroYs.empty()); + + NodeManager* nm = NodeManager::currentNM(); + + // Check if we can show that y1 + ... + yn >= x + Node sum = (ys.size() > 1) ? nm->mkNode(PLUS, ys) : ys[0]; + if (!checkEntailArith(sum, x)) + { + return false; + } + + // Try to remove yi one-by-one and check if we can still show: + // + // y1 + ... + yi-1 + yi+1 + ... + yn >= x + // + // If that's the case, we know that yi can be zero and the inequality still + // holds. + size_t i = 0; + while (i < ys.size()) + { + Node yi = ys[i]; + std::vector<Node>::iterator pos = ys.erase(ys.begin() + i); + if (ys.size() > 1) + { + sum = nm->mkNode(PLUS, ys); + } + else + { + sum = ys.size() == 1 ? ys[0] : nm->mkConst(Rational(0)); + } + + if (checkEntailArith(sum, x)) + { + zeroYs.push_back(yi); + } + else + { + ys.insert(pos, yi); + i++; + } + } + return true; +} + +Node SequencesRewriter::inferEqsFromContains(Node x, Node y) +{ + NodeManager* nm = NodeManager::currentNM(); + Node emp = nm->mkConst(String("")); + + Node xLen = nm->mkNode(STRING_LENGTH, x); + std::vector<Node> yLens; + if (y.getKind() != STRING_CONCAT) + { + yLens.push_back(nm->mkNode(STRING_LENGTH, y)); + } + else + { + for (const Node& yi : y) + { + yLens.push_back(nm->mkNode(STRING_LENGTH, yi)); + } + } + + std::vector<Node> zeroLens; + if (x == emp) + { + // If x is the empty string, then all ys must be empty, too, and we can + // skip the expensive checks. Note that this is just a performance + // optimization. + zeroLens.swap(yLens); + } + else + { + // Check if we can infer that str.len(x) <= str.len(y). If that is the + // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... + + // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality + // true. The terms that can have length zero without making the inequality + // false must be all be empty if (str.contains x y) is true. + if (!inferZerosInSumGeq(xLen, yLens, zeroLens)) + { + // We could not prove that the inequality holds + return Node::null(); + } + else if (yLens.size() == y.getNumChildren()) + { + // We could only prove that the inequality holds but not that any of the + // ys must be empty + return nm->mkNode(EQUAL, x, y); + } + } + + if (y.getKind() != STRING_CONCAT) + { + if (zeroLens.size() == 1) + { + // y is not a concatenation and we found that it must be empty, so just + // return (= y "") + Assert(zeroLens[0][0] == y); + return nm->mkNode(EQUAL, y, emp); + } + else + { + Assert(yLens.size() == 1 && yLens[0][0] == y); + return nm->mkNode(EQUAL, x, y); + } + } + + std::vector<Node> cs; + for (const Node& yiLen : yLens) + { + Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end()); + cs.push_back(yiLen[0]); + } + + NodeBuilder<> nb(AND); + // (= x (str.++ y1' ... ym')) + if (!cs.empty()) + { + nb << nm->mkNode(EQUAL, x, utils::mkConcat(STRING_CONCAT, cs)); + } + // (= y1'' "") ... (= yk'' "") + for (const Node& zeroLen : zeroLens) + { + Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end()); + nb << nm->mkNode(EQUAL, zeroLen[0], emp); + } + + // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) + return nb.constructNode(); +} + +std::pair<bool, std::vector<Node> > SequencesRewriter::collectEmptyEqs(Node x) +{ + NodeManager* nm = NodeManager::currentNM(); + Node empty = nm->mkConst(::CVC4::String("")); + + // Collect the equalities of the form (= x "") (sorted) + std::set<TNode> emptyNodes; + bool allEmptyEqs = true; + if (x.getKind() == kind::EQUAL) + { + if (x[0] == empty) + { + emptyNodes.insert(x[1]); + } + else if (x[1] == empty) + { + emptyNodes.insert(x[0]); + } + else + { + allEmptyEqs = false; + } + } + else if (x.getKind() == kind::AND) + { + for (const Node& c : x) + { + if (c.getKind() == kind::EQUAL) + { + if (c[0] == empty) + { + emptyNodes.insert(c[1]); + } + else if (c[1] == empty) + { + emptyNodes.insert(c[0]); + } + } + else + { + allEmptyEqs = false; + } + } + } + + if (emptyNodes.size() == 0) + { + allEmptyEqs = false; + } + + return std::make_pair( + allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end())); +} + +Node SequencesRewriter::returnRewrite(Node node, Node ret, const char* c) +{ + Trace("strings-rewrite") << "Rewrite " << node << " to " << ret << " by " << c + << "." << std::endl; + + NodeManager* nm = NodeManager::currentNM(); + + // standard post-processing + // We rewrite (string) equalities immediately here. This allows us to forego + // the standard invariant on equality rewrites (that s=t must rewrite to one + // of { s=t, t=s, true, false } ). + Kind retk = ret.getKind(); + if (retk == OR || retk == AND) + { + std::vector<Node> children; + bool childChanged = false; + for (const Node& cret : ret) + { + Node creter = cret; + if (cret.getKind() == EQUAL) + { + creter = rewriteEqualityExt(cret); + } + else if (cret.getKind() == NOT && cret[0].getKind() == EQUAL) + { + creter = nm->mkNode(NOT, rewriteEqualityExt(cret[0])); + } + childChanged = childChanged || cret != creter; + children.push_back(creter); + } + if (childChanged) + { + ret = nm->mkNode(retk, children); + } + } + else if (retk == NOT && ret[0].getKind() == EQUAL) + { + ret = nm->mkNode(NOT, rewriteEqualityExt(ret[0])); + } + else if (retk == EQUAL && node.getKind() != EQUAL) + { + Trace("strings-rewrite") + << "Apply extended equality rewrite on " << ret << std::endl; + ret = rewriteEqualityExt(ret); + } + return ret; +} |