summaryrefslogtreecommitdiff
path: root/src/theory
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2020-04-03 09:43:12 -0500
committerGitHub <noreply@github.com>2020-04-03 09:43:12 -0500
commitd91b52085d7e3bbda65117c0cd88433aed383aff (patch)
tree5ed2055704066d28a3247a82030ed44bfeda4a57 /src/theory
parente24e6f3620996ee9e5010d30fefc51247cc55fdc (diff)
Split sequences rewriter (#4194)
This is in preparation for making the strings rewriter configurable for stats. This moves all utility functions from SequencesRewriter to a proper place. This includes three new groupings of utility functions: those involving arithmetic entailments, those involving string entailments, those involving regular expression entailments. Here, "entailments" loosely means any question regarding a (set of) terms or formulas. No major code changes. Added some missing documentation and lightly cleaned a few blocks of code in cpp.
Diffstat (limited to 'src/theory')
-rw-r--r--src/theory/strings/arith_entail.cpp865
-rw-r--r--src/theory/strings/arith_entail.h180
-rw-r--r--src/theory/strings/core_solver.cpp13
-rw-r--r--src/theory/strings/regexp_elim.cpp4
-rw-r--r--src/theory/strings/regexp_entail.cpp623
-rw-r--r--src/theory/strings/regexp_entail.h117
-rw-r--r--src/theory/strings/regexp_operation.cpp29
-rw-r--r--src/theory/strings/regexp_operation.h2
-rw-r--r--src/theory/strings/regexp_solver.cpp2
-rw-r--r--src/theory/strings/sequences_rewriter.cpp2730
-rw-r--r--src/theory/strings/sequences_rewriter.h588
-rw-r--r--src/theory/strings/skolem_cache.cpp6
-rw-r--r--src/theory/strings/strings_entail.cpp994
-rw-r--r--src/theory/strings/strings_entail.h371
-rw-r--r--src/theory/strings/theory_strings_preprocess.cpp3
-rw-r--r--src/theory/strings/theory_strings_utils.cpp89
-rw-r--r--src/theory/strings/theory_strings_utils.h37
-rw-r--r--src/theory/strings/word.cpp27
-rw-r--r--src/theory/strings/word.h13
19 files changed, 3460 insertions, 3233 deletions
diff --git a/src/theory/strings/arith_entail.cpp b/src/theory/strings/arith_entail.cpp
new file mode 100644
index 000000000..5933f2586
--- /dev/null
+++ b/src/theory/strings/arith_entail.cpp
@@ -0,0 +1,865 @@
+/********************* */
+/*! \file arith_entail.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ ** Andrew Reynolds, Andres Noetzli
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved. See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of arithmetic entailment computation for string terms.
+ **/
+
+#include "theory/strings/arith_entail.h"
+
+#include "expr/attribute.h"
+#include "theory/arith/arith_msum.h"
+#include "theory/rewriter.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
+#include "theory/theory.h"
+
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+bool ArithEntail::checkEq(Node a, Node b)
+{
+ if (a == b)
+ {
+ return true;
+ }
+ Node ar = Rewriter::rewrite(a);
+ Node br = Rewriter::rewrite(b);
+ return ar == br;
+}
+
+bool ArithEntail::check(Node a, Node b, bool strict)
+{
+ if (a == b)
+ {
+ return !strict;
+ }
+ Node diff = NodeManager::currentNM()->mkNode(kind::MINUS, a, b);
+ return check(diff, strict);
+}
+
+struct StrCheckEntailArithTag
+{
+};
+struct StrCheckEntailArithComputedTag
+{
+};
+/** Attribute true for expressions for which check returned true */
+typedef expr::Attribute<StrCheckEntailArithTag, bool> StrCheckEntailArithAttr;
+typedef expr::Attribute<StrCheckEntailArithComputedTag, bool>
+ StrCheckEntailArithComputedAttr;
+
+bool ArithEntail::check(Node a, bool strict)
+{
+ if (a.isConst())
+ {
+ return a.getConst<Rational>().sgn() >= (strict ? 1 : 0);
+ }
+
+ Node ar =
+ strict
+ ? NodeManager::currentNM()->mkNode(
+ kind::MINUS, a, NodeManager::currentNM()->mkConst(Rational(1)))
+ : a;
+ ar = Rewriter::rewrite(ar);
+
+ if (ar.getAttribute(StrCheckEntailArithComputedAttr()))
+ {
+ return ar.getAttribute(StrCheckEntailArithAttr());
+ }
+
+ bool ret = checkInternal(ar);
+ if (!ret)
+ {
+ // try with approximations
+ ret = checkApprox(ar);
+ }
+ // cache the result
+ ar.setAttribute(StrCheckEntailArithAttr(), ret);
+ ar.setAttribute(StrCheckEntailArithComputedAttr(), true);
+ return ret;
+}
+
+bool ArithEntail::checkApprox(Node ar)
+{
+ Assert(Rewriter::rewrite(ar) == ar);
+ NodeManager* nm = NodeManager::currentNM();
+ std::map<Node, Node> msum;
+ Trace("strings-ent-approx-debug")
+ << "Setup arithmetic approximations for " << ar << std::endl;
+ if (!ArithMSum::getMonomialSum(ar, msum))
+ {
+ Trace("strings-ent-approx-debug")
+ << "...failed to get monomial sum!" << std::endl;
+ return false;
+ }
+ // for each monomial v*c, mApprox[v] a list of
+ // possibilities for how the term can be soundly approximated, that is,
+ // if mApprox[v] contains av, then v*c > av*c. Notice that if c
+ // is positive, then v > av, otherwise if c is negative, then v < av.
+ // In other words, av is an under-approximation if c is positive, and an
+ // over-approximation if c is negative.
+ bool changed = false;
+ std::map<Node, std::vector<Node> > mApprox;
+ // map from approximations to their monomial sums
+ std::map<Node, std::map<Node, Node> > approxMsums;
+ // aarSum stores each monomial that does not have multiple approximations
+ std::vector<Node> aarSum;
+ for (std::pair<const Node, Node>& m : msum)
+ {
+ Node v = m.first;
+ Node c = m.second;
+ Trace("strings-ent-approx-debug")
+ << "Get approximations " << v << "..." << std::endl;
+ if (v.isNull())
+ {
+ Node mn = c.isNull() ? nm->mkConst(Rational(1)) : c;
+ aarSum.push_back(mn);
+ }
+ else
+ {
+ // c.isNull() means c = 1
+ bool isOverApprox = !c.isNull() && c.getConst<Rational>().sgn() == -1;
+ std::vector<Node>& approx = mApprox[v];
+ std::unordered_set<Node, NodeHashFunction> visited;
+ std::vector<Node> toProcess;
+ toProcess.push_back(v);
+ do
+ {
+ Node curr = toProcess.back();
+ Trace("strings-ent-approx-debug") << " process " << curr << std::endl;
+ curr = Rewriter::rewrite(curr);
+ toProcess.pop_back();
+ if (visited.find(curr) == visited.end())
+ {
+ visited.insert(curr);
+ std::vector<Node> currApprox;
+ getArithApproximations(curr, currApprox, isOverApprox);
+ if (currApprox.empty())
+ {
+ Trace("strings-ent-approx-debug")
+ << "...approximation: " << curr << std::endl;
+ // no approximations, thus curr is a possibility
+ approx.push_back(curr);
+ }
+ else
+ {
+ toProcess.insert(
+ toProcess.end(), currApprox.begin(), currApprox.end());
+ }
+ }
+ } while (!toProcess.empty());
+ Assert(!approx.empty());
+ // if we have only one approximation, move it to final
+ if (approx.size() == 1)
+ {
+ changed = v != approx[0];
+ Node mn = ArithMSum::mkCoeffTerm(c, approx[0]);
+ aarSum.push_back(mn);
+ mApprox.erase(v);
+ }
+ else
+ {
+ // compute monomial sum form for each approximation, used below
+ for (const Node& aa : approx)
+ {
+ if (approxMsums.find(aa) == approxMsums.end())
+ {
+ CVC4_UNUSED bool ret =
+ ArithMSum::getMonomialSum(aa, approxMsums[aa]);
+ Assert(ret);
+ }
+ }
+ changed = true;
+ }
+ }
+ }
+ if (!changed)
+ {
+ // approximations had no effect, return
+ Trace("strings-ent-approx-debug") << "...no approximations" << std::endl;
+ return false;
+ }
+ // get the current "fixed" sum for the abstraction of ar
+ Node aar = aarSum.empty()
+ ? nm->mkConst(Rational(0))
+ : (aarSum.size() == 1 ? aarSum[0] : nm->mkNode(PLUS, aarSum));
+ aar = Rewriter::rewrite(aar);
+ Trace("strings-ent-approx-debug")
+ << "...processed fixed sum " << aar << " with " << mApprox.size()
+ << " approximated monomials." << std::endl;
+ // if we have a choice of how to approximate
+ if (!mApprox.empty())
+ {
+ // convert aar back to monomial sum
+ std::map<Node, Node> msumAar;
+ if (!ArithMSum::getMonomialSum(aar, msumAar))
+ {
+ return false;
+ }
+ if (Trace.isOn("strings-ent-approx"))
+ {
+ Trace("strings-ent-approx")
+ << "---- Check arithmetic entailment by under-approximation " << ar
+ << " >= 0" << std::endl;
+ Trace("strings-ent-approx") << "FIXED:" << std::endl;
+ ArithMSum::debugPrintMonomialSum(msumAar, "strings-ent-approx");
+ Trace("strings-ent-approx") << "APPROX:" << std::endl;
+ for (std::pair<const Node, std::vector<Node> >& a : mApprox)
+ {
+ Node c = msum[a.first];
+ Trace("strings-ent-approx") << " ";
+ if (!c.isNull())
+ {
+ Trace("strings-ent-approx") << c << " * ";
+ }
+ Trace("strings-ent-approx")
+ << a.second << " ...from " << a.first << std::endl;
+ }
+ Trace("strings-ent-approx") << std::endl;
+ }
+ Rational one(1);
+ // incorporate monomials one at a time that have a choice of approximations
+ while (!mApprox.empty())
+ {
+ Node v;
+ Node vapprox;
+ int maxScore = -1;
+ // Look at each approximation, take the one with the best score.
+ // Notice that we are in the process of trying to prove
+ // ( c1*t1 + .. + cn*tn ) + ( approx_1 | ... | approx_m ) >= 0,
+ // where c1*t1 + .. + cn*tn is the "fixed" component of our sum (aar)
+ // and approx_1 ... approx_m are possible approximations. The
+ // intution here is that we want coefficients c1...cn to be positive.
+ // This is because arithmetic string terms t1...tn (which may be
+ // applications of len, indexof, str.to.int) are never entailed to be
+ // negative. Hence, we add the approx_i that contributes the "most"
+ // towards making all constants c1...cn positive and cancelling negative
+ // monomials in approx_i itself.
+ for (std::pair<const Node, std::vector<Node> >& nam : mApprox)
+ {
+ Node cr = msum[nam.first];
+ for (const Node& aa : nam.second)
+ {
+ unsigned helpsCancelCount = 0;
+ unsigned addsObligationCount = 0;
+ std::map<Node, Node>::iterator it;
+ // we are processing an approximation cr*( c1*t1 + ... + cn*tn )
+ for (std::pair<const Node, Node>& aam : approxMsums[aa])
+ {
+ // Say aar is of the form t + c*ti, and aam is the monomial ci*ti
+ // where ci != 0. We say aam:
+ // (1) helps cancel if c != 0 and c>0 != ci>0
+ // (2) adds obligation if c>=0 and c+ci<0
+ Node ti = aam.first;
+ Node ci = aam.second;
+ if (!cr.isNull())
+ {
+ ci = ci.isNull() ? cr
+ : Rewriter::rewrite(nm->mkNode(MULT, ci, cr));
+ }
+ Trace("strings-ent-approx-debug") << ci << "*" << ti << " ";
+ int ciSgn = ci.isNull() ? 1 : ci.getConst<Rational>().sgn();
+ it = msumAar.find(ti);
+ if (it != msumAar.end())
+ {
+ Node c = it->second;
+ int cSgn = c.isNull() ? 1 : c.getConst<Rational>().sgn();
+ if (cSgn == 0)
+ {
+ addsObligationCount += (ciSgn == -1 ? 1 : 0);
+ }
+ else if (cSgn != ciSgn)
+ {
+ helpsCancelCount++;
+ Rational r1 = c.isNull() ? one : c.getConst<Rational>();
+ Rational r2 = ci.isNull() ? one : ci.getConst<Rational>();
+ Rational r12 = r1 + r2;
+ if (r12.sgn() == -1)
+ {
+ addsObligationCount++;
+ }
+ }
+ }
+ else
+ {
+ addsObligationCount += (ciSgn == -1 ? 1 : 0);
+ }
+ }
+ Trace("strings-ent-approx-debug")
+ << "counts=" << helpsCancelCount << "," << addsObligationCount
+ << " for " << aa << " into " << aar << std::endl;
+ int score = (addsObligationCount > 0 ? 0 : 2)
+ + (helpsCancelCount > 0 ? 1 : 0);
+ // if its the best, update v and vapprox
+ if (v.isNull() || score > maxScore)
+ {
+ v = nam.first;
+ vapprox = aa;
+ maxScore = score;
+ }
+ }
+ if (!v.isNull())
+ {
+ break;
+ }
+ }
+ Trace("strings-ent-approx")
+ << "- Decide " << v << " = " << vapprox << std::endl;
+ // we incorporate v approximated by vapprox into the overall approximation
+ // for ar
+ Assert(!v.isNull() && !vapprox.isNull());
+ Assert(msum.find(v) != msum.end());
+ Node mn = ArithMSum::mkCoeffTerm(msum[v], vapprox);
+ aar = nm->mkNode(PLUS, aar, mn);
+ // update the msumAar map
+ aar = Rewriter::rewrite(aar);
+ msumAar.clear();
+ if (!ArithMSum::getMonomialSum(aar, msumAar))
+ {
+ Assert(false);
+ Trace("strings-ent-approx")
+ << "...failed to get monomial sum!" << std::endl;
+ return false;
+ }
+ // we have processed the approximation for v
+ mApprox.erase(v);
+ }
+ Trace("strings-ent-approx") << "-----------------" << std::endl;
+ }
+ if (aar == ar)
+ {
+ Trace("strings-ent-approx-debug")
+ << "...approximation had no effect" << std::endl;
+ // this should never happen, but we avoid the infinite loop for sanity here
+ Assert(false);
+ return false;
+ }
+ // Check entailment on the approximation of ar.
+ // Notice that this may trigger further reasoning by approximation. For
+ // example, len( replace( x ++ y, substr( x, 0, n ), z ) ) may be
+ // under-approximated as len( x ) + len( y ) - len( substr( x, 0, n ) ) on
+ // this call, where in the recursive call we may over-approximate
+ // len( substr( x, 0, n ) ) as len( x ). In this example, we can infer
+ // that len( replace( x ++ y, substr( x, 0, n ), z ) ) >= len( y ) in two
+ // steps.
+ if (check(aar))
+ {
+ Trace("strings-ent-approx")
+ << "*** StrArithApprox: showed " << ar
+ << " >= 0 using under-approximation!" << std::endl;
+ Trace("strings-ent-approx")
+ << "*** StrArithApprox: under-approximation was " << aar << std::endl;
+ return true;
+ }
+ return false;
+}
+
+void ArithEntail::getArithApproximations(Node a,
+ std::vector<Node>& approx,
+ bool isOverApprox)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ // We do not handle PLUS here since this leads to exponential behavior.
+ // Instead, this is managed, e.g. during checkApprox, where
+ // PLUS terms are expanded "on-demand" during the reasoning.
+ Trace("strings-ent-approx-debug")
+ << "Get arith approximations " << a << std::endl;
+ Kind ak = a.getKind();
+ if (ak == MULT)
+ {
+ Node c;
+ Node v;
+ if (ArithMSum::getMonomial(a, c, v))
+ {
+ bool isNeg = c.getConst<Rational>().sgn() == -1;
+ getArithApproximations(v, approx, isNeg ? !isOverApprox : isOverApprox);
+ for (unsigned i = 0, size = approx.size(); i < size; i++)
+ {
+ approx[i] = nm->mkNode(MULT, c, approx[i]);
+ }
+ }
+ }
+ else if (ak == STRING_LENGTH)
+ {
+ Kind aak = a[0].getKind();
+ if (aak == STRING_SUBSTR)
+ {
+ // over,under-approximations for len( substr( x, n, m ) )
+ Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]);
+ if (isOverApprox)
+ {
+ // m >= 0 implies
+ // m >= len( substr( x, n, m ) )
+ if (check(a[0][2]))
+ {
+ approx.push_back(a[0][2]);
+ }
+ if (check(lenx, a[0][1]))
+ {
+ // n <= len( x ) implies
+ // len( x ) - n >= len( substr( x, n, m ) )
+ approx.push_back(nm->mkNode(MINUS, lenx, a[0][1]));
+ }
+ else
+ {
+ // len( x ) >= len( substr( x, n, m ) )
+ approx.push_back(lenx);
+ }
+ }
+ else
+ {
+ // 0 <= n and n+m <= len( x ) implies
+ // m <= len( substr( x, n, m ) )
+ Node npm = nm->mkNode(PLUS, a[0][1], a[0][2]);
+ if (check(a[0][1]) && check(lenx, npm))
+ {
+ approx.push_back(a[0][2]);
+ }
+ // 0 <= n and n+m >= len( x ) implies
+ // len(x)-n <= len( substr( x, n, m ) )
+ if (check(a[0][1]) && check(npm, lenx))
+ {
+ approx.push_back(nm->mkNode(MINUS, lenx, a[0][1]));
+ }
+ }
+ }
+ else if (aak == STRING_STRREPL)
+ {
+ // over,under-approximations for len( replace( x, y, z ) )
+ // notice this is either len( x ) or ( len( x ) + len( z ) - len( y ) )
+ Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]);
+ Node leny = nm->mkNode(STRING_LENGTH, a[0][1]);
+ Node lenz = nm->mkNode(STRING_LENGTH, a[0][2]);
+ if (isOverApprox)
+ {
+ if (check(leny, lenz))
+ {
+ // len( y ) >= len( z ) implies
+ // len( x ) >= len( replace( x, y, z ) )
+ approx.push_back(lenx);
+ }
+ else
+ {
+ // len( x ) + len( z ) >= len( replace( x, y, z ) )
+ approx.push_back(nm->mkNode(PLUS, lenx, lenz));
+ }
+ }
+ else
+ {
+ if (check(lenz, leny) || check(lenz, lenx))
+ {
+ // len( y ) <= len( z ) or len( x ) <= len( z ) implies
+ // len( x ) <= len( replace( x, y, z ) )
+ approx.push_back(lenx);
+ }
+ else
+ {
+ // len( x ) - len( y ) <= len( replace( x, y, z ) )
+ approx.push_back(nm->mkNode(MINUS, lenx, leny));
+ }
+ }
+ }
+ else if (aak == STRING_ITOS)
+ {
+ // over,under-approximations for len( int.to.str( x ) )
+ if (isOverApprox)
+ {
+ if (check(a[0][0], false))
+ {
+ if (check(a[0][0], true))
+ {
+ // x > 0 implies
+ // x >= len( int.to.str( x ) )
+ approx.push_back(a[0][0]);
+ }
+ else
+ {
+ // x >= 0 implies
+ // x+1 >= len( int.to.str( x ) )
+ approx.push_back(
+ nm->mkNode(PLUS, nm->mkConst(Rational(1)), a[0][0]));
+ }
+ }
+ }
+ else
+ {
+ if (check(a[0][0]))
+ {
+ // x >= 0 implies
+ // len( int.to.str( x ) ) >= 1
+ approx.push_back(nm->mkConst(Rational(1)));
+ }
+ // other crazy things are possible here, e.g.
+ // len( int.to.str( len( y ) + 10 ) ) >= 2
+ }
+ }
+ }
+ else if (ak == STRING_STRIDOF)
+ {
+ // over,under-approximations for indexof( x, y, n )
+ if (isOverApprox)
+ {
+ Node lenx = nm->mkNode(STRING_LENGTH, a[0]);
+ Node leny = nm->mkNode(STRING_LENGTH, a[1]);
+ if (check(lenx, leny))
+ {
+ // len( x ) >= len( y ) implies
+ // len( x ) - len( y ) >= indexof( x, y, n )
+ approx.push_back(nm->mkNode(MINUS, lenx, leny));
+ }
+ else
+ {
+ // len( x ) >= indexof( x, y, n )
+ approx.push_back(lenx);
+ }
+ }
+ else
+ {
+ // TODO?:
+ // contains( substr( x, n, len( x ) ), y ) implies
+ // n <= indexof( x, y, n )
+ // ...hard to test, runs risk of non-termination
+
+ // -1 <= indexof( x, y, n )
+ approx.push_back(nm->mkConst(Rational(-1)));
+ }
+ }
+ else if (ak == STRING_STOI)
+ {
+ // over,under-approximations for str.to.int( x )
+ if (isOverApprox)
+ {
+ // TODO?:
+ // y >= 0 implies
+ // y >= str.to.int( int.to.str( y ) )
+ }
+ else
+ {
+ // -1 <= str.to.int( x )
+ approx.push_back(nm->mkConst(Rational(-1)));
+ }
+ }
+ Trace("strings-ent-approx-debug") << "Return " << approx.size() << std::endl;
+}
+
+bool ArithEntail::checkWithEqAssumption(Node assumption, Node a, bool strict)
+{
+ Assert(assumption.getKind() == kind::EQUAL);
+ Assert(Rewriter::rewrite(assumption) == assumption);
+
+ // Find candidates variables to compute substitutions for
+ std::unordered_set<Node, NodeHashFunction> candVars;
+ std::vector<Node> toVisit = {assumption};
+ while (!toVisit.empty())
+ {
+ Node curr = toVisit.back();
+ toVisit.pop_back();
+
+ if (curr.getKind() == kind::PLUS || curr.getKind() == kind::MULT
+ || curr.getKind() == kind::MINUS || curr.getKind() == kind::EQUAL)
+ {
+ for (const auto& currChild : curr)
+ {
+ toVisit.push_back(currChild);
+ }
+ }
+ else if (curr.isVar() && Theory::theoryOf(curr) == THEORY_ARITH)
+ {
+ candVars.insert(curr);
+ }
+ else if (curr.getKind() == kind::STRING_LENGTH)
+ {
+ candVars.insert(curr);
+ }
+ }
+
+ // Check if any of the candidate variables are in n
+ Node v;
+ Assert(toVisit.empty());
+ toVisit.push_back(a);
+ while (!toVisit.empty())
+ {
+ Node curr = toVisit.back();
+ toVisit.pop_back();
+
+ for (const auto& currChild : curr)
+ {
+ toVisit.push_back(currChild);
+ }
+
+ if (candVars.find(curr) != candVars.end())
+ {
+ v = curr;
+ break;
+ }
+ }
+
+ if (v.isNull())
+ {
+ // No suitable candidate found
+ return false;
+ }
+
+ Node solution = ArithMSum::solveEqualityFor(assumption, v);
+ if (solution.isNull())
+ {
+ // Could not solve for v
+ return false;
+ }
+
+ a = a.substitute(TNode(v), TNode(solution));
+ return check(a, strict);
+}
+
+bool ArithEntail::checkWithAssumption(Node assumption,
+ Node a,
+ Node b,
+ bool strict)
+{
+ Assert(Rewriter::rewrite(assumption) == assumption);
+
+ NodeManager* nm = NodeManager::currentNM();
+
+ if (!assumption.isConst() && assumption.getKind() != kind::EQUAL)
+ {
+ // We rewrite inequality assumptions from x <= y to x + (str.len s) = y
+ // where s is some fresh string variable. We use (str.len s) because
+ // (str.len s) must be non-negative for the equation to hold.
+ Node x, y;
+ if (assumption.getKind() == kind::GEQ)
+ {
+ x = assumption[0];
+ y = assumption[1];
+ }
+ else
+ {
+ // (not (>= s t)) --> (>= (t - 1) s)
+ Assert(assumption.getKind() == kind::NOT
+ && assumption[0].getKind() == kind::GEQ);
+ x = nm->mkNode(kind::MINUS, assumption[0][1], nm->mkConst(Rational(1)));
+ y = assumption[0][0];
+ }
+
+ Node s = nm->mkBoundVar("slackVal", nm->stringType());
+ Node slen = nm->mkNode(kind::STRING_LENGTH, s);
+ assumption = Rewriter::rewrite(
+ nm->mkNode(kind::EQUAL, x, nm->mkNode(kind::PLUS, y, slen)));
+ }
+
+ Node diff = nm->mkNode(kind::MINUS, a, b);
+ bool res = false;
+ if (assumption.isConst())
+ {
+ bool assumptionBool = assumption.getConst<bool>();
+ if (assumptionBool)
+ {
+ res = check(diff, strict);
+ }
+ else
+ {
+ res = true;
+ }
+ }
+ else
+ {
+ res = checkWithEqAssumption(assumption, diff, strict);
+ }
+ return res;
+}
+
+bool ArithEntail::checkWithAssumptions(std::vector<Node> assumptions,
+ Node a,
+ Node b,
+ bool strict)
+{
+ // TODO: We currently try to show the entailment with each assumption
+ // independently. In the future, we should make better use of multiple
+ // assumptions.
+ bool res = false;
+ for (const auto& assumption : assumptions)
+ {
+ Assert(Rewriter::rewrite(assumption) == assumption);
+
+ if (checkWithAssumption(assumption, a, b, strict))
+ {
+ res = true;
+ break;
+ }
+ }
+ return res;
+}
+
+Node ArithEntail::getConstantBound(Node a, bool isLower)
+{
+ Assert(Rewriter::rewrite(a) == a);
+ Node ret;
+ if (a.isConst())
+ {
+ ret = a;
+ }
+ else if (a.getKind() == kind::STRING_LENGTH)
+ {
+ if (isLower)
+ {
+ ret = NodeManager::currentNM()->mkConst(Rational(0));
+ }
+ }
+ else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT)
+ {
+ std::vector<Node> children;
+ bool success = true;
+ for (unsigned i = 0; i < a.getNumChildren(); i++)
+ {
+ Node ac = getConstantBound(a[i], isLower);
+ if (ac.isNull())
+ {
+ ret = ac;
+ success = false;
+ break;
+ }
+ else
+ {
+ if (ac.getConst<Rational>().sgn() == 0)
+ {
+ if (a.getKind() == kind::MULT)
+ {
+ ret = ac;
+ success = false;
+ break;
+ }
+ }
+ else
+ {
+ if (a.getKind() == kind::MULT)
+ {
+ if ((ac.getConst<Rational>().sgn() > 0) != isLower)
+ {
+ ret = Node::null();
+ success = false;
+ break;
+ }
+ }
+ children.push_back(ac);
+ }
+ }
+ }
+ if (success)
+ {
+ if (children.empty())
+ {
+ ret = NodeManager::currentNM()->mkConst(Rational(0));
+ }
+ else if (children.size() == 1)
+ {
+ ret = children[0];
+ }
+ else
+ {
+ ret = NodeManager::currentNM()->mkNode(a.getKind(), children);
+ ret = Rewriter::rewrite(ret);
+ }
+ }
+ }
+ Trace("strings-rewrite-cbound")
+ << "Constant " << (isLower ? "lower" : "upper") << " bound for " << a
+ << " is " << ret << std::endl;
+ Assert(ret.isNull() || ret.isConst());
+ // entailment check should be at least as powerful as computing a lower bound
+ Assert(!isLower || ret.isNull() || ret.getConst<Rational>().sgn() < 0
+ || check(a, false));
+ Assert(!isLower || ret.isNull() || ret.getConst<Rational>().sgn() <= 0
+ || check(a, true));
+ return ret;
+}
+
+bool ArithEntail::checkInternal(Node a)
+{
+ Assert(Rewriter::rewrite(a) == a);
+ // check whether a >= 0
+ if (a.isConst())
+ {
+ return a.getConst<Rational>().sgn() >= 0;
+ }
+ else if (a.getKind() == kind::STRING_LENGTH)
+ {
+ // str.len( t ) >= 0
+ return true;
+ }
+ else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT)
+ {
+ for (unsigned i = 0; i < a.getNumChildren(); i++)
+ {
+ if (!checkInternal(a[i]))
+ {
+ return false;
+ }
+ }
+ // t1 >= 0 ^ ... ^ tn >= 0 => t1 op ... op tn >= 0
+ return true;
+ }
+
+ return false;
+}
+
+bool ArithEntail::inferZerosInSumGeq(Node x,
+ std::vector<Node>& ys,
+ std::vector<Node>& zeroYs)
+{
+ Assert(zeroYs.empty());
+
+ NodeManager* nm = NodeManager::currentNM();
+
+ // Check if we can show that y1 + ... + yn >= x
+ Node sum = (ys.size() > 1) ? nm->mkNode(PLUS, ys) : ys[0];
+ if (!check(sum, x))
+ {
+ return false;
+ }
+
+ // Try to remove yi one-by-one and check if we can still show:
+ //
+ // y1 + ... + yi-1 + yi+1 + ... + yn >= x
+ //
+ // If that's the case, we know that yi can be zero and the inequality still
+ // holds.
+ size_t i = 0;
+ while (i < ys.size())
+ {
+ Node yi = ys[i];
+ std::vector<Node>::iterator pos = ys.erase(ys.begin() + i);
+ if (ys.size() > 1)
+ {
+ sum = nm->mkNode(PLUS, ys);
+ }
+ else
+ {
+ sum = ys.size() == 1 ? ys[0] : nm->mkConst(Rational(0));
+ }
+
+ if (check(sum, x))
+ {
+ zeroYs.push_back(yi);
+ }
+ else
+ {
+ ys.insert(pos, yi);
+ i++;
+ }
+ }
+ return true;
+}
+
+} // namespace strings
+} // namespace theory
+} // namespace CVC4
diff --git a/src/theory/strings/arith_entail.h b/src/theory/strings/arith_entail.h
new file mode 100644
index 000000000..266f726c7
--- /dev/null
+++ b/src/theory/strings/arith_entail.h
@@ -0,0 +1,180 @@
+/********************* */
+/*! \file arith_entail.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ ** Andrew Reynolds, Andres Noetzli
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved. See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Arithmetic entailment computation for string terms.
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__STRINGS__ARITH_ENTAIL_H
+#define CVC4__THEORY__STRINGS__ARITH_ENTAIL_H
+
+#include <vector>
+
+#include "expr/node.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+/**
+ * Techniques for computing arithmetic entailment for string terms. This
+ * is an implementation of the techniques from Reynolds et al, "High Level
+ * Abstractions for Simplifying Extended String Constraints in SMT", CAV 2019.
+ */
+class ArithEntail
+{
+ public:
+ /** check arithmetic entailment equal
+ * Returns true if it is always the case that a = b.
+ */
+ static bool checkEq(Node a, Node b);
+ /** check arithmetic entailment
+ * Returns true if it is always the case that a >= b,
+ * and a>b if strict is true.
+ */
+ static bool check(Node a, Node b, bool strict = false);
+ /** check arithmetic entailment
+ * Returns true if it is always the case that a >= 0.
+ */
+ static bool check(Node a, bool strict = false);
+ /** check arithmetic entailment with approximations
+ *
+ * Returns true if it is always the case that a >= 0. We expect that a is in
+ * rewritten form.
+ *
+ * This function uses "approximation" techniques that under-approximate
+ * the value of a for the purposes of showing the entailment holds. For
+ * example, given:
+ * len( x ) - len( substr( y, 0, len( x ) ) )
+ * Since we know that len( substr( y, 0, len( x ) ) ) <= len( x ), the above
+ * term can be under-approximated as len( x ) - len( x ) = 0, which is >= 0,
+ * and thus the entailment len( x ) - len( substr( y, 0, len( x ) ) ) >= 0
+ * holds.
+ */
+ static bool checkApprox(Node a);
+
+ /**
+ * Checks whether assumption |= a >= 0 (if strict is false) or
+ * assumption |= a > 0 (if strict is true), where assumption is an equality
+ * assumption. The assumption must be in rewritten form.
+ *
+ * Example:
+ *
+ * checkWithEqAssumption(x + (str.len y) = 0, -x, false) = true
+ *
+ * Because: x = -(str.len y), so -x >= 0 --> (str.len y) >= 0 --> true
+ */
+ static bool checkWithEqAssumption(Node assumption,
+ Node a,
+ bool strict = false);
+
+ /**
+ * Checks whether assumption |= a >= b (if strict is false) or
+ * assumption |= a > b (if strict is true). The function returns true if it
+ * can be shown that the entailment holds and false otherwise. Assumption
+ * must be in rewritten form. Assumption may be an equality or an inequality.
+ *
+ * Example:
+ *
+ * checkWithAssumption(x + (str.len y) = 0, 0, x, false) = true
+ *
+ * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true
+ */
+ static bool checkWithAssumption(Node assumption,
+ Node a,
+ Node b,
+ bool strict = false);
+
+ /**
+ * Checks whether assumptions |= a >= b (if strict is false) or
+ * assumptions |= a > b (if strict is true). The function returns true if it
+ * can be shown that the entailment holds and false otherwise. Assumptions
+ * must be in rewritten form. Assumptions may be an equalities or an
+ * inequalities.
+ *
+ * Example:
+ *
+ * checkWithAssumptions([x + (str.len y) = 0], 0, x, false) = true
+ *
+ * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true
+ */
+ static bool checkWithAssumptions(std::vector<Node> assumptions,
+ Node a,
+ Node b,
+ bool strict = false);
+
+ /** get arithmetic lower bound
+ * If this function returns a non-null Node ret,
+ * then ret is a rational constant and
+ * we know that n >= ret always if isLower is true,
+ * or n <= ret if isLower is false.
+ *
+ * Notice the following invariant.
+ * If getConstantArithBound(a, true) = ret where ret is non-null, then for
+ * strict = { true, false } :
+ * ret >= strict ? 1 : 0
+ * if and only if
+ * check( a, strict ) = true.
+ */
+ static Node getConstantBound(Node a, bool isLower = true);
+
+ /**
+ * Given an inequality y1 + ... + yn >= x, removes operands yi s.t. the
+ * original inequality still holds. Returns true if the original inequality
+ * holds and false otherwise. The list of ys is modified to contain a subset
+ * of the original ys.
+ *
+ * Example:
+ *
+ * inferZerosInSumGeq( (str.len x), [ (str.len x), (str.len y), 1 ], [] )
+ * --> returns true with ys = [ (str.len x) ] and zeroYs = [ (str.len y), 1 ]
+ * (can be used to rewrite the inequality to false)
+ *
+ * inferZerosInSumGeq( (str.len x), [ (str.len y) ], [] )
+ * --> returns false because it is not possible to show
+ * str.len(y) >= str.len(x)
+ */
+ static bool inferZerosInSumGeq(Node x,
+ std::vector<Node>& ys,
+ std::vector<Node>& zeroYs);
+
+ private:
+ /** check entail arithmetic internal
+ * Returns true if we can show a >= 0 always.
+ * a is in rewritten form.
+ */
+ static bool checkInternal(Node a);
+ /** Get arithmetic approximations
+ *
+ * This gets the (set of) arithmetic approximations for term a and stores
+ * them in approx. If isOverApprox is true, these are over-approximations
+ * for the value of a, otherwise, they are underapproximations. For example,
+ * an over-approximation for len( substr( y, n, m ) ) is m; an
+ * under-approximation for indexof( x, y, n ) is -1.
+ *
+ * Notice that this function is not generally recursive (although it may make
+ * a small bounded of recursive calls). Instead, it returns the shape
+ * of the approximations for a. For example, an under-approximation
+ * for the term len( replace( substr( x, 0, n ), y, z ) ) returned by this
+ * function might be len( substr( x, 0, n ) ) - len( y ), where we don't
+ * consider (recursively) the approximations for len( substr( x, 0, n ) ).
+ */
+ static void getArithApproximations(Node a,
+ std::vector<Node>& approx,
+ bool isOverApprox = false);
+};
+
+} // namespace strings
+} // namespace theory
+} // namespace CVC4
+
+#endif
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp
index 3384499a2..dc076e734 100644
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -18,6 +18,7 @@
#include "options/strings_options.h"
#include "theory/strings/sequences_rewriter.h"
+#include "theory/strings/strings_entail.h"
#include "theory/strings/theory_strings_utils.h"
#include "theory/strings/word.h"
@@ -154,7 +155,7 @@ void CoreSolver::checkFlatForms()
for (const Node& n : it->second)
{
int firstc, lastc;
- if (!SequencesRewriter::canConstantContainList(
+ if (!StringsEntail::canConstantContainList(
c, d_flat_form[n], firstc, lastc))
{
Trace("strings-ff-debug") << "Flat form for " << n
@@ -348,9 +349,8 @@ void CoreSolver::checkFlatForm(std::vector<Node>& eqc,
if (!curr_c.isNull() && !cc_c.isNull())
{
// check for constant conflict
- int index;
- Node s =
- SequencesRewriter::splitConstant(cc_c, curr_c, index, isRev);
+ size_t index;
+ Node s = Word::splitConstant(cc_c, curr_c, index, isRev);
if (s.isNull())
{
d_bsolver.explainConstantEqc(ac,curr,exp);
@@ -915,8 +915,7 @@ void CoreSolver::getNormalForms(Node eqc,
{
NormalForm& nf = normal_forms[i];
int firstc, lastc;
- if (!SequencesRewriter::canConstantContainList(
- c, nf.d_nf, firstc, lastc))
+ if (!StringsEntail::canConstantContainList(c, nf.d_nf, firstc, lastc))
{
Node n = nf.d_base;
//conflict
@@ -1918,7 +1917,7 @@ int CoreSolver::processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >&
if (!c.isNull())
{
int findex, lindex;
- if (!SequencesRewriter::canConstantContainList(
+ if (!StringsEntail::canConstantContainList(
c, i == 0 ? nfj : nfi, findex, lindex))
{
Trace("strings-solve-debug")
diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp
index 259588789..d4f301e23 100644
--- a/src/theory/strings/regexp_elim.cpp
+++ b/src/theory/strings/regexp_elim.cpp
@@ -17,7 +17,7 @@
#include "options/strings_options.h"
#include "theory/rewriter.h"
-#include "theory/strings/sequences_rewriter.h"
+#include "theory/strings/regexp_entail.h"
#include "theory/strings/theory_strings_utils.h"
using namespace CVC4;
@@ -71,7 +71,7 @@ Node RegExpElimination::eliminateConcat(Node atom)
for (unsigned i = 0, size = children.size(); i < size; i++)
{
Node c = children[i];
- Node fl = SequencesRewriter::getFixedLengthForRegexp(c);
+ Node fl = RegExpEntail::getFixedLengthForRegexp(c);
if (fl.isNull())
{
if (!hasPivotIndex && c.getKind() == REGEXP_STAR
diff --git a/src/theory/strings/regexp_entail.cpp b/src/theory/strings/regexp_entail.cpp
new file mode 100644
index 000000000..d03893483
--- /dev/null
+++ b/src/theory/strings/regexp_entail.cpp
@@ -0,0 +1,623 @@
+/********************* */
+/*! \file regexp_entail.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ ** Andrew Reynolds, Andres Noetzli, Tianyi Liang
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved. See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of entailment tests involving regular expressions
+ **/
+
+#include "theory/strings/regexp_entail.h"
+
+#include "theory/rewriter.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
+
+using namespace std;
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
+ std::vector<Node>& children,
+ int dir)
+{
+ Trace("regexp-ext-rewrite-debug")
+ << "Simple reg exp consume, dir=" << dir << ":" << std::endl;
+ Trace("regexp-ext-rewrite-debug")
+ << " mchildren : " << mchildren << std::endl;
+ Trace("regexp-ext-rewrite-debug") << " children : " << children << std::endl;
+ NodeManager* nm = NodeManager::currentNM();
+ unsigned tmin = dir < 0 ? 0 : dir;
+ unsigned tmax = dir < 0 ? 1 : dir;
+ // try to remove off front and back
+ for (unsigned t = 0; t < 2; t++)
+ {
+ if (tmin <= t && t <= tmax)
+ {
+ bool do_next = true;
+ while (!children.empty() && !mchildren.empty() && do_next)
+ {
+ do_next = false;
+ Node xc = mchildren[mchildren.size() - 1];
+ Node rc = children[children.size() - 1];
+ Assert(rc.getKind() != REGEXP_CONCAT);
+ Assert(xc.getKind() != STRING_CONCAT);
+ if (rc.getKind() == STRING_TO_REGEXP)
+ {
+ if (xc == rc[0])
+ {
+ children.pop_back();
+ mchildren.pop_back();
+ do_next = true;
+ Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl;
+ }
+ else if (xc.isConst() && rc[0].isConst())
+ {
+ // split the constant
+ size_t index;
+ Node s = Word::splitConstant(xc, rc[0], index, t == 0);
+ Trace("regexp-ext-rewrite-debug")
+ << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> "
+ << s << " " << index << " " << t << std::endl;
+ if (s.isNull())
+ {
+ Trace("regexp-ext-rewrite-debug")
+ << "...return false" << std::endl;
+ return nm->mkConst(false);
+ }
+ else
+ {
+ Trace("regexp-ext-rewrite-debug")
+ << "...strip equal const" << std::endl;
+ children.pop_back();
+ mchildren.pop_back();
+ if (index == 0)
+ {
+ mchildren.push_back(s);
+ }
+ else
+ {
+ children.push_back(nm->mkNode(STRING_TO_REGEXP, s));
+ }
+ }
+ do_next = true;
+ }
+ }
+ else if (xc.isConst())
+ {
+ // check for constants
+ CVC4::String s = xc.getConst<String>();
+ if (Word::isEmpty(xc))
+ {
+ Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl;
+ // ignore and continue
+ mchildren.pop_back();
+ do_next = true;
+ }
+ else if (rc.getKind() == REGEXP_RANGE || rc.getKind() == REGEXP_SIGMA)
+ {
+ std::vector<unsigned> ssVec;
+ ssVec.push_back(t == 0 ? s.back() : s.front());
+ CVC4::String ss(ssVec);
+ if (testConstStringInRegExp(ss, 0, rc))
+ {
+ // strip off one character
+ mchildren.pop_back();
+ if (s.size() > 1)
+ {
+ if (t == 0)
+ {
+ mchildren.push_back(nm->mkConst(s.substr(0, s.size() - 1)));
+ }
+ else
+ {
+ mchildren.push_back(nm->mkConst(s.substr(1)));
+ }
+ }
+ children.pop_back();
+ do_next = true;
+ }
+ else
+ {
+ return nm->mkConst(false);
+ }
+ }
+ else if (rc.getKind() == REGEXP_INTER || rc.getKind() == REGEXP_UNION)
+ {
+ // see if any/each child does not work
+ bool result_valid = true;
+ Node result;
+ Node emp_s = nm->mkConst(::CVC4::String(""));
+ for (unsigned i = 0; i < rc.getNumChildren(); i++)
+ {
+ std::vector<Node> mchildren_s;
+ std::vector<Node> children_s;
+ mchildren_s.push_back(xc);
+ utils::getConcat(rc[i], children_s);
+ Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
+ if (!ret.isNull())
+ {
+ // one conjunct cannot be satisfied, return false
+ if (rc.getKind() == REGEXP_INTER)
+ {
+ return ret;
+ }
+ }
+ else
+ {
+ if (children_s.empty())
+ {
+ // if we were able to fully consume, store the result
+ Assert(mchildren_s.size() <= 1);
+ if (mchildren_s.empty())
+ {
+ mchildren_s.push_back(emp_s);
+ }
+ if (result.isNull())
+ {
+ result = mchildren_s[0];
+ }
+ else if (result != mchildren_s[0])
+ {
+ result_valid = false;
+ }
+ }
+ else
+ {
+ result_valid = false;
+ }
+ }
+ }
+ if (result_valid)
+ {
+ if (result.isNull())
+ {
+ // all disjuncts cannot be satisfied, return false
+ Assert(rc.getKind() == REGEXP_UNION);
+ return nm->mkConst(false);
+ }
+ else
+ {
+ // all branches led to the same result
+ children.pop_back();
+ mchildren.pop_back();
+ if (result != emp_s)
+ {
+ mchildren.push_back(result);
+ }
+ do_next = true;
+ }
+ }
+ }
+ else if (rc.getKind() == REGEXP_STAR)
+ {
+ // check if there is no way that this star can be unrolled even once
+ std::vector<Node> mchildren_s;
+ mchildren_s.insert(
+ mchildren_s.end(), mchildren.begin(), mchildren.end());
+ if (t == 1)
+ {
+ std::reverse(mchildren_s.begin(), mchildren_s.end());
+ }
+ std::vector<Node> children_s;
+ utils::getConcat(rc[0], children_s);
+ Trace("regexp-ext-rewrite-debug")
+ << "...recursive call on body of star" << std::endl;
+ Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
+ if (!ret.isNull())
+ {
+ Trace("regexp-ext-rewrite-debug")
+ << "CRE : regexp star infeasable " << xc << " " << rc
+ << std::endl;
+ children.pop_back();
+ if (!children.empty())
+ {
+ Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl;
+ do_next = true;
+ }
+ }
+ else
+ {
+ if (children_s.empty())
+ {
+ // check if beyond this, we can't do it or there is nothing
+ // left, if so, repeat
+ bool can_skip = false;
+ if (children.size() > 1)
+ {
+ std::vector<Node> mchildren_ss;
+ mchildren_ss.insert(
+ mchildren_ss.end(), mchildren.begin(), mchildren.end());
+ std::vector<Node> children_ss;
+ children_ss.insert(
+ children_ss.end(), children.begin(), children.end() - 1);
+ if (t == 1)
+ {
+ std::reverse(mchildren_ss.begin(), mchildren_ss.end());
+ std::reverse(children_ss.begin(), children_ss.end());
+ }
+ if (simpleRegexpConsume(mchildren_ss, children_ss, t)
+ .isNull())
+ {
+ can_skip = true;
+ }
+ }
+ if (!can_skip)
+ {
+ Trace("regexp-ext-rewrite-debug")
+ << "...can't skip" << std::endl;
+ // take the result of fully consuming once
+ if (t == 1)
+ {
+ std::reverse(mchildren_s.begin(), mchildren_s.end());
+ }
+ mchildren.clear();
+ mchildren.insert(
+ mchildren.end(), mchildren_s.begin(), mchildren_s.end());
+ do_next = true;
+ }
+ else
+ {
+ Trace("regexp-ext-rewrite-debug")
+ << "...can skip " << rc << " from " << xc << std::endl;
+ }
+ }
+ }
+ }
+ }
+ if (!do_next)
+ {
+ Trace("regexp-ext-rewrite")
+ << "Cannot consume : " << xc << " " << rc << std::endl;
+ }
+ }
+ }
+ if (dir != 0)
+ {
+ std::reverse(children.begin(), children.end());
+ std::reverse(mchildren.begin(), mchildren.end());
+ }
+ }
+ return Node::null();
+}
+
+bool RegExpEntail::isConstRegExp(TNode t)
+{
+ if (t.getKind() == STRING_TO_REGEXP)
+ {
+ return t[0].isConst();
+ }
+ else if (t.isVar())
+ {
+ return false;
+ }
+ for (unsigned i = 0; i < t.getNumChildren(); ++i)
+ {
+ if (!isConstRegExp(t[i]))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool RegExpEntail::testConstStringInRegExp(CVC4::String& s,
+ unsigned index_start,
+ TNode r)
+{
+ Assert(index_start <= s.size());
+ Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at "
+ << index_start << std::endl;
+ Assert(!r.isVar());
+ Kind k = r.getKind();
+ switch (k)
+ {
+ case STRING_TO_REGEXP:
+ {
+ CVC4::String s2 = s.substr(index_start, s.size() - index_start);
+ if (r[0].isConst())
+ {
+ return (s2 == r[0].getConst<String>());
+ }
+ Assert(false) << "RegExp contains variables";
+ return false;
+ }
+ case REGEXP_CONCAT:
+ {
+ if (s.size() != index_start)
+ {
+ std::vector<int> vec_k(r.getNumChildren(), -1);
+ int start = 0;
+ int left = (int)s.size() - index_start;
+ int i = 0;
+ while (i < (int)r.getNumChildren())
+ {
+ bool flag = true;
+ if (i == (int)r.getNumChildren() - 1)
+ {
+ if (testConstStringInRegExp(s, index_start + start, r[i]))
+ {
+ return true;
+ }
+ }
+ else if (i == -1)
+ {
+ return false;
+ }
+ else
+ {
+ for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i])
+ {
+ CVC4::String t = s.substr(index_start + start, vec_k[i]);
+ if (testConstStringInRegExp(t, 0, r[i]))
+ {
+ start += vec_k[i];
+ left -= vec_k[i];
+ flag = false;
+ ++i;
+ vec_k[i] = -1;
+ break;
+ }
+ }
+ }
+
+ if (flag)
+ {
+ --i;
+ if (i >= 0)
+ {
+ start -= vec_k[i];
+ left += vec_k[i];
+ }
+ }
+ }
+ return false;
+ }
+ else
+ {
+ for (unsigned i = 0; i < r.getNumChildren(); ++i)
+ {
+ if (!testConstStringInRegExp(s, index_start, r[i]))
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+ case REGEXP_UNION:
+ {
+ for (unsigned i = 0; i < r.getNumChildren(); ++i)
+ {
+ if (testConstStringInRegExp(s, index_start, r[i]))
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+ case REGEXP_INTER:
+ {
+ for (unsigned i = 0; i < r.getNumChildren(); ++i)
+ {
+ if (!testConstStringInRegExp(s, index_start, r[i]))
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+ case REGEXP_STAR:
+ {
+ if (s.size() != index_start)
+ {
+ for (unsigned i = s.size() - index_start; i > 0; --i)
+ {
+ CVC4::String t = s.substr(index_start, i);
+ if (testConstStringInRegExp(t, 0, r[0]))
+ {
+ if (index_start + i == s.size()
+ || testConstStringInRegExp(s, index_start + i, r))
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+ else
+ {
+ return true;
+ }
+ }
+ case REGEXP_EMPTY: { return false;
+ }
+ case REGEXP_SIGMA:
+ {
+ if (s.size() == index_start + 1)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ case REGEXP_RANGE:
+ {
+ if (s.size() == index_start + 1)
+ {
+ unsigned a = r[0].getConst<String>().front();
+ unsigned b = r[1].getConst<String>().front();
+ unsigned c = s.back();
+ return (a <= c && c <= b);
+ }
+ else
+ {
+ return false;
+ }
+ }
+ case REGEXP_LOOP:
+ {
+ NodeManager* nm = NodeManager::currentNM();
+ uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
+ if (s.size() == index_start)
+ {
+ return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]);
+ }
+ else if (l == 0 && r[1] == r[2])
+ {
+ return false;
+ }
+ else
+ {
+ Assert(r.getNumChildren() == 3)
+ << "String rewriter error: LOOP has 2 children";
+ if (l == 0)
+ {
+ // R{0,u}
+ uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
+ for (unsigned len = s.size() - index_start; len >= 1; len--)
+ {
+ CVC4::String t = s.substr(index_start, len);
+ if (testConstStringInRegExp(t, 0, r[0]))
+ {
+ if (len + index_start == s.size())
+ {
+ return true;
+ }
+ else
+ {
+ Node num2 = nm->mkConst(CVC4::Rational(u - 1));
+ Node r2 = nm->mkNode(REGEXP_LOOP, r[0], r[1], num2);
+ if (testConstStringInRegExp(s, index_start + len, r2))
+ {
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
+ else
+ {
+ // R{l,l}
+ Assert(r[1] == r[2])
+ << "String rewriter error: LOOP nums are not equal";
+ if (l > s.size() - index_start)
+ {
+ if (testConstStringInRegExp(s, s.size(), r[0]))
+ {
+ l = s.size() - index_start;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ for (unsigned len = 1; len <= s.size() - index_start; len++)
+ {
+ CVC4::String t = s.substr(index_start, len);
+ if (testConstStringInRegExp(t, 0, r[0]))
+ {
+ Node num2 = nm->mkConst(CVC4::Rational(l - 1));
+ Node r2 = nm->mkNode(REGEXP_LOOP, r[0], num2, num2);
+ if (testConstStringInRegExp(s, index_start + len, r2))
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+ }
+ }
+ case REGEXP_COMPLEMENT:
+ {
+ return !testConstStringInRegExp(s, index_start, r[0]);
+ break;
+ }
+ default:
+ {
+ Assert(!utils::isRegExpKind(k));
+ return false;
+ }
+ }
+}
+
+bool RegExpEntail::hasEpsilonNode(TNode node)
+{
+ for (const Node& nc : node)
+ {
+ if (nc.getKind() == STRING_TO_REGEXP && Word::isEmpty(nc[0]))
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+Node RegExpEntail::getFixedLengthForRegexp(Node n)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ if (n.getKind() == STRING_TO_REGEXP)
+ {
+ Node ret = nm->mkNode(STRING_LENGTH, n[0]);
+ ret = Rewriter::rewrite(ret);
+ if (ret.isConst())
+ {
+ return ret;
+ }
+ }
+ else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE)
+ {
+ return nm->mkConst(Rational(1));
+ }
+ else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER)
+ {
+ Node ret;
+ for (const Node& nc : n)
+ {
+ Node flc = getFixedLengthForRegexp(nc);
+ if (flc.isNull() || (!ret.isNull() && ret != flc))
+ {
+ return Node::null();
+ }
+ else if (ret.isNull())
+ {
+ // first time
+ ret = flc;
+ }
+ }
+ return ret;
+ }
+ else if (n.getKind() == REGEXP_CONCAT)
+ {
+ NodeBuilder<> nb(PLUS);
+ for (const Node& nc : n)
+ {
+ Node flc = getFixedLengthForRegexp(nc);
+ if (flc.isNull())
+ {
+ return flc;
+ }
+ nb << flc;
+ }
+ Node ret = nb.constructNode();
+ ret = Rewriter::rewrite(ret);
+ return ret;
+ }
+ return Node::null();
+}
+
+} // namespace strings
+} // namespace theory
+} // namespace CVC4
diff --git a/src/theory/strings/regexp_entail.h b/src/theory/strings/regexp_entail.h
new file mode 100644
index 000000000..0732a6770
--- /dev/null
+++ b/src/theory/strings/regexp_entail.h
@@ -0,0 +1,117 @@
+/********************* */
+/*! \file regexp_entail.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ ** Andrew Reynolds, Andres Noetzli, Tianyi Liang
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved. See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Entailment tests involving regular expressions
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__STRINGS__REGEXP_ENTAIL_H
+#define CVC4__THEORY__STRINGS__REGEXP_ENTAIL_H
+
+#include <climits>
+#include <utility>
+#include <vector>
+
+#include "expr/attribute.h"
+#include "theory/strings/rewrites.h"
+#include "theory/theory_rewriter.h"
+#include "theory/type_enumerator.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+class RegExpEntail
+{
+ public:
+ /** simple regular expression consume
+ *
+ * This method is called when we are rewriting a membership of the form
+ * s1 ++ ... ++ sn in r1 ++ ... ++ rm
+ * We have that mchildren consists of the strings s1...sn, and children
+ * consists of the regular expressions r1...rm.
+ *
+ * This method tries to strip off parts of the concatenation terms. It updates
+ * the vectors such that the resulting vectors are such that the membership
+ * mchildren[n'...n''] in children[m'...m''] is equivalent to the input
+ * membership. The argument dir indicates the direction to consider, where
+ * 0 means strip off the front, 1 off the back, and < 0 off of both.
+ *
+ * If this method returns the false node, then we have inferred that no
+ * string in the language of r1 ++ ... ++ rm is a prefix (when dir!=1) or
+ * suffix (when dir!=0) of s1 ++ ... ++ sn. Otherwise, it returns the null
+ * node.
+ *
+ * For example, given input
+ * mchildren = { "ab", x }, children = { [["a"]], ([["cd"]])* } and dir = 0,
+ * this method updates:
+ * mchildren = { "b", x }, children = { ("cd")* }
+ * and returns null.
+ *
+ * For example, given input
+ * { x, "abb", x }, { [[x]], ["a"..."b"], allchar, [[y]], [[x]]} and dir=-1,
+ * this method updates:
+ * { "b" }, { [[y]] }
+ * where [[.]] denotes str.to.re, and returns null.
+ *
+ * Notice that the above requirement for returning false is stronger than
+ * determining that s1 ++ ... ++ sn in r1 ++ ... ++ rm is equivalent to false.
+ * For example, for input "bb" in "b" ++ ( "a" )*, we do not return false
+ * since "b" is in the language of "b" ++ ( "a" )* and is a prefix of "bb".
+ * We do not return false even though the above membership is equivalent
+ * to false. We do this because the function is used e.g. to test whether a
+ * possible unrolling leads to a conflict. This is demonstrated by the
+ * following examples:
+ *
+ * For example, given input
+ * { "bb", x }, { "b", ("a")* } and dir=-1,
+ * this method updates:
+ * { "b" }, { ("a")* }
+ * and returns null.
+ *
+ * For example, given input
+ * { "cb", x }, { "b", ("a")* } and dir=-1,
+ * this method leaves children and mchildren unchanged and returns false.
+ *
+ * Notice that based on this, we can determine that:
+ * "cb" ++ x in ( "b" ++ ("a")* )*
+ * is equivalent to false, whereas we cannot determine that:
+ * "bb" ++ x in ( "b" ++ ("a")* )*
+ * is equivalent to false.
+ */
+ static Node simpleRegexpConsume(std::vector<Node>& mchildren,
+ std::vector<Node>& children,
+ int dir = -1);
+ /** Is t a constant regular expression? */
+ static bool isConstRegExp(TNode t);
+ /**
+ * Does the substring of s starting at index_start occur in constant regular
+ * expression r?
+ */
+ static bool testConstStringInRegExp(CVC4::String& s,
+ unsigned index_start,
+ TNode r);
+ /** Does regular expression node have (str.to.re "") as a child? */
+ static bool hasEpsilonNode(TNode node);
+ /** get length for regular expression
+ *
+ * Given regular expression n, if this method returns a non-null value c, then
+ * x in n entails len( x ) = c.
+ */
+ static Node getFixedLengthForRegexp(Node n);
+};
+
+} // namespace strings
+} // namespace theory
+} // namespace CVC4
+
+#endif /* CVC4__THEORY__STRINGS__REGEXP_ENTAIL_H */
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
index d104f3ade..0520ec88a 100644
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -18,7 +18,7 @@
#include "expr/kind.h"
#include "options/strings_options.h"
-#include "theory/strings/sequences_rewriter.h"
+#include "theory/strings/regexp_entail.h"
#include "theory/strings/theory_strings_utils.h"
#include "theory/strings/word.h"
@@ -83,7 +83,7 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r)
{
d_constCache[cur] = RE_C_CONSTANT;
}
- else if (!isRegExpKind(ck))
+ else if (!utils::isRegExpKind(ck))
{
// non-regular expression applications, e.g. function applications
// with regular expression return type are treated as variables.
@@ -115,15 +115,6 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r)
return d_constCache[r];
}
-bool RegExpOpr::isRegExpKind(Kind k)
-{
- return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
- || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
- || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
- || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
- || k == REGEXP_COMPLEMENT;
-}
-
// 0-unknown, 1-yes, 2-no
int RegExpOpr::delta( Node r, Node &exp ) {
Trace("regexp-delta") << "RegExp-Delta starts with /" << mkString( r ) << "/" << std::endl;
@@ -270,7 +261,7 @@ int RegExpOpr::delta( Node r, Node &exp ) {
break;
}
default: {
- Assert(!isRegExpKind(k));
+ Assert(!utils::isRegExpKind(k));
break;
}
}
@@ -521,7 +512,7 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
break;
}
default: {
- Assert(!isRegExpKind(r.getKind()));
+ Assert(!utils::isRegExpKind(r.getKind()));
return 0;
break;
}
@@ -808,7 +799,7 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
// aren't a standard regular expression kind. However, if we do, then
// the following code is conservative and says that the current
// regular expression can begin with any character.
- Assert(isRegExpKind(k));
+ Assert(utils::isRegExpKind(k));
// can start with any character
Assert(d_lastchar < std::numeric_limits<unsigned>::max());
for (unsigned i = 0; i <= d_lastchar; i++)
@@ -908,12 +899,12 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
// all strings in the language of R1 have the same length, say n,
// then the conclusion of the reduction is quantifier-free:
// ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2)
- Node reLength = SequencesRewriter::getFixedLengthForRegexp(r[0]);
+ Node reLength = RegExpEntail::getFixedLengthForRegexp(r[0]);
if (reLength.isNull())
{
// try from the opposite end
unsigned indexE = r.getNumChildren() - 1;
- reLength = SequencesRewriter::getFixedLengthForRegexp(r[indexE]);
+ reLength = RegExpEntail::getFixedLengthForRegexp(r[indexE]);
if (!reLength.isNull())
{
indexRm = indexE;
@@ -1068,7 +1059,7 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
break;
}
default: {
- Assert(!isRegExpKind(k));
+ Assert(!utils::isRegExpKind(k));
break;
}
}
@@ -1267,7 +1258,7 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes
break;
}
default: {
- Assert(!isRegExpKind(k));
+ Assert(!utils::isRegExpKind(k));
break;
}
}
@@ -1744,7 +1735,7 @@ std::string RegExpOpr::mkString( Node r ) {
std::stringstream ss;
ss << r;
retStr = ss.str();
- Assert(!isRegExpKind(r.getKind()));
+ Assert(!utils::isRegExpKind(r.getKind()));
break;
}
}
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h
index 7845b2e00..53d60cd40 100644
--- a/src/theory/strings/regexp_operation.h
+++ b/src/theory/strings/regexp_operation.h
@@ -121,8 +121,6 @@ class RegExpOpr {
bool checkConstRegExp( Node r );
/** get the constant type for regular expression r */
RegExpConstType getRegExpConstType(Node r);
- /** is k a native operator whose return type is a regular expression? */
- static bool isRegExpKind(Kind k);
void simplify(Node t, std::vector< Node > &new_nodes, bool polarity);
/**
* This method returns 1 if the empty string is in r, 2 if the empty string
diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp
index f6ef92b4d..35c52646d 100644
--- a/src/theory/strings/regexp_solver.cpp
+++ b/src/theory/strings/regexp_solver.cpp
@@ -686,7 +686,7 @@ Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
{
Trace("strings-error") << "Unsupported term: " << r
<< " in normalization SymRegExp." << std::endl;
- Assert(!RegExpOpr::isRegExpKind(r.getKind()));
+ Assert(!utils::isRegExpKind(r.getKind()));
}
}
return ret;
diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp
index be1e13459..9ccda55c2 100644
--- a/src/theory/strings/sequences_rewriter.cpp
+++ b/src/theory/strings/sequences_rewriter.cpp
@@ -16,294 +16,22 @@
#include "theory/strings/sequences_rewriter.h"
-#include <stdint.h>
-#include <algorithm>
-
+#include "expr/attribute.h"
#include "expr/node_builder.h"
-#include "options/strings_options.h"
-#include "smt/logic_exception.h"
-#include "theory/arith/arith_msum.h"
-#include "theory/strings/regexp_operation.h"
+#include "theory/rewriter.h"
+#include "theory/strings/arith_entail.h"
+#include "theory/strings/regexp_entail.h"
+#include "theory/strings/strings_entail.h"
#include "theory/strings/strings_rewriter.h"
#include "theory/strings/theory_strings_utils.h"
#include "theory/strings/word.h"
-#include "theory/theory.h"
-#include "util/integer.h"
-#include "util/rational.h"
using namespace std;
-using namespace CVC4;
using namespace CVC4::kind;
-using namespace CVC4::theory;
-using namespace CVC4::theory::strings;
-Node SequencesRewriter::simpleRegexpConsume(std::vector<Node>& mchildren,
- std::vector<Node>& children,
- int dir)
-{
- Trace("regexp-ext-rewrite-debug")
- << "Simple reg exp consume, dir=" << dir << ":" << std::endl;
- Trace("regexp-ext-rewrite-debug")
- << " mchildren : " << mchildren << std::endl;
- Trace("regexp-ext-rewrite-debug") << " children : " << children << std::endl;
- NodeManager* nm = NodeManager::currentNM();
- unsigned tmin = dir < 0 ? 0 : dir;
- unsigned tmax = dir < 0 ? 1 : dir;
- // try to remove off front and back
- for (unsigned t = 0; t < 2; t++)
- {
- if (tmin <= t && t <= tmax)
- {
- bool do_next = true;
- while (!children.empty() && !mchildren.empty() && do_next)
- {
- do_next = false;
- Node xc = mchildren[mchildren.size() - 1];
- Node rc = children[children.size() - 1];
- Assert(rc.getKind() != kind::REGEXP_CONCAT);
- Assert(xc.getKind() != kind::STRING_CONCAT);
- if (rc.getKind() == kind::STRING_TO_REGEXP)
- {
- if (xc == rc[0])
- {
- children.pop_back();
- mchildren.pop_back();
- do_next = true;
- Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl;
- }
- else if (xc.isConst() && rc[0].isConst())
- {
- // split the constant
- int index;
- Node s = splitConstant(xc, rc[0], index, t == 0);
- Trace("regexp-ext-rewrite-debug")
- << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> "
- << s << " " << index << " " << t << std::endl;
- if (s.isNull())
- {
- Trace("regexp-ext-rewrite-debug")
- << "...return false" << std::endl;
- return NodeManager::currentNM()->mkConst(false);
- }
- else
- {
- Trace("regexp-ext-rewrite-debug")
- << "...strip equal const" << std::endl;
- children.pop_back();
- mchildren.pop_back();
- if (index == 0)
- {
- mchildren.push_back(s);
- }
- else
- {
- children.push_back(nm->mkNode(STRING_TO_REGEXP, s));
- }
- }
- do_next = true;
- }
- }
- else if (xc.isConst())
- {
- // check for constants
- CVC4::String s = xc.getConst<String>();
- if (Word::isEmpty(xc))
- {
- Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl;
- // ignore and continue
- mchildren.pop_back();
- do_next = true;
- }
- else if (rc.getKind() == kind::REGEXP_RANGE
- || rc.getKind() == kind::REGEXP_SIGMA)
- {
- std::vector<unsigned> ssVec;
- ssVec.push_back(t == 0 ? s.back() : s.front());
- CVC4::String ss(ssVec);
- if (testConstStringInRegExp(ss, 0, rc))
- {
- // strip off one character
- mchildren.pop_back();
- if (s.size() > 1)
- {
- if (t == 0)
- {
- mchildren.push_back(NodeManager::currentNM()->mkConst(
- s.substr(0, s.size() - 1)));
- }
- else
- {
- mchildren.push_back(
- NodeManager::currentNM()->mkConst(s.substr(1)));
- }
- }
- children.pop_back();
- do_next = true;
- }
- else
- {
- return NodeManager::currentNM()->mkConst(false);
- }
- }
- else if (rc.getKind() == kind::REGEXP_INTER
- || rc.getKind() == kind::REGEXP_UNION)
- {
- // see if any/each child does not work
- bool result_valid = true;
- Node result;
- Node emp_s = NodeManager::currentNM()->mkConst(::CVC4::String(""));
- for (unsigned i = 0; i < rc.getNumChildren(); i++)
- {
- std::vector<Node> mchildren_s;
- std::vector<Node> children_s;
- mchildren_s.push_back(xc);
- utils::getConcat(rc[i], children_s);
- Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
- if (!ret.isNull())
- {
- // one conjunct cannot be satisfied, return false
- if (rc.getKind() == kind::REGEXP_INTER)
- {
- return ret;
- }
- }
- else
- {
- if (children_s.empty())
- {
- // if we were able to fully consume, store the result
- Assert(mchildren_s.size() <= 1);
- if (mchildren_s.empty())
- {
- mchildren_s.push_back(emp_s);
- }
- if (result.isNull())
- {
- result = mchildren_s[0];
- }
- else if (result != mchildren_s[0])
- {
- result_valid = false;
- }
- }
- else
- {
- result_valid = false;
- }
- }
- }
- if (result_valid)
- {
- if (result.isNull())
- {
- // all disjuncts cannot be satisfied, return false
- Assert(rc.getKind() == kind::REGEXP_UNION);
- return NodeManager::currentNM()->mkConst(false);
- }
- else
- {
- // all branches led to the same result
- children.pop_back();
- mchildren.pop_back();
- if (result != emp_s)
- {
- mchildren.push_back(result);
- }
- do_next = true;
- }
- }
- }
- else if (rc.getKind() == kind::REGEXP_STAR)
- {
- // check if there is no way that this star can be unrolled even once
- std::vector<Node> mchildren_s;
- mchildren_s.insert(
- mchildren_s.end(), mchildren.begin(), mchildren.end());
- if (t == 1)
- {
- std::reverse(mchildren_s.begin(), mchildren_s.end());
- }
- std::vector<Node> children_s;
- utils::getConcat(rc[0], children_s);
- Trace("regexp-ext-rewrite-debug")
- << "...recursive call on body of star" << std::endl;
- Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
- if (!ret.isNull())
- {
- Trace("regexp-ext-rewrite-debug")
- << "CRE : regexp star infeasable " << xc << " " << rc
- << std::endl;
- children.pop_back();
- if (!children.empty())
- {
- Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl;
- do_next = true;
- }
- }
- else
- {
- if (children_s.empty())
- {
- // check if beyond this, we can't do it or there is nothing
- // left, if so, repeat
- bool can_skip = false;
- if (children.size() > 1)
- {
- std::vector<Node> mchildren_ss;
- mchildren_ss.insert(
- mchildren_ss.end(), mchildren.begin(), mchildren.end());
- std::vector<Node> children_ss;
- children_ss.insert(
- children_ss.end(), children.begin(), children.end() - 1);
- if (t == 1)
- {
- std::reverse(mchildren_ss.begin(), mchildren_ss.end());
- std::reverse(children_ss.begin(), children_ss.end());
- }
- if (simpleRegexpConsume(mchildren_ss, children_ss, t)
- .isNull())
- {
- can_skip = true;
- }
- }
- if (!can_skip)
- {
- Trace("regexp-ext-rewrite-debug")
- << "...can't skip" << std::endl;
- // take the result of fully consuming once
- if (t == 1)
- {
- std::reverse(mchildren_s.begin(), mchildren_s.end());
- }
- mchildren.clear();
- mchildren.insert(
- mchildren.end(), mchildren_s.begin(), mchildren_s.end());
- do_next = true;
- }
- else
- {
- Trace("regexp-ext-rewrite-debug")
- << "...can skip " << rc << " from " << xc << std::endl;
- }
- }
- }
- }
- }
- if (!do_next)
- {
- Trace("regexp-ext-rewrite")
- << "Cannot consume : " << xc << " " << rc << std::endl;
- }
- }
- }
- if (dir != 0)
- {
- std::reverse(children.begin(), children.end());
- std::reverse(mchildren.begin(), mchildren.end());
- }
- }
- return Node::null();
-}
+namespace CVC4 {
+namespace theory {
+namespace strings {
Node SequencesRewriter::rewriteEquality(Node node)
{
@@ -325,7 +53,7 @@ Node SequencesRewriter::rewriteEquality(Node node)
// must call rewrite contains directly to avoid infinite loop
// we do a fix point since we may rewrite contains terms to simpler
// contains terms.
- Node ctn = checkEntailContains(node[r], node[1 - r], false);
+ Node ctn = StringsEntail::checkContains(node[r], node[1 - r], false);
if (!ctn.isNull())
{
if (!ctn.getConst<bool>())
@@ -474,7 +202,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
// ------- homogeneous constants
for (unsigned i = 0; i < 2; i++)
{
- Node cn = checkEntailHomogeneousString(node[i]);
+ Node cn = StringsEntail::checkHomogeneousString(node[i]);
if (!cn.isNull() && !Word::isEmpty(cn))
{
Assert(cn.isConst());
@@ -564,7 +292,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
}
// (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y "")))
- if (checkEntailNonEmpty(ne[2]))
+ if (StringsEntail::checkNonEmpty(ne[2]))
{
Node ret =
nm->mkNode(AND,
@@ -574,7 +302,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
}
// (= "" (str.replace x "A" "")) ---> (str.prefix x "A")
- if (checkEntailLengthOne(ne[1]) && ne[2] == empty)
+ if (StringsEntail::checkLengthOne(ne[1]) && ne[2] == empty)
{
Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]);
return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
@@ -584,7 +312,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
{
Node zero = nm->mkConst(Rational(0));
- if (checkEntailArith(ne[1], false) && checkEntailArith(ne[2], true))
+ if (ArithEntail::check(ne[1], false) && ArithEntail::check(ne[2], true))
{
// (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0
if (ne[1] == zero)
@@ -600,7 +328,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
}
// (= "" (str.substr "A" 0 z)) ---> (<= z 0)
- if (checkEntailNonEmpty(ne[0]) && ne[1] == zero)
+ if (StringsEntail::checkNonEmpty(ne[0]) && ne[1] == zero)
{
Node ret = nm->mkNode(LEQ, ne[2], zero);
return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_Z);
@@ -618,7 +346,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
Node x = node[1 - i];
// (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x))
- if (checkEntailNonEmpty(x) && repl[0] == empty)
+ if (StringsEntail::checkNonEmpty(x) && repl[0] == empty)
{
Node ret = nm->mkNode(
EQUAL, empty, nm->mkNode(STRING_STRREPL, x, repl[2], repl[1]));
@@ -649,7 +377,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
{
Node lenY = nm->mkNode(STRING_LENGTH, repl[1]);
Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]);
- if (checkEntailArithEq(lenY, lenZ))
+ if (ArithEntail::checkEq(lenY, lenZ))
{
Node ret = nm->mkNode(OR,
nm->mkNode(EQUAL, repl[0], repl[1]),
@@ -672,7 +400,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
{
if (node[1 - i].getKind() == STRING_CONCAT)
{
- new_ret = inferEqsFromContains(node[i], node[1 - i]);
+ new_ret = StringsEntail::inferEqsFromContains(node[i], node[1 - i]);
if (!new_ret.isNull())
{
return returnRewrite(node, new_ret, Rewrite::STR_EQ_CONJ_LEN_ENTAIL);
@@ -709,7 +437,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0);
Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1);
- if (checkEntailArithEq(lenPfx0, lenPfx1))
+ if (ArithEntail::checkEq(lenPfx0, lenPfx1))
{
std::vector<Node> sfxv0(v0.begin() + i, v0.end());
std::vector<Node> sfxv1(v1.begin() + j, v1.end());
@@ -719,7 +447,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
.eqNode(utils::mkConcat(sfxv1, stype)));
return returnRewrite(node, ret, Rewrite::SPLIT_EQ);
}
- else if (checkEntailArith(lenPfx1, lenPfx0, true))
+ else if (ArithEntail::check(lenPfx1, lenPfx0, true))
{
// The prefix on the right-hand side is strictly longer than the
// prefix on the left-hand side, so we try to strip the right-hand
@@ -729,7 +457,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
// (= (str.++ "A" x y) (str.++ x "AB" z)) --->
// (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z)))
std::vector<Node> rpfxv1;
- if (stripSymbolicLength(pfxv1, rpfxv1, 1, lenPfx0))
+ if (StringsEntail::stripSymbolicLength(pfxv1, rpfxv1, 1, lenPfx0))
{
std::vector<Node> sfxv0(v0.begin() + i, v0.end());
pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end());
@@ -746,7 +474,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
// in the inner loop)
break;
}
- else if (checkEntailArith(lenPfx0, lenPfx1, true))
+ else if (ArithEntail::check(lenPfx0, lenPfx1, true))
{
// The prefix on the left-hand side is strictly longer than the
// prefix on the right-hand side, so we try to strip the left-hand
@@ -756,7 +484,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node)
// (= (str.++ x "AB" z) (str.++ "A" x y)) --->
// (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y))
std::vector<Node> rpfxv0;
- if (stripSymbolicLength(pfxv0, rpfxv0, 1, lenPfx1))
+ if (StringsEntail::stripSymbolicLength(pfxv0, rpfxv0, 1, lenPfx1))
{
pfxv0.insert(pfxv0.end(), v0.begin() + i, v0.end());
std::vector<Node> sfxv1(v1.begin() + j, v1.end());
@@ -927,7 +655,7 @@ Node SequencesRewriter::rewriteConcat(Node node)
Node lastX;
for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++)
{
- Node s = getStringOrEmpty(node_vec[i]);
+ Node s = StringsEntail::getStringOrEmpty(node_vec[i]);
bool nextX = false;
if (s != lastX)
{
@@ -1058,7 +786,8 @@ Node SequencesRewriter::rewriteConcatRegExp(TNode node)
{
// if empty, drop it
// e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)*
- if (isConstRegExp(curr) && testConstStringInRegExp(emptyStr, 0, curr))
+ if (RegExpEntail::isConstRegExp(curr)
+ && RegExpEntail::testConstStringInRegExp(emptyStr, 0, curr))
{
curr = Node::null();
}
@@ -1079,8 +808,9 @@ Node SequencesRewriter::rewriteConcatRegExp(TNode node)
lastAllStar = true;
// go back and remove empty ones from back of cvec
// e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)*
- while (!cvec.empty() && isConstRegExp(cvec.back())
- && testConstStringInRegExp(emptyStr, 0, cvec.back()))
+ while (!cvec.empty() && RegExpEntail::isConstRegExp(cvec.back())
+ && RegExpEntail::testConstStringInRegExp(
+ emptyStr, 0, cvec.back()))
{
cvec.pop_back();
}
@@ -1145,7 +875,7 @@ Node SequencesRewriter::rewriteStarRegExp(TNode node)
else if (node[0].getKind() == REGEXP_UNION)
{
// simplification of unions under star
- if (hasEpsilonNode(node[0]))
+ if (RegExpEntail::hasEpsilonNode(node[0]))
{
bool changed = false;
std::vector<Node> node_vec;
@@ -1348,270 +1078,6 @@ Node SequencesRewriter::rewriteRangeRegExp(TNode node)
return node;
}
-bool SequencesRewriter::isConstRegExp(TNode t)
-{
- if (t.getKind() == kind::STRING_TO_REGEXP)
- {
- return t[0].isConst();
- }
- else if (t.isVar())
- {
- return false;
- }
- else
- {
- for (unsigned i = 0; i < t.getNumChildren(); ++i)
- {
- if (!isConstRegExp(t[i]))
- {
- return false;
- }
- }
- return true;
- }
-}
-
-bool SequencesRewriter::testConstStringInRegExp(CVC4::String& s,
- unsigned int index_start,
- TNode r)
-{
- Assert(index_start <= s.size());
- Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at "
- << index_start << std::endl;
- Assert(!r.isVar());
- Kind k = r.getKind();
- switch (k)
- {
- case kind::STRING_TO_REGEXP:
- {
- CVC4::String s2 = s.substr(index_start, s.size() - index_start);
- if (r[0].isConst())
- {
- return (s2 == r[0].getConst<String>());
- }
- else
- {
- Assert(false) << "RegExp contains variables";
- return false;
- }
- }
- case kind::REGEXP_CONCAT:
- {
- if (s.size() != index_start)
- {
- std::vector<int> vec_k(r.getNumChildren(), -1);
- int start = 0;
- int left = (int)s.size() - index_start;
- int i = 0;
- while (i < (int)r.getNumChildren())
- {
- bool flag = true;
- if (i == (int)r.getNumChildren() - 1)
- {
- if (testConstStringInRegExp(s, index_start + start, r[i]))
- {
- return true;
- }
- }
- else if (i == -1)
- {
- return false;
- }
- else
- {
- for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i])
- {
- CVC4::String t = s.substr(index_start + start, vec_k[i]);
- if (testConstStringInRegExp(t, 0, r[i]))
- {
- start += vec_k[i];
- left -= vec_k[i];
- flag = false;
- ++i;
- vec_k[i] = -1;
- break;
- }
- }
- }
-
- if (flag)
- {
- --i;
- if (i >= 0)
- {
- start -= vec_k[i];
- left += vec_k[i];
- }
- }
- }
- return false;
- }
- else
- {
- for (unsigned i = 0; i < r.getNumChildren(); ++i)
- {
- if (!testConstStringInRegExp(s, index_start, r[i])) return false;
- }
- return true;
- }
- }
- case kind::REGEXP_UNION:
- {
- for (unsigned i = 0; i < r.getNumChildren(); ++i)
- {
- if (testConstStringInRegExp(s, index_start, r[i])) return true;
- }
- return false;
- }
- case kind::REGEXP_INTER:
- {
- for (unsigned i = 0; i < r.getNumChildren(); ++i)
- {
- if (!testConstStringInRegExp(s, index_start, r[i])) return false;
- }
- return true;
- }
- case kind::REGEXP_STAR:
- {
- if (s.size() != index_start)
- {
- for (unsigned i = s.size() - index_start; i > 0; --i)
- {
- CVC4::String t = s.substr(index_start, i);
- if (testConstStringInRegExp(t, 0, r[0]))
- {
- if (index_start + i == s.size()
- || testConstStringInRegExp(s, index_start + i, r))
- {
- return true;
- }
- }
- }
- return false;
- }
- else
- {
- return true;
- }
- }
- case kind::REGEXP_EMPTY: { return false;
- }
- case kind::REGEXP_SIGMA:
- {
- if (s.size() == index_start + 1)
- {
- return true;
- }
- else
- {
- return false;
- }
- }
- case kind::REGEXP_RANGE:
- {
- if (s.size() == index_start + 1)
- {
- unsigned a = r[0].getConst<String>().front();
- unsigned b = r[1].getConst<String>().front();
- unsigned c = s.back();
- return (a <= c && c <= b);
- }
- else
- {
- return false;
- }
- }
- case kind::REGEXP_LOOP:
- {
- uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
- if (s.size() == index_start)
- {
- return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]);
- }
- else if (l == 0 && r[1] == r[2])
- {
- return false;
- }
- else
- {
- Assert(r.getNumChildren() == 3)
- << "String rewriter error: LOOP has 2 children";
- if (l == 0)
- {
- // R{0,u}
- uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
- for (unsigned len = s.size() - index_start; len >= 1; len--)
- {
- CVC4::String t = s.substr(index_start, len);
- if (testConstStringInRegExp(t, 0, r[0]))
- {
- if (len + index_start == s.size())
- {
- return true;
- }
- else
- {
- Node num2 =
- NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1));
- Node r2 = NodeManager::currentNM()->mkNode(
- kind::REGEXP_LOOP, r[0], r[1], num2);
- if (testConstStringInRegExp(s, index_start + len, r2))
- {
- return true;
- }
- }
- }
- }
- return false;
- }
- else
- {
- // R{l,l}
- Assert(r[1] == r[2])
- << "String rewriter error: LOOP nums are not equal";
- if (l > s.size() - index_start)
- {
- if (testConstStringInRegExp(s, s.size(), r[0]))
- {
- l = s.size() - index_start;
- }
- else
- {
- return false;
- }
- }
- for (unsigned len = 1; len <= s.size() - index_start; len++)
- {
- CVC4::String t = s.substr(index_start, len);
- if (testConstStringInRegExp(t, 0, r[0]))
- {
- Node num2 =
- NodeManager::currentNM()->mkConst(CVC4::Rational(l - 1));
- Node r2 = NodeManager::currentNM()->mkNode(
- kind::REGEXP_LOOP, r[0], num2, num2);
- if (testConstStringInRegExp(s, index_start + len, r2))
- {
- return true;
- }
- }
- }
- return false;
- }
- }
- }
- case REGEXP_COMPLEMENT:
- {
- return !testConstStringInRegExp(s, index_start, r[0]);
- break;
- }
- default:
- {
- Assert(!RegExpOpr::isRegExpKind(k));
- return false;
- }
- }
-}
-
Node SequencesRewriter::rewriteMembership(TNode node)
{
NodeManager* nm = NodeManager::currentNM();
@@ -1626,12 +1092,12 @@ Node SequencesRewriter::rewriteMembership(TNode node)
Node retNode = NodeManager::currentNM()->mkConst(false);
return returnRewrite(node, retNode, Rewrite::RE_IN_EMPTY);
}
- else if (x.isConst() && isConstRegExp(r))
+ else if (x.isConst() && RegExpEntail::isConstRegExp(r))
{
// test whether x in node[1]
CVC4::String s = x.getConst<String>();
- Node retNode =
- NodeManager::currentNM()->mkConst(testConstStringInRegExp(s, 0, r));
+ bool test = RegExpEntail::testConstStringInRegExp(s, 0, r);
+ Node retNode = NodeManager::currentNM()->mkConst(test);
return returnRewrite(node, retNode, Rewrite::RE_IN_EVAL);
}
else if (r.getKind() == kind::REGEXP_SIGMA)
@@ -1666,7 +1132,7 @@ Node SequencesRewriter::rewriteMembership(TNode node)
// (str.in.re (str.++ x1 ... xn) (re.* R)) -->
// (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R))
// if the length of all strings in R is one.
- Node flr = getFixedLengthForRegexp(r[0]);
+ Node flr = RegExpEntail::getFixedLengthForRegexp(r[0]);
if (!flr.isNull())
{
Node one = nm->mkConst(Rational(1));
@@ -1794,7 +1260,7 @@ Node SequencesRewriter::rewriteMembership(TNode node)
success = false;
std::vector<Node> children;
utils::getConcat(r[0], children);
- Node scn = simpleRegexpConsume(mchildren, children, dir);
+ Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children, dir);
if (!scn.isNull())
{
Trace("regexp-ext-rewrite")
@@ -1834,7 +1300,7 @@ Node SequencesRewriter::rewriteMembership(TNode node)
std::vector<Node> mchildren;
utils::getConcat(x, mchildren);
unsigned prevSize = children.size() + mchildren.size();
- Node scn = simpleRegexpConsume(mchildren, children);
+ Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children);
if (!scn.isNull())
{
Trace("regexp-ext-rewrite")
@@ -2003,19 +1469,6 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node)
return RewriteResponse(REWRITE_DONE, retNode);
}
-bool SequencesRewriter::hasEpsilonNode(TNode node)
-{
- for (unsigned int i = 0; i < node.getNumChildren(); i++)
- {
- if (node[i].getKind() == kind::STRING_TO_REGEXP && node[i][0].isConst()
- && Word::isEmpty(node[i][0]))
- {
- return true;
- }
- }
- return false;
-}
-
RewriteResponse SequencesRewriter::preRewrite(TNode node)
{
return RewriteResponse(REWRITE_DONE, node);
@@ -2106,12 +1559,12 @@ Node SequencesRewriter::rewriteSubstr(Node node)
Node zero = nm->mkConst(CVC4::Rational(0));
// if entailed non-positive length or negative start point
- if (checkEntailArith(zero, node[1], true))
+ if (ArithEntail::check(zero, node[1], true))
{
Node ret = Word::mkEmptyWord(node.getType());
return returnRewrite(node, ret, Rewrite::SS_START_NEG);
}
- else if (checkEntailArith(zero, node[2]))
+ else if (ArithEntail::check(zero, node[2]))
{
Node ret = Word::mkEmptyWord(node.getType());
return returnRewrite(node, ret, Rewrite::SS_LEN_NON_POS);
@@ -2137,7 +1590,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
// over-approximation of the length of (str.substr x a a), which
// then allows us to reason that the result of the whole term must
// be empty.
- if (checkEntailArith(node[1], node[0][2]))
+ if (ArithEntail::check(node[1], node[0][2]))
{
Node ret = Word::mkEmptyWord(node.getType());
return returnRewrite(node, ret, Rewrite::SS_START_GEQ_LEN);
@@ -2150,8 +1603,8 @@ Node SequencesRewriter::rewriteSubstr(Node node)
// if (str.len y) = 1 and (str.len z) = 1
if (node[1] == zero)
{
- if (checkEntailLengthOne(node[0][1], true)
- && checkEntailLengthOne(node[0][2], true))
+ if (StringsEntail::checkLengthOne(node[0][1], true)
+ && StringsEntail::checkLengthOne(node[0][2], true))
{
Node ret = nm->mkNode(
kind::STRING_STRREPL,
@@ -2172,7 +1625,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
{
Node curr = node[2];
std::vector<Node> childrenr;
- if (stripSymbolicLength(n1, childrenr, 1, curr))
+ if (StringsEntail::stripSymbolicLength(n1, childrenr, 1, curr))
{
if (curr != zero && !n1.empty())
{
@@ -2204,7 +1657,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2]));
if (node[2] != tot_len)
{
- if (checkEntailArith(node[2], tot_len))
+ if (ArithEntail::check(node[2], tot_len))
{
// end point beyond end point of string, map to tot_len
Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len);
@@ -2220,7 +1673,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
// (str.substr s x y) --> "" if x < len(s) |= 0 >= y
Node n1_lt_tot_len =
Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len));
- if (checkEntailArithWithAssumption(n1_lt_tot_len, zero, node[2], false))
+ if (ArithEntail::checkWithAssumption(n1_lt_tot_len, zero, node[2], false))
{
Node ret = Word::mkEmptyWord(node.getType());
return returnRewrite(node, ret, Rewrite::SS_START_ENTAILS_ZERO_LEN);
@@ -2229,7 +1682,8 @@ Node SequencesRewriter::rewriteSubstr(Node node)
// (str.substr s x y) --> "" if 0 < y |= x >= str.len(s)
Node non_zero_len =
Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2]));
- if (checkEntailArithWithAssumption(non_zero_len, node[1], tot_len, false))
+ if (ArithEntail::checkWithAssumption(
+ non_zero_len, node[1], tot_len, false))
{
Node ret = Word::mkEmptyWord(node.getType());
return returnRewrite(node, ret, Rewrite::SS_NON_ZERO_LEN_ENTAILS_OOB);
@@ -2238,7 +1692,8 @@ Node SequencesRewriter::rewriteSubstr(Node node)
// (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s)
Node geq_zero_start =
Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero));
- if (checkEntailArithWithAssumption(geq_zero_start, zero, tot_len, false))
+ if (ArithEntail::checkWithAssumption(
+ geq_zero_start, zero, tot_len, false))
{
Node ret = Word::mkEmptyWord(node.getType());
return returnRewrite(
@@ -2246,7 +1701,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
}
// (str.substr s x x) ---> "" if (str.len s) <= 1
- if (node[1] == node[2] && checkEntailLengthOne(node[0]))
+ if (node[1] == node[2] && StringsEntail::checkLengthOne(node[0]))
{
Node ret = Word::mkEmptyWord(node.getType());
return returnRewrite(node, ret, Rewrite::SS_LEN_ONE_Z_Z);
@@ -2257,7 +1712,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
// strip off components while quantity is entailed positive
int dir = r == 0 ? 1 : -1;
std::vector<Node> childrenr;
- if (stripSymbolicLength(n1, childrenr, dir, curr))
+ if (StringsEntail::stripSymbolicLength(n1, childrenr, dir, curr))
{
if (r == 0)
{
@@ -2281,7 +1736,7 @@ Node SequencesRewriter::rewriteSubstr(Node node)
{
Node start_inner = node[0][1];
Node start_outer = node[1];
- if (checkEntailArith(start_outer) && checkEntailArith(start_inner))
+ if (ArithEntail::check(start_outer) && ArithEntail::check(start_inner))
{
// both are positive
// thus, start point is definitely start_inner+start_outer.
@@ -2298,11 +1753,11 @@ Node SequencesRewriter::rewriteSubstr(Node node)
{
new_len = len_from_inner;
}
- else if (checkEntailArith(len_from_inner, len_from_outer))
+ else if (ArithEntail::check(len_from_inner, len_from_outer))
{
new_len = len_from_outer;
}
- else if (checkEntailArith(len_from_outer, len_from_inner))
+ else if (ArithEntail::check(len_from_outer, len_from_inner))
{
new_len = len_from_inner;
}
@@ -2343,7 +1798,7 @@ Node SequencesRewriter::rewriteContains(Node node)
{
Node len1 =
NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
- if (checkEntailArith(len1, true))
+ if (ArithEntail::check(len1, true))
{
// we handle the false case here since the rewrite for equality
// uses this function, hence we want to conclude false if possible.
@@ -2352,7 +1807,7 @@ Node SequencesRewriter::rewriteContains(Node node)
return returnRewrite(node, ret, Rewrite::CTN_LHS_EMPTYSTR);
}
}
- else if (checkEntailLengthOne(t))
+ else if (StringsEntail::checkLengthOne(t))
{
std::vector<Node> vec = Word::getChars(node[0]);
Node emp = Word::mkEmptyWord(t.getType());
@@ -2372,7 +1827,8 @@ Node SequencesRewriter::rewriteContains(Node node)
else if (node[1].getKind() == kind::STRING_CONCAT)
{
int firstc, lastc;
- if (!canConstantContainConcat(node[0], node[1], firstc, lastc))
+ if (!StringsEntail::canConstantContainConcat(
+ node[0], node[1], firstc, lastc))
{
Node ret = NodeManager::currentNM()->mkConst(false);
return returnRewrite(node, ret, Rewrite::CTN_NCONST_CTN_CONCAT);
@@ -2410,7 +1866,7 @@ Node SequencesRewriter::rewriteContains(Node node)
}
else if (node[0].getKind() == STRING_STRREPL)
{
- Node rplDomain = checkEntailContains(node[0][1], node[1]);
+ Node rplDomain = StringsEntail::checkContains(node[0][1], node[1]);
if (!rplDomain.isNull() && !rplDomain.getConst<bool>())
{
Node d1 = nm->mkNode(STRING_STRCTN, node[0][0], node[1]);
@@ -2436,7 +1892,7 @@ Node SequencesRewriter::rewriteContains(Node node)
// component-wise containment
std::vector<Node> nc1rb;
std::vector<Node> nc1re;
- if (componentContains(nc1, nc2, nc1rb, nc1re) != -1)
+ if (StringsEntail::componentContains(nc1, nc2, nc1rb, nc1re) != -1)
{
Node ret = NodeManager::currentNM()->mkConst(true);
return returnRewrite(node, ret, Rewrite::CTN_COMPONENT);
@@ -2446,7 +1902,7 @@ Node SequencesRewriter::rewriteContains(Node node)
// strip endpoints
std::vector<Node> nb;
std::vector<Node> ne;
- if (stripConstantEndpoints(nc1, nc2, nb, ne))
+ if (StringsEntail::stripConstantEndpoints(nc1, nc2, nb, ne))
{
Node ret = NodeManager::currentNM()->mkNode(
kind::STRING_STRCTN, utils::mkConcat(nc1, stype), node[1]);
@@ -2464,10 +1920,10 @@ Node SequencesRewriter::rewriteContains(Node node)
// replacement does not change y. (str.contains x w) checks that if the
// replacement changes anything in y, the w makes it impossible for it to
// occur in x.
- Node ctnConst = checkEntailContains(node[0], n[0]);
+ Node ctnConst = StringsEntail::checkContains(node[0], n[0]);
if (!ctnConst.isNull() && !ctnConst.getConst<bool>())
{
- Node ctnConst2 = checkEntailContains(node[0], n[2]);
+ Node ctnConst2 = StringsEntail::checkContains(node[0], n[2]);
if (!ctnConst2.isNull() && !ctnConst2.getConst<bool>())
{
Node res = nm->mkConst(false);
@@ -2511,7 +1967,7 @@ Node SequencesRewriter::rewriteContains(Node node)
// length entailment
Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
- if (checkEntailArith(len_n2, len_n1, true))
+ if (ArithEntail::check(len_n2, len_n1, true))
{
// len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false
Node ret = NodeManager::currentNM()->mkConst(false);
@@ -2522,13 +1978,13 @@ Node SequencesRewriter::rewriteContains(Node node)
// For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false
// since the number of a's in the second argument is greater than the number
// of a's in the first argument
- if (checkEntailMultisetSubset(node[0], node[1]))
+ if (StringsEntail::checkMultisetSubset(node[0], node[1]))
{
Node ret = nm->mkConst(false);
return returnRewrite(node, ret, Rewrite::CTN_MSET_NSS);
}
- if (checkEntailArith(len_n2, len_n1, false))
+ if (ArithEntail::check(len_n2, len_n1, false))
{
// len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2
Node ret = node[0].eqNode(node[1]);
@@ -2613,7 +2069,7 @@ Node SequencesRewriter::rewriteContains(Node node)
// (str.contains (str.replace x y x) z) ---> (str.contains x z)
// if (str.len z) <= 1
- if (checkEntailLengthOne(node[1]))
+ if (StringsEntail::checkLengthOne(node[1]))
{
Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]);
return returnRewrite(node, ret, Rewrite::CTN_REPL_LEN_ONE_TO_CTN);
@@ -2633,9 +2089,9 @@ Node SequencesRewriter::rewriteContains(Node node)
// (str.contains (str.replace x y z) w) --->
// (str.contains (str.replace x y "") w)
// if (str.contains z w) ---> false and (str.len w) = 1
- if (checkEntailLengthOne(node[1]))
+ if (StringsEntail::checkLengthOne(node[1]))
{
- Node ctn = checkEntailContains(node[1], node[0][2]);
+ Node ctn = StringsEntail::checkContains(node[1], node[0][2]);
if (!ctn.isNull() && !ctn.getConst<bool>())
{
Node empty = nm->mkConst(String(""));
@@ -2734,7 +2190,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
return returnRewrite(node, zero, Rewrite::IDOF_EQ_CST_START);
}
}
- if (checkEntailArith(node[2], true))
+ if (ArithEntail::check(node[2], true))
{
// y>0 implies indexof( x, x, y ) --> -1
Node negone = nm->mkConst(Rational(-1));
@@ -2757,7 +2213,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
{
if (Word::isEmpty(node[1]))
{
- if (checkEntailArith(len0, node[2]) && checkEntailArith(node[2]))
+ if (ArithEntail::check(len0, node[2]) && ArithEntail::check(node[2]))
{
// len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z
return returnRewrite(node, node[2], Rewrite::IDOF_EMP_IDOF);
@@ -2765,7 +2221,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
}
}
- if (checkEntailArith(len1, len0m2, true))
+ if (ArithEntail::check(len1, len0m2, true))
{
// len(x)-z < len(y) implies indexof( x, y, z ) ----> -1
Node negone = nm->mkConst(Rational(-1));
@@ -2779,7 +2235,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
fstr = Rewriter::rewrite(fstr);
}
- Node cmp_conr = checkEntailContains(fstr, node[1]);
+ Node cmp_conr = StringsEntail::checkContains(fstr, node[1]);
Trace("strings-rewrite-debug") << "For " << node << ", check contains("
<< fstr << ", " << node[1] << ")" << std::endl;
Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl;
@@ -2794,7 +2250,8 @@ Node SequencesRewriter::rewriteIndexof(Node node)
// past the first position in node[0] that contains node[1], we can drop
std::vector<Node> nb;
std::vector<Node> ne;
- int cc = componentContains(children0, children1, nb, ne, true, 1);
+ int cc = StringsEntail::componentContains(
+ children0, children1, nb, ne, true, 1);
if (cc != -1 && !ne.empty())
{
// For example:
@@ -2805,7 +2262,8 @@ Node SequencesRewriter::rewriteIndexof(Node node)
}
// Strip components from the beginning that are guaranteed not to match
- if (stripConstantEndpoints(children0, children1, nb, ne, 1))
+ if (StringsEntail::stripConstantEndpoints(
+ children0, children1, nb, ne, 1))
{
// str.indexof(str.++("AB", x, "C"), "C", 0) --->
// 2 + str.indexof(str.++(x, "C"), "C", 0)
@@ -2823,7 +2281,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
// strip symbolic length
Node new_len = node[2];
std::vector<Node> nr;
- if (stripSymbolicLength(children0, nr, 1, new_len))
+ if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
{
// For example:
// z>str.len( x1 ) and str.contains( x2, y )-->true
@@ -2849,7 +2307,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
{
Node new_len = node[2];
std::vector<Node> nr;
- if (stripSymbolicLength(children0, nr, 1, new_len))
+ if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
{
// Normalize the string before the start index.
//
@@ -2875,7 +2333,7 @@ Node SequencesRewriter::rewriteIndexof(Node node)
{
std::vector<Node> cb;
std::vector<Node> ce;
- if (stripConstantEndpoints(children0, children1, cb, ce, -1))
+ if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce, -1))
{
Node ret = utils::mkConcat(children0, stype);
ret = nm->mkNode(STRING_STRIDOF, ret, node[1], node[2]);
@@ -2950,14 +2408,14 @@ Node SequencesRewriter::rewriteReplace(Node node)
// ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x
Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
- if (checkEntailArith(l1, l0))
+ if (ArithEntail::check(l1, l0))
{
return returnRewrite(node, node[0], Rewrite::RPL_RPL_LEN_ID);
}
// (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x)
// if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "")
- if (checkEntailLengthOne(node[0]))
+ if (StringsEntail::checkLengthOne(node[0]))
{
Node empty = nm->mkConst(String(""));
Node rn1 = Rewriter::rewrite(
@@ -2966,7 +2424,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
{
std::vector<Node> emptyNodes;
bool allEmptyEqs;
- std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(rn1);
+ std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(rn1);
if (allEmptyEqs)
{
@@ -2987,14 +2445,15 @@ Node SequencesRewriter::rewriteReplace(Node node)
// check if contains definitely does (or does not) hold
Node cmp_con = nm->mkNode(kind::STRING_STRCTN, node[0], node[1]);
Node cmp_conr = Rewriter::rewrite(cmp_con);
- if (!checkEntailContains(node[0], node[1]).isNull())
+ if (!StringsEntail::checkContains(node[0], node[1]).isNull())
{
if (cmp_conr.getConst<bool>())
{
// component-wise containment
std::vector<Node> cb;
std::vector<Node> ce;
- int cc = componentContains(children0, children1, cb, ce, true, 1);
+ int cc = StringsEntail::componentContains(
+ children0, children1, cb, ce, true, 1);
if (cc != -1)
{
if (cc == 0 && children0[0] == children1[0])
@@ -3052,7 +2511,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
std::vector<Node> emptyNodes;
bool allEmptyEqs;
- std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(cmp_conr);
+ std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(cmp_conr);
if (emptyNodes.size() > 0)
{
@@ -3089,7 +2548,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
if (cmp_conr != cmp_con)
{
- if (checkEntailNonEmpty(node[1]))
+ if (StringsEntail::checkNonEmpty(node[1]))
{
// pull endpoints that can be stripped
// for example,
@@ -3097,7 +2556,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
// str.++( "b", str.replace( x, "a", y ), "b" )
std::vector<Node> cb;
std::vector<Node> ce;
- if (stripConstantEndpoints(children0, children1, cb, ce))
+ if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce))
{
std::vector<Node> cc;
cc.insert(cc.end(), cb.begin(), cb.end());
@@ -3138,7 +2597,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]);
Node len0_1 = nm->mkNode(kind::PLUS, len0, one);
// Check len(t) + j > len(x) + 1
- if (checkEntailArith(maxLen1, len0_1, true))
+ if (ArithEntail::check(maxLen1, len0_1, true))
{
children1.push_back(nm->mkNode(
kind::STRING_SUBSTR,
@@ -3188,7 +2647,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
// (str.len w) >= (str.len z)
Node wlen = nm->mkNode(kind::STRING_LENGTH, w);
Node zlen = nm->mkNode(kind::STRING_LENGTH, z);
- if (checkEntailArith(wlen, zlen))
+ if (ArithEntail::check(wlen, zlen))
{
// w != z
Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z));
@@ -3214,7 +2673,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
return returnRewrite(node, node[0], Rewrite::REPL_REPL2_INV_ID);
}
bool dualReplIteSuccess = false;
- Node cmp_con2 = checkEntailContains(node[1][0], node[1][2]);
+ Node cmp_con2 = StringsEntail::checkContains(node[1][0], node[1][2]);
if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
{
// str.contains( x, z ) ---> false
@@ -3229,10 +2688,10 @@ Node SequencesRewriter::rewriteReplace(Node node)
// implies
// str.replace( x, str.replace( x, y, z ), w ) --->
// ite( str.contains( x, y ), x, w )
- cmp_con2 = checkEntailContains(node[1][1], node[1][2]);
+ cmp_con2 = StringsEntail::checkContains(node[1][1], node[1][2]);
if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
{
- cmp_con2 = checkEntailContains(node[1][2], node[1][1]);
+ cmp_con2 = StringsEntail::checkContains(node[1][2], node[1][1]);
if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
{
dualReplIteSuccess = true;
@@ -3262,7 +2721,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
// str.contains(y, z) ----> false and ( y == w or x == w ) implies
// implies
// str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w)
- Node cmp_con2 = checkEntailContains(node[1][0], node[1][2]);
+ Node cmp_con2 = StringsEntail::checkContains(node[1][0], node[1][2]);
invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
}
}
@@ -3271,10 +2730,10 @@ Node SequencesRewriter::rewriteReplace(Node node)
// str.contains(x, z) ----> false and str.contains(x, w) ----> false
// implies
// str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u)
- Node cmp_con2 = checkEntailContains(node[0], node[1][1]);
+ Node cmp_con2 = StringsEntail::checkContains(node[0], node[1][1]);
if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
{
- cmp_con2 = checkEntailContains(node[0], node[1][2]);
+ cmp_con2 = StringsEntail::checkContains(node[0], node[1][2]);
invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
}
}
@@ -3290,7 +2749,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
{
// str.contains( z, w ) ----> false implies
// str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z )
- Node cmp_con2 = checkEntailContains(node[1], node[2][0]);
+ Node cmp_con2 = StringsEntail::checkContains(node[1], node[2][0]);
if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
{
Node res =
@@ -3310,7 +2769,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
{
// str.contains( x, z ) ----> false implies
// str.replace( x, y, str.replace( y, z, w ) ) ---> x
- cmp_con = checkEntailContains(node[0], node[2][1]);
+ cmp_con = StringsEntail::checkContains(node[0], node[2][1]);
success = !cmp_con.isNull() && !cmp_con.getConst<bool>();
}
if (success)
@@ -3324,7 +2783,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
// str.replace( x ++ y ++ x ++ y, "A", z ) -->
// str.replace( x ++ y, "A", z ) ++ x ++ y
// since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y.
- if (checkEntailLengthOne(node[1]))
+ if (StringsEntail::checkLengthOne(node[1]))
{
Node lastLhs;
unsigned lastCheckIndex = 0;
@@ -3336,7 +2795,7 @@ Node SequencesRewriter::rewriteReplace(Node node)
checkLhs.end(), children0.begin(), children0.begin() + checkIndex);
Node lhs = utils::mkConcat(checkLhs, stype);
Node rhs = children0[checkIndex];
- Node ctn = checkEntailContains(lhs, rhs);
+ Node ctn = StringsEntail::checkContains(lhs, rhs);
if (!ctn.isNull() && ctn.getConst<bool>())
{
lastLhs = lhs;
@@ -3442,7 +2901,7 @@ Node SequencesRewriter::rewriteReplaceInternal(Node node)
if (node[0] == node[1])
{
// only holds for replaceall if non-empty
- if (nk == STRING_STRREPL || checkEntailNonEmpty(node[1]))
+ if (nk == STRING_STRREPL || StringsEntail::checkNonEmpty(node[1]))
{
return returnRewrite(node, node[2], Rewrite::RPL_REPLACE);
}
@@ -3550,7 +3009,7 @@ Node SequencesRewriter::rewritePrefixSuffix(Node n)
// Check if we can turn the prefix/suffix into equalities by showing that the
// prefix/suffix is at least as long as the string
- Node eqs = inferEqsFromContains(n[1], n[0]);
+ Node eqs = StringsEntail::inferEqsFromContains(n[1], n[0]);
if (!eqs.isNull())
{
return returnRewrite(n, eqs, Rewrite::SUF_PREFIX_TO_EQS);
@@ -3563,670 +3022,12 @@ Node SequencesRewriter::rewritePrefixSuffix(Node n)
return returnRewrite(n, retNode, Rewrite::SUF_PREFIX_ELIM);
}
-Node SequencesRewriter::splitConstant(Node a, Node b, int& index, bool isRev)
-{
- Assert(a.isConst() && b.isConst());
- size_t lenA = Word::getLength(a);
- size_t lenB = Word::getLength(b);
- index = lenA <= lenB ? 1 : 0;
- size_t len_short = index == 1 ? lenA : lenB;
- bool cmp =
- isRev ? a.getConst<String>().rstrncmp(b.getConst<String>(), len_short)
- : a.getConst<String>().strncmp(b.getConst<String>(), len_short);
- if (cmp)
- {
- Node l = index == 0 ? a : b;
- if (isRev)
- {
- int new_len = l.getConst<String>().size() - len_short;
- return Word::substr(l, 0, new_len);
- }
- else
- {
- return Word::substr(l, len_short);
- }
- }
- // not the same prefix/suffix
- return Node::null();
-}
-
-bool SequencesRewriter::canConstantContainConcat(Node c,
- Node n,
- int& firstc,
- int& lastc)
-{
- Assert(c.isConst());
- CVC4::String t = c.getConst<String>();
- const std::vector<unsigned>& tvec = t.getVec();
- Assert(n.getKind() == kind::STRING_CONCAT);
- // must find constant components in order
- size_t pos = 0;
- firstc = -1;
- lastc = -1;
- for (unsigned i = 0; i < n.getNumChildren(); i++)
- {
- if (n[i].isConst())
- {
- firstc = firstc == -1 ? i : firstc;
- lastc = i;
- CVC4::String s = n[i].getConst<String>();
- size_t new_pos = t.find(s, pos);
- if (new_pos == std::string::npos)
- {
- return false;
- }
- else
- {
- pos = new_pos + s.size();
- }
- }
- else if (n[i].getKind() == kind::STRING_ITOS && checkEntailArith(n[i][0]))
- {
- // find the first occurrence of a digit starting at pos
- while (pos < tvec.size() && !String::isDigit(tvec[pos]))
- {
- pos++;
- }
- if (pos == tvec.size())
- {
- return false;
- }
- // must consume at least one digit here
- pos++;
- }
- }
- return true;
-}
-
-bool SequencesRewriter::canConstantContainList(Node c,
- std::vector<Node>& l,
- int& firstc,
- int& lastc)
-{
- Assert(c.isConst());
- // must find constant components in order
- size_t pos = 0;
- firstc = -1;
- lastc = -1;
- for (unsigned i = 0; i < l.size(); i++)
- {
- if (l[i].isConst())
- {
- firstc = firstc == -1 ? i : firstc;
- lastc = i;
- size_t new_pos = Word::find(c, l[i], pos);
- if (new_pos == std::string::npos)
- {
- return false;
- }
- else
- {
- pos = new_pos + Word::getLength(l[i]);
- }
- }
- }
- return true;
-}
-
-bool SequencesRewriter::stripSymbolicLength(std::vector<Node>& n1,
- std::vector<Node>& nr,
- int dir,
- Node& curr)
-{
- Assert(dir == 1 || dir == -1);
- Assert(nr.empty());
- Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0));
- bool ret = false;
- bool success;
- unsigned sindex = 0;
- do
- {
- Assert(!curr.isNull());
- success = false;
- if (curr != zero && sindex < n1.size())
- {
- unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
- if (n1[sindex_use].isConst())
- {
- // could strip part of a constant
- Node lowerBound = getConstantArithBound(Rewriter::rewrite(curr));
- if (!lowerBound.isNull())
- {
- Assert(lowerBound.isConst());
- Rational lbr = lowerBound.getConst<Rational>();
- if (lbr.sgn() > 0)
- {
- Assert(checkEntailArith(curr, true));
- CVC4::String s = n1[sindex_use].getConst<String>();
- Node ncl =
- NodeManager::currentNM()->mkConst(CVC4::Rational(s.size()));
- Node next_s =
- NodeManager::currentNM()->mkNode(kind::MINUS, lowerBound, ncl);
- next_s = Rewriter::rewrite(next_s);
- Assert(next_s.isConst());
- // we can remove the entire constant
- if (next_s.getConst<Rational>().sgn() >= 0)
- {
- curr = Rewriter::rewrite(
- NodeManager::currentNM()->mkNode(kind::MINUS, curr, ncl));
- success = true;
- sindex++;
- }
- else
- {
- // we can remove part of the constant
- // lower bound minus the length of a concrete string is negative,
- // hence lowerBound cannot be larger than long max
- Assert(lbr < Rational(String::maxSize()));
- curr = Rewriter::rewrite(NodeManager::currentNM()->mkNode(
- kind::MINUS, curr, lowerBound));
- uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
- Assert(lbsize < s.size());
- if (dir == 1)
- {
- // strip partially from the front
- nr.push_back(
- NodeManager::currentNM()->mkConst(s.prefix(lbsize)));
- n1[sindex_use] = NodeManager::currentNM()->mkConst(
- s.suffix(s.size() - lbsize));
- }
- else
- {
- // strip partially from the back
- nr.push_back(
- NodeManager::currentNM()->mkConst(s.suffix(lbsize)));
- n1[sindex_use] = NodeManager::currentNM()->mkConst(
- s.prefix(s.size() - lbsize));
- }
- ret = true;
- }
- Assert(checkEntailArith(curr));
- }
- else
- {
- // we cannot remove the constant
- }
- }
- }
- else
- {
- Node next_s = NodeManager::currentNM()->mkNode(
- kind::MINUS,
- curr,
- NodeManager::currentNM()->mkNode(kind::STRING_LENGTH,
- n1[sindex_use]));
- next_s = Rewriter::rewrite(next_s);
- if (checkEntailArith(next_s))
- {
- success = true;
- curr = next_s;
- sindex++;
- }
- }
- }
- } while (success);
- if (sindex > 0)
- {
- if (dir == 1)
- {
- nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
- n1.erase(n1.begin(), n1.begin() + sindex);
- }
- else
- {
- nr.insert(nr.end(), n1.end() - sindex, n1.end());
- n1.erase(n1.end() - sindex, n1.end());
- }
- ret = true;
- }
- return ret;
-}
-
-int SequencesRewriter::componentContains(std::vector<Node>& n1,
- std::vector<Node>& n2,
- std::vector<Node>& nb,
- std::vector<Node>& ne,
- bool computeRemainder,
- int remainderDir)
-{
- Assert(nb.empty());
- Assert(ne.empty());
- // if n2 is a singleton, we can do optimized version here
- if (n2.size() == 1)
- {
- for (unsigned i = 0; i < n1.size(); i++)
- {
- Node n1rb;
- Node n1re;
- if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
- {
- if (computeRemainder)
- {
- n1[i] = n2[0];
- if (remainderDir != -1)
- {
- if (!n1re.isNull())
- {
- ne.push_back(n1re);
- }
- ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
- n1.erase(n1.begin() + i + 1, n1.end());
- }
- else if (!n1re.isNull())
- {
- n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode(
- kind::STRING_CONCAT, n1[i], n1re));
- }
- if (remainderDir != 1)
- {
- nb.insert(nb.end(), n1.begin(), n1.begin() + i);
- n1.erase(n1.begin(), n1.begin() + i);
- if (!n1rb.isNull())
- {
- nb.push_back(n1rb);
- }
- }
- else if (!n1rb.isNull())
- {
- n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode(
- kind::STRING_CONCAT, n1rb, n1[i]));
- }
- }
- return i;
- }
- }
- }
- else if (n1.size() >= n2.size())
- {
- unsigned diff = n1.size() - n2.size();
- for (unsigned i = 0; i <= diff; i++)
- {
- Node n1rb_first;
- Node n1re_first;
- // first component of n2 must be a suffix
- if (componentContainsBase(n1[i],
- n2[0],
- n1rb_first,
- n1re_first,
- 1,
- computeRemainder && remainderDir != 1))
- {
- Assert(n1re_first.isNull());
- for (unsigned j = 1; j < n2.size(); j++)
- {
- // are we in the last component?
- if (j + 1 == n2.size())
- {
- Node n1rb_last;
- Node n1re_last;
- // last component of n2 must be a prefix
- if (componentContainsBase(n1[i + j],
- n2[j],
- n1rb_last,
- n1re_last,
- -1,
- computeRemainder && remainderDir != -1))
- {
- Assert(n1rb_last.isNull());
- if (computeRemainder)
- {
- if (remainderDir != -1)
- {
- if (!n1re_last.isNull())
- {
- ne.push_back(n1re_last);
- }
- ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
- n1.erase(n1.begin() + i + j + 1, n1.end());
- n1[i + j] = n2[j];
- }
- if (remainderDir != 1)
- {
- n1[i] = n2[0];
- nb.insert(nb.end(), n1.begin(), n1.begin() + i);
- n1.erase(n1.begin(), n1.begin() + i);
- if (!n1rb_first.isNull())
- {
- nb.push_back(n1rb_first);
- }
- }
- }
- return i;
- }
- else
- {
- break;
- }
- }
- else if (n1[i + j] != n2[j])
- {
- break;
- }
- }
- }
- }
- }
- return -1;
-}
-
-bool SequencesRewriter::componentContainsBase(
- Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
-{
- Assert(n1rb.isNull());
- Assert(n1re.isNull());
-
- NodeManager* nm = NodeManager::currentNM();
-
- if (n1 == n2)
- {
- return true;
- }
- else
- {
- if (n1.isConst() && n2.isConst())
- {
- size_t len1 = Word::getLength(n1);
- size_t len2 = Word::getLength(n2);
- if (len2 < len1)
- {
- if (dir == 1)
- {
- if (Word::suffix(n1, len2) == n2)
- {
- if (computeRemainder)
- {
- n1rb = Word::prefix(n1, len1 - len2);
- }
- return true;
- }
- }
- else if (dir == -1)
- {
- if (Word::prefix(n1, len2) == n2)
- {
- if (computeRemainder)
- {
- n1re = Word::suffix(n1, len1 - len2);
- }
- return true;
- }
- }
- else
- {
- size_t f = Word::find(n1, n2);
- if (f != std::string::npos)
- {
- if (computeRemainder)
- {
- if (f > 0)
- {
- n1rb = Word::prefix(n1, f);
- }
- if (len1 > f + len2)
- {
- n1re = Word::suffix(n1, len1 - (f + len2));
- }
- }
- return true;
- }
- }
- }
- }
- else
- {
- // cases for:
- // n1 = x containing n2 = substr( x, n2[1], n2[2] )
- if (n2.getKind() == kind::STRING_SUBSTR)
- {
- if (n2[0] == n1)
- {
- bool success = true;
- Node start_pos = n2[1];
- Node end_pos = nm->mkNode(kind::PLUS, n2[1], n2[2]);
- Node len_n2s = nm->mkNode(kind::STRING_LENGTH, n2[0]);
- if (dir == 1)
- {
- // To be a suffix, start + length must be greater than
- // or equal to the length of the string.
- success = checkEntailArith(end_pos, len_n2s);
- }
- else if (dir == -1)
- {
- // To be a prefix, must literally start at 0, since
- // if we knew it started at <0, it should be rewritten to "",
- // if we knew it started at 0, then n2[1] should be rewritten to
- // 0.
- success = start_pos.isConst()
- && start_pos.getConst<Rational>().sgn() == 0;
- }
- if (success)
- {
- if (computeRemainder)
- {
- // we can only compute the remainder if start_pos and end_pos
- // are known to be non-negative.
- if (!checkEntailArith(start_pos) || !checkEntailArith(end_pos))
- {
- return false;
- }
- if (dir != 1)
- {
- n1rb = nm->mkNode(kind::STRING_SUBSTR,
- n2[0],
- nm->mkConst(Rational(0)),
- start_pos);
- }
- if (dir != -1)
- {
- n1re = nm->mkNode(kind::STRING_SUBSTR, n2[0], end_pos, len_n2s);
- }
- }
- return true;
- }
- }
- }
-
- if (!computeRemainder && dir == 0)
- {
- if (n1.getKind() == STRING_STRREPL)
- {
- // (str.contains (str.replace x y z) w) ---> true
- // if (str.contains x w) --> true and (str.contains z w) ---> true
- Node xCtnW = checkEntailContains(n1[0], n2);
- if (!xCtnW.isNull() && xCtnW.getConst<bool>())
- {
- Node zCtnW = checkEntailContains(n1[2], n2);
- if (!zCtnW.isNull() && zCtnW.getConst<bool>())
- {
- return true;
- }
- }
- }
- }
- }
- }
- return false;
-}
-
-bool SequencesRewriter::stripConstantEndpoints(std::vector<Node>& n1,
- std::vector<Node>& n2,
- std::vector<Node>& nb,
- std::vector<Node>& ne,
- int dir)
+Node SequencesRewriter::lengthPreserveRewrite(Node n)
{
- Assert(nb.empty());
- Assert(ne.empty());
-
NodeManager* nm = NodeManager::currentNM();
- bool changed = false;
- // for ( forwards, backwards ) direction
- for (unsigned r = 0; r < 2; r++)
- {
- if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
- {
- unsigned index0 = r == 0 ? 0 : n1.size() - 1;
- unsigned index1 = r == 0 ? 0 : n2.size() - 1;
- bool removeComponent = false;
- Node n1cmp = n1[index0];
-
- if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0)
- {
- return false;
- }
-
- std::vector<Node> sss;
- std::vector<Node> sls;
- n1cmp = decomposeSubstrChain(n1cmp, sss, sls);
- Trace("strings-rewrite-debug2")
- << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
- << ", dir = " << dir << std::endl;
- if (n1cmp.isConst())
- {
- CVC4::String s = n1cmp.getConst<String>();
- // overlap is an overapproximation of the number of characters
- // n2[index1] can match in s
- unsigned overlap = s.size();
- if (n2[index1].isConst())
- {
- CVC4::String t = n2[index1].getConst<String>();
- std::size_t ret = r == 0 ? s.find(t) : s.rfind(t);
- if (ret == std::string::npos)
- {
- if (n1.size() == 1)
- {
- // can remove everything
- // e.g. str.contains( "abc", str.++( "ba", x ) ) -->
- // str.contains( "", str.++( "ba", x ) )
- removeComponent = true;
- }
- else if (sss.empty()) // only if not substr
- {
- // check how much overlap there is
- // This is used to partially strip off the endpoint
- // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
- // str.contains( str.++( "c", x ), str.++( "cd", y ) )
- overlap = r == 0 ? s.overlap(t) : t.overlap(s);
- }
- else
- {
- // if we are looking at a substring, we can remove the component
- // if there is no overlap
- // e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
- // --> str.contains( x, "a" )
- removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0);
- }
- }
- else if (sss.empty()) // only if not substr
- {
- Assert(ret < s.size());
- // can strip off up to the find position, e.g.
- // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
- // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
- // and
- // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
- // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
- overlap = s.size() - ret;
- }
- }
- else
- {
- // inconclusive
- }
- // process the overlap
- if (overlap < s.size())
- {
- changed = true;
- if (overlap == 0)
- {
- removeComponent = true;
- }
- else
- {
- // can drop the prefix (resp. suffix) from the first (resp. last)
- // component
- if (r == 0)
- {
- nb.push_back(nm->mkConst(s.prefix(s.size() - overlap)));
- n1[index0] = nm->mkConst(s.suffix(overlap));
- }
- else
- {
- ne.push_back(nm->mkConst(s.suffix(s.size() - overlap)));
- n1[index0] = nm->mkConst(s.prefix(overlap));
- }
- }
- }
- }
- else if (n1cmp.getKind() == kind::STRING_ITOS)
- {
- if (n2[index1].isConst())
- {
- CVC4::String t = n2[index1].getConst<String>();
-
- if (n1.size() == 1)
- {
- // if n1.size()==1, then if n2[index1] is not a number, we can drop
- // the entire component
- // e.g. str.contains( int.to.str(x), "123a45") --> false
- if (!t.isNumber())
- {
- removeComponent = true;
- }
- }
- else
- {
- const std::vector<unsigned>& tvec = t.getVec();
- Assert(tvec.size() > 0);
-
- // if n1.size()>1, then if the first (resp. last) character of
- // n2[index1]
- // is not a digit, we can drop the entire component, e.g.:
- // str.contains( str.++( int.to.str(x), y ), "a12") -->
- // str.contains( y, "a12" )
- // str.contains( str.++( y, int.to.str(x) ), "a0b") -->
- // str.contains( y, "a0b" )
- unsigned i = r == 0 ? 0 : (tvec.size() - 1);
- if (!String::isDigit(tvec[i]))
- {
- removeComponent = true;
- }
- }
- }
- }
- if (removeComponent)
- {
- // can drop entire first (resp. last) component
- if (r == 0)
- {
- nb.push_back(n1[index0]);
- n1.erase(n1.begin(), n1.begin() + 1);
- }
- else
- {
- ne.push_back(n1[index0]);
- n1.pop_back();
- }
- if (n1.empty())
- {
- // if we've removed everything, just return (we will rewrite to false)
- return true;
- }
- else
- {
- changed = true;
- }
- }
- }
- }
- // TODO (#1180) : computing the maximal overlap in this function may be
- // important.
- // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
- // false
- // ...since str.to.int(x) can contain at most 1 character from "2aaaa",
- // leaving 4 characters
- // which is larger that the upper bound for length of str.substr(y,0,3),
- // which is 3.
- return changed;
+ Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
+ Node res = canonicalStrForSymbolicLength(len, n.getType());
+ return res.isNull() ? n : res;
}
Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
@@ -4234,7 +3035,7 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
NodeManager* nm = NodeManager::currentNM();
Node res;
- if (len.getKind() == kind::CONST_RATIONAL)
+ if (len.getKind() == CONST_RATIONAL)
{
// c -> "A" repeated c times
Rational ratLen = len.getConst<Rational>();
@@ -4250,10 +3051,10 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
Unimplemented() << "canonicalStrForSymbolicLength for non-string";
}
}
- else if (len.getKind() == kind::PLUS)
+ else if (len.getKind() == PLUS)
{
// x + y -> norm(x) + norm(y)
- NodeBuilder<> concatBuilder(kind::STRING_CONCAT);
+ NodeBuilder<> concatBuilder(STRING_CONCAT);
for (const auto& n : len)
{
Node sn = canonicalStrForSymbolicLength(n, stype);
@@ -4267,7 +3068,7 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
}
res = concatBuilder.constructNode();
}
- else if (len.getKind() == kind::MULT && len.getNumChildren() == 2
+ else if (len.getKind() == MULT && len.getNumChildren() == 2
&& len[0].isConst())
{
// c * x -> norm(x) repeated c times
@@ -4278,14 +3079,14 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
Node nRep = canonicalStrForSymbolicLength(len[1], stype);
std::vector<Node> nRepChildren;
utils::getConcat(nRep, nRepChildren);
- NodeBuilder<> concatBuilder(kind::STRING_CONCAT);
+ NodeBuilder<> concatBuilder(STRING_CONCAT);
for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++)
{
concatBuilder.append(nRepChildren);
}
res = concatBuilder.constructNode();
}
- else if (len.getKind() == kind::STRING_LENGTH)
+ else if (len.getKind() == STRING_LENGTH)
{
// len(x) -> x
res = len[0];
@@ -4293,1311 +3094,6 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
return res;
}
-Node SequencesRewriter::lengthPreserveRewrite(Node n)
-{
- NodeManager* nm = NodeManager::currentNM();
- Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
- Node res = canonicalStrForSymbolicLength(len, n.getType());
- return res.isNull() ? n : res;
-}
-
-Node SequencesRewriter::checkEntailContains(Node a, Node b, bool fullRewriter)
-{
- NodeManager* nm = NodeManager::currentNM();
- Node ctn = nm->mkNode(kind::STRING_STRCTN, a, b);
-
- if (fullRewriter)
- {
- ctn = Rewriter::rewrite(ctn);
- }
- else
- {
- Node prev;
- do
- {
- prev = ctn;
- ctn = rewriteContains(ctn);
- } while (prev != ctn && ctn.getKind() == kind::STRING_STRCTN);
- }
-
- Assert(ctn.getType().isBoolean());
- return ctn.isConst() ? ctn : Node::null();
-}
-
-bool SequencesRewriter::checkEntailNonEmpty(Node a)
-{
- Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
- len = Rewriter::rewrite(len);
- return checkEntailArith(len, true);
-}
-
-bool SequencesRewriter::checkEntailLengthOne(Node s, bool strict)
-{
- NodeManager* nm = NodeManager::currentNM();
- Node one = nm->mkConst(Rational(1));
- Node len = nm->mkNode(STRING_LENGTH, s);
- len = Rewriter::rewrite(len);
- return checkEntailArith(one, len) && (!strict || checkEntailArith(len, true));
-}
-
-bool SequencesRewriter::checkEntailArithEq(Node a, Node b)
-{
- if (a == b)
- {
- return true;
- }
- else
- {
- Node ar = Rewriter::rewrite(a);
- Node br = Rewriter::rewrite(b);
- return ar == br;
- }
-}
-
-bool SequencesRewriter::checkEntailArith(Node a, Node b, bool strict)
-{
- if (a == b)
- {
- return !strict;
- }
- else
- {
- Node diff = NodeManager::currentNM()->mkNode(kind::MINUS, a, b);
- return checkEntailArith(diff, strict);
- }
-}
-
-struct StrCheckEntailArithTag
-{
-};
-struct StrCheckEntailArithComputedTag
-{
-};
-/** Attribute true for expressions for which checkEntailArith returned true */
-typedef expr::Attribute<StrCheckEntailArithTag, bool> StrCheckEntailArithAttr;
-typedef expr::Attribute<StrCheckEntailArithComputedTag, bool>
- StrCheckEntailArithComputedAttr;
-
-bool SequencesRewriter::checkEntailArith(Node a, bool strict)
-{
- if (a.isConst())
- {
- return a.getConst<Rational>().sgn() >= (strict ? 1 : 0);
- }
-
- Node ar =
- strict
- ? NodeManager::currentNM()->mkNode(
- kind::MINUS, a, NodeManager::currentNM()->mkConst(Rational(1)))
- : a;
- ar = Rewriter::rewrite(ar);
-
- if (ar.getAttribute(StrCheckEntailArithComputedAttr()))
- {
- return ar.getAttribute(StrCheckEntailArithAttr());
- }
-
- bool ret = checkEntailArithInternal(ar);
- if (!ret)
- {
- // try with approximations
- ret = checkEntailArithApprox(ar);
- }
- // cache the result
- ar.setAttribute(StrCheckEntailArithAttr(), ret);
- ar.setAttribute(StrCheckEntailArithComputedAttr(), true);
- return ret;
-}
-
-bool SequencesRewriter::checkEntailArithApprox(Node ar)
-{
- Assert(Rewriter::rewrite(ar) == ar);
- NodeManager* nm = NodeManager::currentNM();
- std::map<Node, Node> msum;
- Trace("strings-ent-approx-debug")
- << "Setup arithmetic approximations for " << ar << std::endl;
- if (!ArithMSum::getMonomialSum(ar, msum))
- {
- Trace("strings-ent-approx-debug")
- << "...failed to get monomial sum!" << std::endl;
- return false;
- }
- // for each monomial v*c, mApprox[v] a list of
- // possibilities for how the term can be soundly approximated, that is,
- // if mApprox[v] contains av, then v*c > av*c. Notice that if c
- // is positive, then v > av, otherwise if c is negative, then v < av.
- // In other words, av is an under-approximation if c is positive, and an
- // over-approximation if c is negative.
- bool changed = false;
- std::map<Node, std::vector<Node> > mApprox;
- // map from approximations to their monomial sums
- std::map<Node, std::map<Node, Node> > approxMsums;
- // aarSum stores each monomial that does not have multiple approximations
- std::vector<Node> aarSum;
- for (std::pair<const Node, Node>& m : msum)
- {
- Node v = m.first;
- Node c = m.second;
- Trace("strings-ent-approx-debug")
- << "Get approximations " << v << "..." << std::endl;
- if (v.isNull())
- {
- Node mn = c.isNull() ? nm->mkConst(Rational(1)) : c;
- aarSum.push_back(mn);
- }
- else
- {
- // c.isNull() means c = 1
- bool isOverApprox = !c.isNull() && c.getConst<Rational>().sgn() == -1;
- std::vector<Node>& approx = mApprox[v];
- std::unordered_set<Node, NodeHashFunction> visited;
- std::vector<Node> toProcess;
- toProcess.push_back(v);
- do
- {
- Node curr = toProcess.back();
- Trace("strings-ent-approx-debug") << " process " << curr << std::endl;
- curr = Rewriter::rewrite(curr);
- toProcess.pop_back();
- if (visited.find(curr) == visited.end())
- {
- visited.insert(curr);
- std::vector<Node> currApprox;
- getArithApproximations(curr, currApprox, isOverApprox);
- if (currApprox.empty())
- {
- Trace("strings-ent-approx-debug")
- << "...approximation: " << curr << std::endl;
- // no approximations, thus curr is a possibility
- approx.push_back(curr);
- }
- else
- {
- toProcess.insert(
- toProcess.end(), currApprox.begin(), currApprox.end());
- }
- }
- } while (!toProcess.empty());
- Assert(!approx.empty());
- // if we have only one approximation, move it to final
- if (approx.size() == 1)
- {
- changed = v != approx[0];
- Node mn = ArithMSum::mkCoeffTerm(c, approx[0]);
- aarSum.push_back(mn);
- mApprox.erase(v);
- }
- else
- {
- // compute monomial sum form for each approximation, used below
- for (const Node& aa : approx)
- {
- if (approxMsums.find(aa) == approxMsums.end())
- {
- CVC4_UNUSED bool ret =
- ArithMSum::getMonomialSum(aa, approxMsums[aa]);
- Assert(ret);
- }
- }
- changed = true;
- }
- }
- }
- if (!changed)
- {
- // approximations had no effect, return
- Trace("strings-ent-approx-debug") << "...no approximations" << std::endl;
- return false;
- }
- // get the current "fixed" sum for the abstraction of ar
- Node aar = aarSum.empty()
- ? nm->mkConst(Rational(0))
- : (aarSum.size() == 1 ? aarSum[0] : nm->mkNode(PLUS, aarSum));
- aar = Rewriter::rewrite(aar);
- Trace("strings-ent-approx-debug")
- << "...processed fixed sum " << aar << " with " << mApprox.size()
- << " approximated monomials." << std::endl;
- // if we have a choice of how to approximate
- if (!mApprox.empty())
- {
- // convert aar back to monomial sum
- std::map<Node, Node> msumAar;
- if (!ArithMSum::getMonomialSum(aar, msumAar))
- {
- return false;
- }
- if (Trace.isOn("strings-ent-approx"))
- {
- Trace("strings-ent-approx")
- << "---- Check arithmetic entailment by under-approximation " << ar
- << " >= 0" << std::endl;
- Trace("strings-ent-approx") << "FIXED:" << std::endl;
- ArithMSum::debugPrintMonomialSum(msumAar, "strings-ent-approx");
- Trace("strings-ent-approx") << "APPROX:" << std::endl;
- for (std::pair<const Node, std::vector<Node> >& a : mApprox)
- {
- Node c = msum[a.first];
- Trace("strings-ent-approx") << " ";
- if (!c.isNull())
- {
- Trace("strings-ent-approx") << c << " * ";
- }
- Trace("strings-ent-approx")
- << a.second << " ...from " << a.first << std::endl;
- }
- Trace("strings-ent-approx") << std::endl;
- }
- Rational one(1);
- // incorporate monomials one at a time that have a choice of approximations
- while (!mApprox.empty())
- {
- Node v;
- Node vapprox;
- int maxScore = -1;
- // Look at each approximation, take the one with the best score.
- // Notice that we are in the process of trying to prove
- // ( c1*t1 + .. + cn*tn ) + ( approx_1 | ... | approx_m ) >= 0,
- // where c1*t1 + .. + cn*tn is the "fixed" component of our sum (aar)
- // and approx_1 ... approx_m are possible approximations. The
- // intution here is that we want coefficients c1...cn to be positive.
- // This is because arithmetic string terms t1...tn (which may be
- // applications of len, indexof, str.to.int) are never entailed to be
- // negative. Hence, we add the approx_i that contributes the "most"
- // towards making all constants c1...cn positive and cancelling negative
- // monomials in approx_i itself.
- for (std::pair<const Node, std::vector<Node> >& nam : mApprox)
- {
- Node cr = msum[nam.first];
- for (const Node& aa : nam.second)
- {
- unsigned helpsCancelCount = 0;
- unsigned addsObligationCount = 0;
- std::map<Node, Node>::iterator it;
- // we are processing an approximation cr*( c1*t1 + ... + cn*tn )
- for (std::pair<const Node, Node>& aam : approxMsums[aa])
- {
- // Say aar is of the form t + c*ti, and aam is the monomial ci*ti
- // where ci != 0. We say aam:
- // (1) helps cancel if c != 0 and c>0 != ci>0
- // (2) adds obligation if c>=0 and c+ci<0
- Node ti = aam.first;
- Node ci = aam.second;
- if (!cr.isNull())
- {
- ci = ci.isNull() ? cr
- : Rewriter::rewrite(nm->mkNode(MULT, ci, cr));
- }
- Trace("strings-ent-approx-debug") << ci << "*" << ti << " ";
- int ciSgn = ci.isNull() ? 1 : ci.getConst<Rational>().sgn();
- it = msumAar.find(ti);
- if (it != msumAar.end())
- {
- Node c = it->second;
- int cSgn = c.isNull() ? 1 : c.getConst<Rational>().sgn();
- if (cSgn == 0)
- {
- addsObligationCount += (ciSgn == -1 ? 1 : 0);
- }
- else if (cSgn != ciSgn)
- {
- helpsCancelCount++;
- Rational r1 = c.isNull() ? one : c.getConst<Rational>();
- Rational r2 = ci.isNull() ? one : ci.getConst<Rational>();
- Rational r12 = r1 + r2;
- if (r12.sgn() == -1)
- {
- addsObligationCount++;
- }
- }
- }
- else
- {
- addsObligationCount += (ciSgn == -1 ? 1 : 0);
- }
- }
- Trace("strings-ent-approx-debug")
- << "counts=" << helpsCancelCount << "," << addsObligationCount
- << " for " << aa << " into " << aar << std::endl;
- int score = (addsObligationCount > 0 ? 0 : 2)
- + (helpsCancelCount > 0 ? 1 : 0);
- // if its the best, update v and vapprox
- if (v.isNull() || score > maxScore)
- {
- v = nam.first;
- vapprox = aa;
- maxScore = score;
- }
- }
- if (!v.isNull())
- {
- break;
- }
- }
- Trace("strings-ent-approx")
- << "- Decide " << v << " = " << vapprox << std::endl;
- // we incorporate v approximated by vapprox into the overall approximation
- // for ar
- Assert(!v.isNull() && !vapprox.isNull());
- Assert(msum.find(v) != msum.end());
- Node mn = ArithMSum::mkCoeffTerm(msum[v], vapprox);
- aar = nm->mkNode(PLUS, aar, mn);
- // update the msumAar map
- aar = Rewriter::rewrite(aar);
- msumAar.clear();
- if (!ArithMSum::getMonomialSum(aar, msumAar))
- {
- Assert(false);
- Trace("strings-ent-approx")
- << "...failed to get monomial sum!" << std::endl;
- return false;
- }
- // we have processed the approximation for v
- mApprox.erase(v);
- }
- Trace("strings-ent-approx") << "-----------------" << std::endl;
- }
- if (aar == ar)
- {
- Trace("strings-ent-approx-debug")
- << "...approximation had no effect" << std::endl;
- // this should never happen, but we avoid the infinite loop for sanity here
- Assert(false);
- return false;
- }
- // Check entailment on the approximation of ar.
- // Notice that this may trigger further reasoning by approximation. For
- // example, len( replace( x ++ y, substr( x, 0, n ), z ) ) may be
- // under-approximated as len( x ) + len( y ) - len( substr( x, 0, n ) ) on
- // this call, where in the recursive call we may over-approximate
- // len( substr( x, 0, n ) ) as len( x ). In this example, we can infer
- // that len( replace( x ++ y, substr( x, 0, n ), z ) ) >= len( y ) in two
- // steps.
- if (checkEntailArith(aar))
- {
- Trace("strings-ent-approx")
- << "*** StrArithApprox: showed " << ar
- << " >= 0 using under-approximation!" << std::endl;
- Trace("strings-ent-approx")
- << "*** StrArithApprox: under-approximation was " << aar << std::endl;
- return true;
- }
- return false;
-}
-
-void SequencesRewriter::getArithApproximations(Node a,
- std::vector<Node>& approx,
- bool isOverApprox)
-{
- NodeManager* nm = NodeManager::currentNM();
- // We do not handle PLUS here since this leads to exponential behavior.
- // Instead, this is managed, e.g. during checkEntailArithApprox, where
- // PLUS terms are expanded "on-demand" during the reasoning.
- Trace("strings-ent-approx-debug")
- << "Get arith approximations " << a << std::endl;
- Kind ak = a.getKind();
- if (ak == MULT)
- {
- Node c;
- Node v;
- if (ArithMSum::getMonomial(a, c, v))
- {
- bool isNeg = c.getConst<Rational>().sgn() == -1;
- getArithApproximations(v, approx, isNeg ? !isOverApprox : isOverApprox);
- for (unsigned i = 0, size = approx.size(); i < size; i++)
- {
- approx[i] = nm->mkNode(MULT, c, approx[i]);
- }
- }
- }
- else if (ak == STRING_LENGTH)
- {
- Kind aak = a[0].getKind();
- if (aak == STRING_SUBSTR)
- {
- // over,under-approximations for len( substr( x, n, m ) )
- Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]);
- if (isOverApprox)
- {
- // m >= 0 implies
- // m >= len( substr( x, n, m ) )
- if (checkEntailArith(a[0][2]))
- {
- approx.push_back(a[0][2]);
- }
- if (checkEntailArith(lenx, a[0][1]))
- {
- // n <= len( x ) implies
- // len( x ) - n >= len( substr( x, n, m ) )
- approx.push_back(nm->mkNode(MINUS, lenx, a[0][1]));
- }
- else
- {
- // len( x ) >= len( substr( x, n, m ) )
- approx.push_back(lenx);
- }
- }
- else
- {
- // 0 <= n and n+m <= len( x ) implies
- // m <= len( substr( x, n, m ) )
- Node npm = nm->mkNode(PLUS, a[0][1], a[0][2]);
- if (checkEntailArith(a[0][1]) && checkEntailArith(lenx, npm))
- {
- approx.push_back(a[0][2]);
- }
- // 0 <= n and n+m >= len( x ) implies
- // len(x)-n <= len( substr( x, n, m ) )
- if (checkEntailArith(a[0][1]) && checkEntailArith(npm, lenx))
- {
- approx.push_back(nm->mkNode(MINUS, lenx, a[0][1]));
- }
- }
- }
- else if (aak == STRING_STRREPL)
- {
- // over,under-approximations for len( replace( x, y, z ) )
- // notice this is either len( x ) or ( len( x ) + len( z ) - len( y ) )
- Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]);
- Node leny = nm->mkNode(STRING_LENGTH, a[0][1]);
- Node lenz = nm->mkNode(STRING_LENGTH, a[0][2]);
- if (isOverApprox)
- {
- if (checkEntailArith(leny, lenz))
- {
- // len( y ) >= len( z ) implies
- // len( x ) >= len( replace( x, y, z ) )
- approx.push_back(lenx);
- }
- else
- {
- // len( x ) + len( z ) >= len( replace( x, y, z ) )
- approx.push_back(nm->mkNode(PLUS, lenx, lenz));
- }
- }
- else
- {
- if (checkEntailArith(lenz, leny) || checkEntailArith(lenz, lenx))
- {
- // len( y ) <= len( z ) or len( x ) <= len( z ) implies
- // len( x ) <= len( replace( x, y, z ) )
- approx.push_back(lenx);
- }
- else
- {
- // len( x ) - len( y ) <= len( replace( x, y, z ) )
- approx.push_back(nm->mkNode(MINUS, lenx, leny));
- }
- }
- }
- else if (aak == STRING_ITOS)
- {
- // over,under-approximations for len( int.to.str( x ) )
- if (isOverApprox)
- {
- if (checkEntailArith(a[0][0], false))
- {
- if (checkEntailArith(a[0][0], true))
- {
- // x > 0 implies
- // x >= len( int.to.str( x ) )
- approx.push_back(a[0][0]);
- }
- else
- {
- // x >= 0 implies
- // x+1 >= len( int.to.str( x ) )
- approx.push_back(
- nm->mkNode(PLUS, nm->mkConst(Rational(1)), a[0][0]));
- }
- }
- }
- else
- {
- if (checkEntailArith(a[0][0]))
- {
- // x >= 0 implies
- // len( int.to.str( x ) ) >= 1
- approx.push_back(nm->mkConst(Rational(1)));
- }
- // other crazy things are possible here, e.g.
- // len( int.to.str( len( y ) + 10 ) ) >= 2
- }
- }
- }
- else if (ak == STRING_STRIDOF)
- {
- // over,under-approximations for indexof( x, y, n )
- if (isOverApprox)
- {
- Node lenx = nm->mkNode(STRING_LENGTH, a[0]);
- Node leny = nm->mkNode(STRING_LENGTH, a[1]);
- if (checkEntailArith(lenx, leny))
- {
- // len( x ) >= len( y ) implies
- // len( x ) - len( y ) >= indexof( x, y, n )
- approx.push_back(nm->mkNode(MINUS, lenx, leny));
- }
- else
- {
- // len( x ) >= indexof( x, y, n )
- approx.push_back(lenx);
- }
- }
- else
- {
- // TODO?:
- // contains( substr( x, n, len( x ) ), y ) implies
- // n <= indexof( x, y, n )
- // ...hard to test, runs risk of non-termination
-
- // -1 <= indexof( x, y, n )
- approx.push_back(nm->mkConst(Rational(-1)));
- }
- }
- else if (ak == STRING_STOI)
- {
- // over,under-approximations for str.to.int( x )
- if (isOverApprox)
- {
- // TODO?:
- // y >= 0 implies
- // y >= str.to.int( int.to.str( y ) )
- }
- else
- {
- // -1 <= str.to.int( x )
- approx.push_back(nm->mkConst(Rational(-1)));
- }
- }
- Trace("strings-ent-approx-debug") << "Return " << approx.size() << std::endl;
-}
-
-bool SequencesRewriter::checkEntailMultisetSubset(Node a, Node b)
-{
- std::vector<Node> avec;
- utils::getConcat(getMultisetApproximation(a), avec);
- std::vector<Node> bvec;
- utils::getConcat(b, bvec);
-
- std::map<Node, unsigned> num_nconst[2];
- std::map<Node, unsigned> num_const[2];
- for (unsigned j = 0; j < 2; j++)
- {
- std::vector<Node>& jvec = j == 0 ? avec : bvec;
- for (const Node& cc : jvec)
- {
- if (cc.isConst())
- {
- num_const[j][cc]++;
- }
- else
- {
- num_nconst[j][cc]++;
- }
- }
- }
- bool ms_success = true;
- for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
- {
- if (nncp.second > num_nconst[1][nncp.first])
- {
- ms_success = false;
- break;
- }
- }
- if (ms_success)
- {
- // count the number of constant characters in the first argument
- std::map<Node, unsigned> count_const[2];
- std::vector<Node> chars;
- for (unsigned j = 0; j < 2; j++)
- {
- for (std::pair<const Node, unsigned>& ncp : num_const[j])
- {
- Node cn = ncp.first;
- Assert(cn.isConst());
- std::vector<Node> cnChars = Word::getChars(cn);
- for (const Node& ch : cnChars)
- {
- count_const[j][ch] += ncp.second;
- if (std::find(chars.begin(), chars.end(), ch) == chars.end())
- {
- chars.push_back(ch);
- }
- }
- }
- }
- Trace("strings-entail-ms-ss")
- << "For " << a << " and " << b << " : " << std::endl;
- for (const Node& ch : chars)
- {
- Trace("strings-entail-ms-ss") << " # occurrences of substring ";
- Trace("strings-entail-ms-ss") << ch << " in arguments is ";
- Trace("strings-entail-ms-ss")
- << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
- if (count_const[0][ch] < count_const[1][ch])
- {
- return true;
- }
- }
-
- // TODO (#1180): count the number of 2,3,4,.. character substrings
- // for example:
- // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
- // since the second argument contains more occurrences of "bb".
- // note this is orthogonal reasoning to inductive reasoning
- // via regular membership reduction in Liang et al CAV 2015.
- }
- return false;
-}
-
-Node SequencesRewriter::checkEntailHomogeneousString(Node a)
-{
- std::vector<Node> avec;
- utils::getConcat(getMultisetApproximation(a), avec);
-
- bool cValid = false;
- Node c;
- for (const Node& ac : avec)
- {
- if (ac.isConst())
- {
- std::vector<Node> acv = Word::getChars(ac);
- for (const Node& cc : acv)
- {
- if (!cValid)
- {
- cValid = true;
- c = cc;
- }
- else if (c != cc)
- {
- // Found a different character
- return Node::null();
- }
- }
- }
- else
- {
- // Could produce a different character
- return Node::null();
- }
- }
-
- if (!cValid)
- {
- return Word::mkEmptyWord(a.getType());
- }
-
- return c;
-}
-
-Node SequencesRewriter::getMultisetApproximation(Node a)
-{
- NodeManager* nm = NodeManager::currentNM();
- if (a.getKind() == STRING_SUBSTR)
- {
- return a[0];
- }
- else if (a.getKind() == STRING_STRREPL)
- {
- return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
- }
- else if (a.getKind() == STRING_CONCAT)
- {
- NodeBuilder<> nb(STRING_CONCAT);
- for (const Node& ac : a)
- {
- nb << getMultisetApproximation(ac);
- }
- return nb.constructNode();
- }
- else
- {
- return a;
- }
-}
-
-bool SequencesRewriter::checkEntailArithWithEqAssumption(Node assumption,
- Node a,
- bool strict)
-{
- Assert(assumption.getKind() == kind::EQUAL);
- Assert(Rewriter::rewrite(assumption) == assumption);
-
- // Find candidates variables to compute substitutions for
- std::unordered_set<Node, NodeHashFunction> candVars;
- std::vector<Node> toVisit = {assumption};
- while (!toVisit.empty())
- {
- Node curr = toVisit.back();
- toVisit.pop_back();
-
- if (curr.getKind() == kind::PLUS || curr.getKind() == kind::MULT
- || curr.getKind() == kind::MINUS || curr.getKind() == kind::EQUAL)
- {
- for (const auto& currChild : curr)
- {
- toVisit.push_back(currChild);
- }
- }
- else if (curr.isVar() && Theory::theoryOf(curr) == THEORY_ARITH)
- {
- candVars.insert(curr);
- }
- else if (curr.getKind() == kind::STRING_LENGTH)
- {
- candVars.insert(curr);
- }
- }
-
- // Check if any of the candidate variables are in n
- Node v;
- Assert(toVisit.empty());
- toVisit.push_back(a);
- while (!toVisit.empty())
- {
- Node curr = toVisit.back();
- toVisit.pop_back();
-
- for (const auto& currChild : curr)
- {
- toVisit.push_back(currChild);
- }
-
- if (candVars.find(curr) != candVars.end())
- {
- v = curr;
- break;
- }
- }
-
- if (v.isNull())
- {
- // No suitable candidate found
- return false;
- }
-
- Node solution = ArithMSum::solveEqualityFor(assumption, v);
- if (solution.isNull())
- {
- // Could not solve for v
- return false;
- }
-
- a = a.substitute(TNode(v), TNode(solution));
- return checkEntailArith(a, strict);
-}
-
-bool SequencesRewriter::checkEntailArithWithAssumption(Node assumption,
- Node a,
- Node b,
- bool strict)
-{
- Assert(Rewriter::rewrite(assumption) == assumption);
-
- NodeManager* nm = NodeManager::currentNM();
-
- if (!assumption.isConst() && assumption.getKind() != kind::EQUAL)
- {
- // We rewrite inequality assumptions from x <= y to x + (str.len s) = y
- // where s is some fresh string variable. We use (str.len s) because
- // (str.len s) must be non-negative for the equation to hold.
- Node x, y;
- if (assumption.getKind() == kind::GEQ)
- {
- x = assumption[0];
- y = assumption[1];
- }
- else
- {
- // (not (>= s t)) --> (>= (t - 1) s)
- Assert(assumption.getKind() == kind::NOT
- && assumption[0].getKind() == kind::GEQ);
- x = nm->mkNode(kind::MINUS, assumption[0][1], nm->mkConst(Rational(1)));
- y = assumption[0][0];
- }
-
- Node s = nm->mkBoundVar("slackVal", nm->stringType());
- Node slen = nm->mkNode(kind::STRING_LENGTH, s);
- assumption = Rewriter::rewrite(
- nm->mkNode(kind::EQUAL, x, nm->mkNode(kind::PLUS, y, slen)));
- }
-
- Node diff = nm->mkNode(kind::MINUS, a, b);
- bool res = false;
- if (assumption.isConst())
- {
- bool assumptionBool = assumption.getConst<bool>();
- if (assumptionBool)
- {
- res = checkEntailArith(diff, strict);
- }
- else
- {
- res = true;
- }
- }
- else
- {
- res = checkEntailArithWithEqAssumption(assumption, diff, strict);
- }
- return res;
-}
-
-bool SequencesRewriter::checkEntailArithWithAssumptions(
- std::vector<Node> assumptions, Node a, Node b, bool strict)
-{
- // TODO: We currently try to show the entailment with each assumption
- // independently. In the future, we should make better use of multiple
- // assumptions.
- bool res = false;
- for (const auto& assumption : assumptions)
- {
- Assert(Rewriter::rewrite(assumption) == assumption);
-
- if (checkEntailArithWithAssumption(assumption, a, b, strict))
- {
- res = true;
- break;
- }
- }
- return res;
-}
-
-Node SequencesRewriter::getConstantArithBound(Node a, bool isLower)
-{
- Assert(Rewriter::rewrite(a) == a);
- Node ret;
- if (a.isConst())
- {
- ret = a;
- }
- else if (a.getKind() == kind::STRING_LENGTH)
- {
- if (isLower)
- {
- ret = NodeManager::currentNM()->mkConst(Rational(0));
- }
- }
- else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT)
- {
- std::vector<Node> children;
- bool success = true;
- for (unsigned i = 0; i < a.getNumChildren(); i++)
- {
- Node ac = getConstantArithBound(a[i], isLower);
- if (ac.isNull())
- {
- ret = ac;
- success = false;
- break;
- }
- else
- {
- if (ac.getConst<Rational>().sgn() == 0)
- {
- if (a.getKind() == kind::MULT)
- {
- ret = ac;
- success = false;
- break;
- }
- }
- else
- {
- if (a.getKind() == kind::MULT)
- {
- if ((ac.getConst<Rational>().sgn() > 0) != isLower)
- {
- ret = Node::null();
- success = false;
- break;
- }
- }
- children.push_back(ac);
- }
- }
- }
- if (success)
- {
- if (children.empty())
- {
- ret = NodeManager::currentNM()->mkConst(Rational(0));
- }
- else if (children.size() == 1)
- {
- ret = children[0];
- }
- else
- {
- ret = NodeManager::currentNM()->mkNode(a.getKind(), children);
- ret = Rewriter::rewrite(ret);
- }
- }
- }
- Trace("strings-rewrite-cbound")
- << "Constant " << (isLower ? "lower" : "upper") << " bound for " << a
- << " is " << ret << std::endl;
- Assert(ret.isNull() || ret.isConst());
- // entailment check should be at least as powerful as computing a lower bound
- Assert(!isLower || ret.isNull() || ret.getConst<Rational>().sgn() < 0
- || checkEntailArith(a, false));
- Assert(!isLower || ret.isNull() || ret.getConst<Rational>().sgn() <= 0
- || checkEntailArith(a, true));
- return ret;
-}
-
-Node SequencesRewriter::getFixedLengthForRegexp(Node n)
-{
- NodeManager* nm = NodeManager::currentNM();
- if (n.getKind() == STRING_TO_REGEXP)
- {
- Node ret = nm->mkNode(STRING_LENGTH, n[0]);
- ret = Rewriter::rewrite(ret);
- if (ret.isConst())
- {
- return ret;
- }
- }
- else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE)
- {
- return nm->mkConst(Rational(1));
- }
- else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER)
- {
- Node ret;
- for (const Node& nc : n)
- {
- Node flc = getFixedLengthForRegexp(nc);
- if (flc.isNull() || (!ret.isNull() && ret != flc))
- {
- return Node::null();
- }
- else if (ret.isNull())
- {
- // first time
- ret = flc;
- }
- }
- return ret;
- }
- else if (n.getKind() == REGEXP_CONCAT)
- {
- NodeBuilder<> nb(PLUS);
- for (const Node& nc : n)
- {
- Node flc = getFixedLengthForRegexp(nc);
- if (flc.isNull())
- {
- return flc;
- }
- nb << flc;
- }
- Node ret = nb.constructNode();
- ret = Rewriter::rewrite(ret);
- return ret;
- }
- return Node::null();
-}
-
-bool SequencesRewriter::checkEntailArithInternal(Node a)
-{
- Assert(Rewriter::rewrite(a) == a);
- // check whether a >= 0
- if (a.isConst())
- {
- return a.getConst<Rational>().sgn() >= 0;
- }
- else if (a.getKind() == kind::STRING_LENGTH)
- {
- // str.len( t ) >= 0
- return true;
- }
- else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT)
- {
- for (unsigned i = 0; i < a.getNumChildren(); i++)
- {
- if (!checkEntailArithInternal(a[i]))
- {
- return false;
- }
- }
- // t1 >= 0 ^ ... ^ tn >= 0 => t1 op ... op tn >= 0
- return true;
- }
-
- return false;
-}
-
-Node SequencesRewriter::decomposeSubstrChain(Node s,
- std::vector<Node>& ss,
- std::vector<Node>& ls)
-{
- Assert(ss.empty());
- Assert(ls.empty());
- while (s.getKind() == STRING_SUBSTR)
- {
- ss.push_back(s[1]);
- ls.push_back(s[2]);
- s = s[0];
- }
- std::reverse(ss.begin(), ss.end());
- std::reverse(ls.begin(), ls.end());
- return s;
-}
-
-Node SequencesRewriter::mkSubstrChain(Node base,
- const std::vector<Node>& ss,
- const std::vector<Node>& ls)
-{
- NodeManager* nm = NodeManager::currentNM();
- for (unsigned i = 0, size = ss.size(); i < size; i++)
- {
- base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]);
- }
- return base;
-}
-
-Node SequencesRewriter::getStringOrEmpty(Node n)
-{
- NodeManager* nm = NodeManager::currentNM();
- Node res;
- while (res.isNull())
- {
- switch (n.getKind())
- {
- case kind::STRING_STRREPL:
- {
- Node empty = nm->mkConst(::CVC4::String(""));
- if (n[0] == empty)
- {
- // (str.replace "" x y) --> y
- n = n[2];
- break;
- }
-
- if (checkEntailLengthOne(n[0]) && n[2] == empty)
- {
- // (str.replace "A" x "") --> "A"
- res = n[0];
- break;
- }
-
- res = n;
- break;
- }
- case kind::STRING_SUBSTR:
- {
- if (checkEntailLengthOne(n[0]))
- {
- // (str.substr "A" x y) --> "A"
- res = n[0];
- break;
- }
- res = n;
- break;
- }
- default:
- {
- res = n;
- break;
- }
- }
- }
- return res;
-}
-
-bool SequencesRewriter::inferZerosInSumGeq(Node x,
- std::vector<Node>& ys,
- std::vector<Node>& zeroYs)
-{
- Assert(zeroYs.empty());
-
- NodeManager* nm = NodeManager::currentNM();
-
- // Check if we can show that y1 + ... + yn >= x
- Node sum = (ys.size() > 1) ? nm->mkNode(PLUS, ys) : ys[0];
- if (!checkEntailArith(sum, x))
- {
- return false;
- }
-
- // Try to remove yi one-by-one and check if we can still show:
- //
- // y1 + ... + yi-1 + yi+1 + ... + yn >= x
- //
- // If that's the case, we know that yi can be zero and the inequality still
- // holds.
- size_t i = 0;
- while (i < ys.size())
- {
- Node yi = ys[i];
- std::vector<Node>::iterator pos = ys.erase(ys.begin() + i);
- if (ys.size() > 1)
- {
- sum = nm->mkNode(PLUS, ys);
- }
- else
- {
- sum = ys.size() == 1 ? ys[0] : nm->mkConst(Rational(0));
- }
-
- if (checkEntailArith(sum, x))
- {
- zeroYs.push_back(yi);
- }
- else
- {
- ys.insert(pos, yi);
- i++;
- }
- }
- return true;
-}
-
-Node SequencesRewriter::inferEqsFromContains(Node x, Node y)
-{
- NodeManager* nm = NodeManager::currentNM();
- Node emp = nm->mkConst(String(""));
- Assert(x.getType() == y.getType());
- TypeNode stype = x.getType();
-
- Node xLen = nm->mkNode(STRING_LENGTH, x);
- std::vector<Node> yLens;
- if (y.getKind() != STRING_CONCAT)
- {
- yLens.push_back(nm->mkNode(STRING_LENGTH, y));
- }
- else
- {
- for (const Node& yi : y)
- {
- yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
- }
- }
-
- std::vector<Node> zeroLens;
- if (x == emp)
- {
- // If x is the empty string, then all ys must be empty, too, and we can
- // skip the expensive checks. Note that this is just a performance
- // optimization.
- zeroLens.swap(yLens);
- }
- else
- {
- // Check if we can infer that str.len(x) <= str.len(y). If that is the
- // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
- // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
- // true. The terms that can have length zero without making the inequality
- // false must be all be empty if (str.contains x y) is true.
- if (!inferZerosInSumGeq(xLen, yLens, zeroLens))
- {
- // We could not prove that the inequality holds
- return Node::null();
- }
- else if (yLens.size() == y.getNumChildren())
- {
- // We could only prove that the inequality holds but not that any of the
- // ys must be empty
- return nm->mkNode(EQUAL, x, y);
- }
- }
-
- if (y.getKind() != STRING_CONCAT)
- {
- if (zeroLens.size() == 1)
- {
- // y is not a concatenation and we found that it must be empty, so just
- // return (= y "")
- Assert(zeroLens[0][0] == y);
- return nm->mkNode(EQUAL, y, emp);
- }
- else
- {
- Assert(yLens.size() == 1 && yLens[0][0] == y);
- return nm->mkNode(EQUAL, x, y);
- }
- }
-
- std::vector<Node> cs;
- for (const Node& yiLen : yLens)
- {
- Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
- cs.push_back(yiLen[0]);
- }
-
- NodeBuilder<> nb(AND);
- // (= x (str.++ y1' ... ym'))
- if (!cs.empty())
- {
- nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
- }
- // (= y1'' "") ... (= yk'' "")
- for (const Node& zeroLen : zeroLens)
- {
- Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
- nb << nm->mkNode(EQUAL, zeroLen[0], emp);
- }
-
- // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
- return nb.constructNode();
-}
-
-std::pair<bool, std::vector<Node> > SequencesRewriter::collectEmptyEqs(Node x)
-{
- NodeManager* nm = NodeManager::currentNM();
- Node empty = nm->mkConst(::CVC4::String(""));
-
- // Collect the equalities of the form (= x "") (sorted)
- std::set<TNode> emptyNodes;
- bool allEmptyEqs = true;
- if (x.getKind() == kind::EQUAL)
- {
- if (x[0] == empty)
- {
- emptyNodes.insert(x[1]);
- }
- else if (x[1] == empty)
- {
- emptyNodes.insert(x[0]);
- }
- else
- {
- allEmptyEqs = false;
- }
- }
- else if (x.getKind() == kind::AND)
- {
- for (const Node& c : x)
- {
- if (c.getKind() == kind::EQUAL)
- {
- if (c[0] == empty)
- {
- emptyNodes.insert(c[1]);
- }
- else if (c[1] == empty)
- {
- emptyNodes.insert(c[0]);
- }
- }
- else
- {
- allEmptyEqs = false;
- }
- }
- }
-
- if (emptyNodes.size() == 0)
- {
- allEmptyEqs = false;
- }
-
- return std::make_pair(
- allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
-}
Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r)
{
@@ -5646,3 +3142,7 @@ Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r)
}
return ret;
}
+
+} // namespace strings
+} // namespace theory
+} // namespace CVC4
diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h
index 0e5cd5705..7391a7bd0 100644
--- a/src/theory/strings/sequences_rewriter.h
+++ b/src/theory/strings/sequences_rewriter.h
@@ -18,14 +18,11 @@
#ifndef CVC4__THEORY__STRINGS__SEQUENCES_REWRITER_H
#define CVC4__THEORY__STRINGS__SEQUENCES_REWRITER_H
-#include <climits>
-#include <utility>
#include <vector>
-#include "expr/attribute.h"
+#include "expr/node.h"
#include "theory/strings/rewrites.h"
#include "theory/theory_rewriter.h"
-#include "theory/type_enumerator.h"
namespace CVC4 {
namespace theory {
@@ -34,69 +31,6 @@ namespace strings {
class SequencesRewriter : public TheoryRewriter
{
protected:
- /** simple regular expression consume
- *
- * This method is called when we are rewriting a membership of the form
- * s1 ++ ... ++ sn in r1 ++ ... ++ rm
- * We have that mchildren consists of the strings s1...sn, and children
- * consists of the regular expressions r1...rm.
- *
- * This method tries to strip off parts of the concatenation terms. It updates
- * the vectors such that the resulting vectors are such that the membership
- * mchildren[n'...n''] in children[m'...m''] is equivalent to the input
- * membership. The argument dir indicates the direction to consider, where
- * 0 means strip off the front, 1 off the back, and < 0 off of both.
- *
- * If this method returns the false node, then we have inferred that no
- * string in the language of r1 ++ ... ++ rm is a prefix (when dir!=1) or
- * suffix (when dir!=0) of s1 ++ ... ++ sn. Otherwise, it returns the null
- * node.
- *
- * For example, given input
- * mchildren = { "ab", x }, children = { [["a"]], ([["cd"]])* } and dir = 0,
- * this method updates:
- * mchildren = { "b", x }, children = { ("cd")* }
- * and returns null.
- *
- * For example, given input
- * { x, "abb", x }, { [[x]], ["a"..."b"], allchar, [[y]], [[x]]} and dir=-1,
- * this method updates:
- * { "b" }, { [[y]] }
- * where [[.]] denotes str.to.re, and returns null.
- *
- * Notice that the above requirement for returning false is stronger than
- * determining that s1 ++ ... ++ sn in r1 ++ ... ++ rm is equivalent to false.
- * For example, for input "bb" in "b" ++ ( "a" )*, we do not return false
- * since "b" is in the language of "b" ++ ( "a" )* and is a prefix of "bb".
- * We do not return false even though the above membership is equivalent
- * to false. We do this because the function is used e.g. to test whether a
- * possible unrolling leads to a conflict. This is demonstrated by the
- * following examples:
- *
- * For example, given input
- * { "bb", x }, { "b", ("a")* } and dir=-1,
- * this method updates:
- * { "b" }, { ("a")* }
- * and returns null.
- *
- * For example, given input
- * { "cb", x }, { "b", ("a")* } and dir=-1,
- * this method leaves children and mchildren unchanged and returns false.
- *
- * Notice that based on this, we can determine that:
- * "cb" ++ x in ( "b" ++ ("a")* )*
- * is equivalent to false, whereas we cannot determine that:
- * "bb" ++ x in ( "b" ++ ("a")* )*
- * is equivalent to false.
- */
- static Node simpleRegexpConsume(std::vector<Node>& mchildren,
- std::vector<Node>& children,
- int dir = -1);
- static bool isConstRegExp(TNode t);
- static bool testConstStringInRegExp(CVC4::String& s,
- unsigned int index_start,
- TNode r);
-
/** rewrite regular expression concatenation
*
* This is the entry point for post-rewriting applications of re.++.
@@ -159,12 +93,6 @@ class SequencesRewriter : public TheoryRewriter
*/
static Node rewriteMembership(TNode node);
- static bool hasEpsilonNode(TNode node);
- /** check entail arithmetic internal
- * Returns true if we can show a >= 0 always.
- * a is in rewritten form.
- */
- static bool checkEntailArithInternal(Node a);
/** rewrite string equality extended
*
* This method returns a formula that is equivalent to the equality between
@@ -292,233 +220,6 @@ class SequencesRewriter : public TheoryRewriter
*/
static Node rewriteStringToCode(Node node);
- static Node splitConstant(Node a, Node b, int& index, bool isRev);
- /** can constant contain list
- * return true if constant c can contain the list l in order
- * firstc/lastc store which indices in l were used to determine the return
- * value.
- * (This is typically used when this function returns false, for minimizing
- * explanations)
- *
- * For example:
- * canConstantContainList( "abc", { x, "c", y } ) returns true
- * firstc/lastc are updated to 1/1
- * canConstantContainList( "abc", { x, "d", y } ) returns false
- * firstc/lastc are updated to 1/1
- * canConstantContainList( "abcdef", { x, "b", y, "a", z, "c", w }
- * returns false
- * firstc/lastc are updated to 1/3
- * canConstantContainList( "abcdef", { x, "b", y, "e", z, "c", w }
- * returns false
- * firstc/lastc are updated to 1/5
- */
- static bool canConstantContainList(Node c,
- std::vector<Node>& l,
- int& firstc,
- int& lastc);
- /** can constant contain concat
- * same as above but with n = str.++( l ) instead of l
- */
- static bool canConstantContainConcat(Node c, Node n, int& firstc, int& lastc);
-
- /** strip symbolic length
- *
- * This function strips off components of n1 whose length is less than
- * or equal to argument curr, and stores them in nr. The direction
- * dir determines whether the components are removed from the start
- * or end of n1.
- *
- * In detail, this function updates n1 to n1' such that:
- * If dir=1,
- * n1 = str.++( nr, n1' )
- * If dir=-1
- * n1 = str.++( n1', nr )
- * It updates curr to curr' such that:
- * curr' = curr - str.len( str.++( nr ) ), and
- * curr' >= 0
- * where the latter fact is determined by checkArithEntail.
- *
- * This function returns true if n1 is modified.
- *
- * For example:
- *
- * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)+1 )
- * returns true
- * n1 is updated to { "bc", y }
- * nr is updated to { x, "a" }
- * curr is updated to 0 *
- *
- * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)-1 )
- * returns false
- *
- * stripSymbolicLength( { y, "abc", x }, {}, 1, str.len(x)+1 )
- * returns false
- *
- * stripSymbolicLength( { x, "abc", y }, {}, -1, 2*str.len(y)+4 )
- * returns true
- * n1 is updated to { x }
- * nr is updated to { "abc", y }
- * curr is updated to str.len(y)+1
- */
- static bool stripSymbolicLength(std::vector<Node>& n1,
- std::vector<Node>& nr,
- int dir,
- Node& curr);
- /** component contains
- * This function is used when rewriting str.contains( t1, t2 ), where
- * n1 is the vector form of t1
- * n2 is the vector form of t2
- *
- * If this function returns n>=0 for some n, then
- * n1 = { x1...x{n-1} xn...x{n+s} x{n+s+1}...xm },
- * n2 = { y1...ys },
- * y1 is a suffix of xn,
- * y2...y{s-1} = x{n+1}...x{n+s-1}, and
- * ys is a prefix of x{n+s}
- * Otherwise it returns -1.
- *
- * This function may update n1 if computeRemainder = true.
- * We maintain the invariant that the resulting value n1'
- * of n1 after this function is such that:
- * n1 = str.++( nb, n1', ne )
- * The vectors nb and ne have the following properties.
- * If computeRemainder = true, then
- * If remainderDir != -1, then
- * ne is { x{n+s}' x{n+s+1}...xm }
- * where x{n+s} = str.++( ys, x{n+s}' ).
- * If remainderDir != 1, then
- * nb is { x1, ..., x{n-1}, xn' }
- * where xn = str.++( xn', y1 ).
- *
- * For example:
- *
- * componentContains({ x, "abc", x }, { "b" }, {}, true, 0)
- * returns 1,
- * n1 is updated to { "b" },
- * nb is updated to { x, "a" },
- * ne is updated to { "c", x }
- *
- * componentContains({ x, "abc", x }, { "b" }, {}, true, 1)
- * returns 1,
- * n1 is updated to { x, "ab" },
- * ne is updated to { "c", x }
- *
- * componentContains({ y, z, "abc", x, "def" }, { "c", x, "de" }, {}, true, 1)
- * returns 2,
- * n1 is updated to { y, z, "abc", x, "de" },
- * ne is updated to { "f" }
- *
- * componentContains({ y, "abc", x, "def" }, { "c", x, "de" }, {}, true, -1)
- * returns 1,
- * n1 is updated to { "c", x, "def" },
- * nb is updated to { y, "ab" }
- */
- static int componentContains(std::vector<Node>& n1,
- std::vector<Node>& n2,
- std::vector<Node>& nb,
- std::vector<Node>& ne,
- bool computeRemainder = false,
- int remainderDir = 0);
- /** component contains base
- *
- * This function is a helper for the above function.
- *
- * It returns true if n2 is contained in n1 with the following
- * restrictions:
- * If dir=1, then n2 must be a suffix of n1.
- * If dir=-1, then n2 must be a prefix of n1.
- *
- * If computeRemainder is true, then n1rb and n1re are
- * updated such that :
- * n1 = str.++( n1rb, n2, n1re )
- * where a null value of n1rb and n1re indicates the
- * empty string.
- *
- * For example:
- *
- * componentContainsBase("cabe", "ab", n1rb, n1re, 1, false)
- * returns false.
- *
- * componentContainsBase("cabe", "ab", n1rb, n1re, 0, true)
- * returns true,
- * n1rb is set to "c",
- * n1re is set to "e".
- *
- * componentContainsBase(y, str.substr(y,0,5), n1rb, n1re, -1, true)
- * returns true,
- * n1re is set to str.substr(y,5,str.len(y)).
- *
- *
- * Notice that this function may return false when it cannot compute a
- * remainder when it otherwise would have returned true. For example:
- *
- * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, false)
- * returns true.
- *
- * Hence, we know that str.substr(y,x,z) is contained in y. However:
- *
- * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, true)
- * returns false.
- *
- * The reason is since computeRemainder=true, it must be that
- * y = str.++( n1rb, str.substr(y,x,z), n1re )
- * for some n1rb, n1re. However, to construct such n1rb, n1re would require
- * e.g. the terms:
- * y = str.++( ite( x+z < 0 OR x < 0, "", str.substr(y,0,x) ),
- * str.substr(y,x,z),
- * ite( x+z < 0 OR x < 0, y, str.substr(y,x+z,len(y)) ) )
- *
- * Since we do not wish to introduce ITE terms in the rewriter, we instead
- * return false, indicating that we cannot compute the remainder.
- */
- static bool componentContainsBase(
- Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder);
- /** strip constant endpoints
- * This function is used when rewriting str.contains( t1, t2 ), where
- * n1 is the vector form of t1
- * n2 is the vector form of t2
- *
- * It modifies n1 to a new vector n1' such that:
- * (1) str.contains( str.++( n1 ), str.++( n2 ) ) is equivalent to
- * str.contains( str.++( n1' ), str.++( n2 ) )
- * (2) str.++( n1 ) = str.++( nb, n1', ne )
- *
- * "dir" is the direction in which we can modify n1:
- * if dir = 1, then we allow dropping components from the front of n1,
- * if dir = -1, then we allow dropping components from the back of n1,
- * if dir = 0, then we allow dropping components from either.
- *
- * It returns true if n1 is modified.
- *
- * For example:
- * stripConstantEndpoints({ "ab", x, "de" }, { "c" }, {}, {}, 1)
- * returns true,
- * n1 is updated to { x, "de" }
- * nb is updated to { "ab" }
- * stripConstantEndpoints({ "ab", x, "de" }, { "bd" }, {}, {}, 0)
- * returns true,
- * n1 is updated to { "b", x, "d" }
- * nb is updated to { "a" }
- * ne is updated to { "e" }
- * stripConstantEndpoints({ "ad", substr("ccc",x,y) }, { "d" }, {}, {}, -1)
- * returns true,
- * n1 is updated to {"ad"}
- * ne is updated to { substr("ccc",x,y) }
- */
- static bool stripConstantEndpoints(std::vector<Node>& n1,
- std::vector<Node>& n2,
- std::vector<Node>& nb,
- std::vector<Node>& ne,
- int dir = 0);
-
- /**
- * Given a symbolic length n, returns the canonical string (of type stype)
- * for that length. For example if n is constant, this function returns a
- * string consisting of "A" repeated n times. Returns the null node if no such
- * string exists.
- */
- static Node canonicalStrForSymbolicLength(Node n, TypeNode stype);
-
/** length preserving rewrite
*
* Given input n, this returns a string n' whose length is equivalent to n.
@@ -528,289 +229,12 @@ class SequencesRewriter : public TheoryRewriter
static Node lengthPreserveRewrite(Node n);
/**
- * Checks whether a string term `a` is entailed to contain or not contain a
- * string term `b`.
- *
- * @param a The string that is checked whether it contains `b`
- * @param b The string that is checked whether it is contained in `a`
- * @param fullRewriter Determines whether the function can use the full
- * rewriter or only `rewriteContains()` (useful for avoiding loops)
- * @return true node if it can be shown that `a` contains `b`, false node if
- * it can be shown that `a` does not contain `b`, null node otherwise
- */
- static Node checkEntailContains(Node a, Node b, bool fullRewriter = true);
-
- /** entail non-empty
- *
- * Checks whether string a is entailed to be non-empty. Is equivalent to
- * the call checkArithEntail( len( a ), true ).
- */
- static bool checkEntailNonEmpty(Node a);
-
- /**
- * Checks whether string has at most/exactly length one. Length one strings
- * can be used for more aggressive rewriting because there is guaranteed that
- * it cannot be overlap multiple components in a string concatenation.
- *
- * @param s The string to check
- * @param strict If true, the string must have exactly length one, otherwise
- * at most length one
- * @return True if the string has at most/exactly length one, false otherwise
- */
- static bool checkEntailLengthOne(Node s, bool strict = false);
-
- /** check arithmetic entailment equal
- * Returns true if it is always the case that a = b.
- */
- static bool checkEntailArithEq(Node a, Node b);
- /** check arithmetic entailment
- * Returns true if it is always the case that a >= b,
- * and a>b if strict is true.
- */
- static bool checkEntailArith(Node a, Node b, bool strict = false);
- /** check arithmetic entailment
- * Returns true if it is always the case that a >= 0.
- */
- static bool checkEntailArith(Node a, bool strict = false);
- /** check arithmetic entailment with approximations
- *
- * Returns true if it is always the case that a >= 0. We expect that a is in
- * rewritten form.
- *
- * This function uses "approximation" techniques that under-approximate
- * the value of a for the purposes of showing the entailment holds. For
- * example, given:
- * len( x ) - len( substr( y, 0, len( x ) ) )
- * Since we know that len( substr( y, 0, len( x ) ) ) <= len( x ), the above
- * term can be under-approximated as len( x ) - len( x ) = 0, which is >= 0,
- * and thus the entailment len( x ) - len( substr( y, 0, len( x ) ) ) >= 0
- * holds.
- */
- static bool checkEntailArithApprox(Node a);
- /** Get arithmetic approximations
- *
- * This gets the (set of) arithmetic approximations for term a and stores
- * them in approx. If isOverApprox is true, these are over-approximations
- * for the value of a, otherwise, they are underapproximations. For example,
- * an over-approximation for len( substr( y, n, m ) ) is m; an
- * under-approximation for indexof( x, y, n ) is -1.
- *
- * Notice that this function is not generally recursive (although it may make
- * a small bounded of recursive calls). Instead, it returns the shape
- * of the approximations for a. For example, an under-approximation
- * for the term len( replace( substr( x, 0, n ), y, z ) ) returned by this
- * function might be len( substr( x, 0, n ) ) - len( y ), where we don't
- * consider (recursively) the approximations for len( substr( x, 0, n ) ).
- */
- static void getArithApproximations(Node a,
- std::vector<Node>& approx,
- bool isOverApprox = false);
-
- /**
- * Checks whether it is always true that `a` is a strict subset of `b` in the
- * multiset domain.
- *
- * Examples:
- *
- * a = (str.++ "A" x), b = (str.++ "A" x "B") ---> true
- * a = (str.++ "A" x), b = (str.++ "B" x "AA") ---> true
- * a = (str.++ "A" x), b = (str.++ "B" y "AA") ---> false
- *
- * @param a The term for which it should be checked if it is a strict subset
- * of `b` in the multiset domain
- * @param b The term for which it should be checked if it is a strict
- * superset of `a` in the multiset domain
- * @return True if it is always the case that `a` is a strict subset of `b`,
- * false otherwise.
- */
- static bool checkEntailMultisetSubset(Node a, Node b);
-
- /**
- * Returns a character `c` if it is always the case that str.in.re(a, c*),
- * i.e. if all possible values of `a` only consist of `c` characters, and the
- * null node otherwise. If `a` is the empty string, the function returns an
- * empty string.
- *
- * @param a The node to check for homogeneity
- * @return If `a` is homogeneous, the only character that it may contain, the
- * empty string if `a` is empty, and the null node otherwise
- */
- static Node checkEntailHomogeneousString(Node a);
-
- /**
- * Simplifies a given node `a` s.t. the result is a concatenation of string
- * terms that can be interpreted as a multiset and which contains all
- * multisets that `a` could form.
- *
- * Examples:
- *
- * (str.substr "AA" 0 n) ---> "AA"
- * (str.replace "AAA" x "BB") ---> (str.++ "AAA" "BB")
- *
- * @param a The node to simplify
- * @return A concatenation that can be interpreted as a multiset
- */
- static Node getMultisetApproximation(Node a);
-
- /**
- * Checks whether assumption |= a >= 0 (if strict is false) or
- * assumption |= a > 0 (if strict is true), where assumption is an equality
- * assumption. The assumption must be in rewritten form.
- *
- * Example:
- *
- * checkEntailArithWithEqAssumption(x + (str.len y) = 0, -x, false) = true
- *
- * Because: x = -(str.len y), so -x >= 0 --> (str.len y) >= 0 --> true
- */
- static bool checkEntailArithWithEqAssumption(Node assumption,
- Node a,
- bool strict = false);
-
- /**
- * Checks whether assumption |= a >= b (if strict is false) or
- * assumption |= a > b (if strict is true). The function returns true if it
- * can be shown that the entailment holds and false otherwise. Assumption
- * must be in rewritten form. Assumption may be an equality or an inequality.
- *
- * Example:
- *
- * checkEntailArithWithAssumption(x + (str.len y) = 0, 0, x, false) = true
- *
- * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true
- */
- static bool checkEntailArithWithAssumption(Node assumption,
- Node a,
- Node b,
- bool strict = false);
-
- /**
- * Checks whether assumptions |= a >= b (if strict is false) or
- * assumptions |= a > b (if strict is true). The function returns true if it
- * can be shown that the entailment holds and false otherwise. Assumptions
- * must be in rewritten form. Assumptions may be an equalities or an
- * inequalities.
- *
- * Example:
- *
- * checkEntailArithWithAssumptions([x + (str.len y) = 0], 0, x, false) = true
- *
- * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true
- */
- static bool checkEntailArithWithAssumptions(std::vector<Node> assumptions,
- Node a,
- Node b,
- bool strict = false);
-
- /** get arithmetic lower bound
- * If this function returns a non-null Node ret,
- * then ret is a rational constant and
- * we know that n >= ret always if isLower is true,
- * or n <= ret if isLower is false.
- *
- * Notice the following invariant.
- * If getConstantArithBound(a, true) = ret where ret is non-null, then for
- * strict = { true, false } :
- * ret >= strict ? 1 : 0
- * if and only if
- * checkEntailArith( a, strict ) = true.
- */
- static Node getConstantArithBound(Node a, bool isLower = true);
- /** get length for regular expression
- *
- * Given regular expression n, if this method returns a non-null value c, then
- * x in n entails len( x ) = c.
- */
- static Node getFixedLengthForRegexp(Node n);
- /** decompose substr chain
- *
- * If s is substr( ... substr( base, x1, y1 ) ..., xn, yn ), then this
- * function returns base, adds { x1 ... xn } to ss, and { y1 ... yn } to ls.
- */
- static Node decomposeSubstrChain(Node s,
- std::vector<Node>& ss,
- std::vector<Node>& ls);
- /** make substr chain
- *
- * If ss is { x1 ... xn } and ls is { y1 ... yn }, this returns the term
- * substr( ... substr( base, x1, y1 ) ..., xn, yn ).
- */
- static Node mkSubstrChain(Node base,
- const std::vector<Node>& ss,
- const std::vector<Node>& ls);
-
- /**
- * Overapproximates the possible values of node n. This overapproximation
- * assumes that n can return a value x or the empty string and tries to find
- * the simplest x such that this holds. In the general case, x is the same as
- * the input n. This overapproximation can be used to sort terms with the
- * same possible values in string concatenation for example.
- *
- * Example:
- *
- * getStringOrEmpty( (str.replace "" x y) ) --> y because (str.replace "" x y)
- * either returns y or ""
- *
- * getStringOrEmpty( (str.substr "ABC" x y) ) --> (str.substr "ABC" x y)
- * because the function could not compute a simpler
- */
- static Node getStringOrEmpty(Node n);
-
- /**
- * Given an inequality y1 + ... + yn >= x, removes operands yi s.t. the
- * original inequality still holds. Returns true if the original inequality
- * holds and false otherwise. The list of ys is modified to contain a subset
- * of the original ys.
- *
- * Example:
- *
- * inferZerosInSumGeq( (str.len x), [ (str.len x), (str.len y), 1 ], [] )
- * --> returns true with ys = [ (str.len x) ] and zeroYs = [ (str.len y), 1 ]
- * (can be used to rewrite the inequality to false)
- *
- * inferZerosInSumGeq( (str.len x), [ (str.len y) ], [] )
- * --> returns false because it is not possible to show
- * str.len(y) >= str.len(x)
- */
- static bool inferZerosInSumGeq(Node x,
- std::vector<Node>& ys,
- std::vector<Node>& zeroYs);
-
- /**
- * Infers a conjunction of equalities that correspond to (str.contains x y)
- * if it can show that the length of y is greater or equal to the length of
- * x. If y is a concatentation, we get x = y1 ++ ... ++ yn, the conjunction
- * is of the form:
- *
- * (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
- *
- * where each yi'' are yi that must be empty for (= x y) to hold and yi' are
- * yi that the function could not infer anything about. Returns a null node
- * if the function cannot infer that str.len(y) >= str.len(x). Returns (= x
- * y) if the function can infer that str.len(y) >= str.len(x) but cannot
- * infer that any of the yi must be empty.
- */
- static Node inferEqsFromContains(Node x, Node y);
-
- /**
- * Collects equal-to-empty nodes from a conjunction or a single
- * node. Returns a list of nodes that are compared to empty nodes
- * and a boolean that indicates whether all nodes in the
- * conjunction were a comparison with the empty node. The nodes in
- * the list are sorted and duplicates removed.
- *
- * Examples:
- *
- * collectEmptyEqs( (= "" x) ) = { true, [x] }
- * collectEmptyEqs( (and (= "" x) (= "" y)) ) = { true, [x, y] }
- * collectEmptyEqs( (and (= "A" x) (= "" y) (= "" y)) ) = { false, [y] }
- *
- * @param x The conjunction of equalities or a single equality
- * @return A pair of a boolean that indicates whether the
- * conjunction consists only of comparisons to the empty string
- * and the list of nodes that are compared to the empty string
+ * Given a symbolic length n, returns the canonical string (of type stype)
+ * for that length. For example if n is constant, this function returns a
+ * string consisting of "A" repeated n times. Returns the null node if no such
+ * string exists.
*/
- static std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x);
+ static Node canonicalStrForSymbolicLength(Node n, TypeNode stype);
}; /* class SequencesRewriter */
} // namespace strings
diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp
index 7396a5013..bdce86d06 100644
--- a/src/theory/strings/skolem_cache.cpp
+++ b/src/theory/strings/skolem_cache.cpp
@@ -15,7 +15,7 @@
#include "theory/strings/skolem_cache.h"
#include "theory/rewriter.h"
-#include "theory/strings/sequences_rewriter.h"
+#include "theory/strings/arith_entail.h"
#include "util/rational.h"
using namespace CVC4::kind;
@@ -163,8 +163,8 @@ SkolemCache::normalizeStringSkolem(SkolemId id, Node a, Node b)
a = s;
b = m;
}
- else if (SequencesRewriter::checkEntailArith(nm->mkNode(PLUS, n, m),
- nm->mkNode(STRING_LENGTH, s)))
+ else if (ArithEntail::check(nm->mkNode(PLUS, n, m),
+ nm->mkNode(STRING_LENGTH, s)))
{
// SK_PURIFY((str.substr x n m)) ---> SK_SUFFIX_REM(x, n)
// if n + m >= (str.len x)
diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp
new file mode 100644
index 000000000..a1abfabe5
--- /dev/null
+++ b/src/theory/strings/strings_entail.cpp
@@ -0,0 +1,994 @@
+/********************* */
+/*! \file strings_entail.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ ** Andrew Reynolds, Andres Noetzli
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved. See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of entailment tests involving strings.
+ **/
+
+#include "theory/strings/strings_entail.h"
+
+#include "expr/node_builder.h"
+#include "theory/rewriter.h"
+#include "theory/strings/arith_entail.h"
+#include "theory/strings/sequences_rewriter.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
+
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+bool StringsEntail::canConstantContainConcat(Node c,
+ Node n,
+ int& firstc,
+ int& lastc)
+{
+ Assert(c.isConst());
+ CVC4::String t = c.getConst<String>();
+ const std::vector<unsigned>& tvec = t.getVec();
+ Assert(n.getKind() == STRING_CONCAT);
+ // must find constant components in order
+ size_t pos = 0;
+ firstc = -1;
+ lastc = -1;
+ for (unsigned i = 0; i < n.getNumChildren(); i++)
+ {
+ if (n[i].isConst())
+ {
+ firstc = firstc == -1 ? i : firstc;
+ lastc = i;
+ CVC4::String s = n[i].getConst<String>();
+ size_t new_pos = t.find(s, pos);
+ if (new_pos == std::string::npos)
+ {
+ return false;
+ }
+ else
+ {
+ pos = new_pos + s.size();
+ }
+ }
+ else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
+ {
+ // find the first occurrence of a digit starting at pos
+ while (pos < tvec.size() && !String::isDigit(tvec[pos]))
+ {
+ pos++;
+ }
+ if (pos == tvec.size())
+ {
+ return false;
+ }
+ // must consume at least one digit here
+ pos++;
+ }
+ }
+ return true;
+}
+
+bool StringsEntail::canConstantContainList(Node c,
+ std::vector<Node>& l,
+ int& firstc,
+ int& lastc)
+{
+ Assert(c.isConst());
+ // must find constant components in order
+ size_t pos = 0;
+ firstc = -1;
+ lastc = -1;
+ for (unsigned i = 0; i < l.size(); i++)
+ {
+ if (l[i].isConst())
+ {
+ firstc = firstc == -1 ? i : firstc;
+ lastc = i;
+ size_t new_pos = Word::find(c, l[i], pos);
+ if (new_pos == std::string::npos)
+ {
+ return false;
+ }
+ else
+ {
+ pos = new_pos + Word::getLength(l[i]);
+ }
+ }
+ }
+ return true;
+}
+
+bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
+ std::vector<Node>& nr,
+ int dir,
+ Node& curr)
+{
+ Assert(dir == 1 || dir == -1);
+ Assert(nr.empty());
+ Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0));
+ bool ret = false;
+ bool success;
+ unsigned sindex = 0;
+ do
+ {
+ Assert(!curr.isNull());
+ success = false;
+ if (curr != zero && sindex < n1.size())
+ {
+ unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
+ if (n1[sindex_use].isConst())
+ {
+ // could strip part of a constant
+ Node lowerBound =
+ ArithEntail::getConstantBound(Rewriter::rewrite(curr));
+ if (!lowerBound.isNull())
+ {
+ Assert(lowerBound.isConst());
+ Rational lbr = lowerBound.getConst<Rational>();
+ if (lbr.sgn() > 0)
+ {
+ Assert(ArithEntail::check(curr, true));
+ CVC4::String s = n1[sindex_use].getConst<String>();
+ Node ncl =
+ NodeManager::currentNM()->mkConst(CVC4::Rational(s.size()));
+ Node next_s =
+ NodeManager::currentNM()->mkNode(MINUS, lowerBound, ncl);
+ next_s = Rewriter::rewrite(next_s);
+ Assert(next_s.isConst());
+ // we can remove the entire constant
+ if (next_s.getConst<Rational>().sgn() >= 0)
+ {
+ curr = Rewriter::rewrite(
+ NodeManager::currentNM()->mkNode(MINUS, curr, ncl));
+ success = true;
+ sindex++;
+ }
+ else
+ {
+ // we can remove part of the constant
+ // lower bound minus the length of a concrete string is negative,
+ // hence lowerBound cannot be larger than long max
+ Assert(lbr < Rational(String::maxSize()));
+ curr = Rewriter::rewrite(
+ NodeManager::currentNM()->mkNode(MINUS, curr, lowerBound));
+ uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
+ Assert(lbsize < s.size());
+ if (dir == 1)
+ {
+ // strip partially from the front
+ nr.push_back(
+ NodeManager::currentNM()->mkConst(s.prefix(lbsize)));
+ n1[sindex_use] = NodeManager::currentNM()->mkConst(
+ s.suffix(s.size() - lbsize));
+ }
+ else
+ {
+ // strip partially from the back
+ nr.push_back(
+ NodeManager::currentNM()->mkConst(s.suffix(lbsize)));
+ n1[sindex_use] = NodeManager::currentNM()->mkConst(
+ s.prefix(s.size() - lbsize));
+ }
+ ret = true;
+ }
+ Assert(ArithEntail::check(curr));
+ }
+ else
+ {
+ // we cannot remove the constant
+ }
+ }
+ }
+ else
+ {
+ Node next_s = NodeManager::currentNM()->mkNode(
+ MINUS,
+ curr,
+ NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use]));
+ next_s = Rewriter::rewrite(next_s);
+ if (ArithEntail::check(next_s))
+ {
+ success = true;
+ curr = next_s;
+ sindex++;
+ }
+ }
+ }
+ } while (success);
+ if (sindex > 0)
+ {
+ if (dir == 1)
+ {
+ nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
+ n1.erase(n1.begin(), n1.begin() + sindex);
+ }
+ else
+ {
+ nr.insert(nr.end(), n1.end() - sindex, n1.end());
+ n1.erase(n1.end() - sindex, n1.end());
+ }
+ ret = true;
+ }
+ return ret;
+}
+
+int StringsEntail::componentContains(std::vector<Node>& n1,
+ std::vector<Node>& n2,
+ std::vector<Node>& nb,
+ std::vector<Node>& ne,
+ bool computeRemainder,
+ int remainderDir)
+{
+ Assert(nb.empty());
+ Assert(ne.empty());
+ // if n2 is a singleton, we can do optimized version here
+ if (n2.size() == 1)
+ {
+ for (unsigned i = 0; i < n1.size(); i++)
+ {
+ Node n1rb;
+ Node n1re;
+ if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
+ {
+ if (computeRemainder)
+ {
+ n1[i] = n2[0];
+ if (remainderDir != -1)
+ {
+ if (!n1re.isNull())
+ {
+ ne.push_back(n1re);
+ }
+ ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
+ n1.erase(n1.begin() + i + 1, n1.end());
+ }
+ else if (!n1re.isNull())
+ {
+ n1[i] = Rewriter::rewrite(
+ NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
+ }
+ if (remainderDir != 1)
+ {
+ nb.insert(nb.end(), n1.begin(), n1.begin() + i);
+ n1.erase(n1.begin(), n1.begin() + i);
+ if (!n1rb.isNull())
+ {
+ nb.push_back(n1rb);
+ }
+ }
+ else if (!n1rb.isNull())
+ {
+ n1[i] = Rewriter::rewrite(
+ NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
+ }
+ }
+ return i;
+ }
+ }
+ }
+ else if (n1.size() >= n2.size())
+ {
+ unsigned diff = n1.size() - n2.size();
+ for (unsigned i = 0; i <= diff; i++)
+ {
+ Node n1rb_first;
+ Node n1re_first;
+ // first component of n2 must be a suffix
+ if (componentContainsBase(n1[i],
+ n2[0],
+ n1rb_first,
+ n1re_first,
+ 1,
+ computeRemainder && remainderDir != 1))
+ {
+ Assert(n1re_first.isNull());
+ for (unsigned j = 1; j < n2.size(); j++)
+ {
+ // are we in the last component?
+ if (j + 1 == n2.size())
+ {
+ Node n1rb_last;
+ Node n1re_last;
+ // last component of n2 must be a prefix
+ if (componentContainsBase(n1[i + j],
+ n2[j],
+ n1rb_last,
+ n1re_last,
+ -1,
+ computeRemainder && remainderDir != -1))
+ {
+ Assert(n1rb_last.isNull());
+ if (computeRemainder)
+ {
+ if (remainderDir != -1)
+ {
+ if (!n1re_last.isNull())
+ {
+ ne.push_back(n1re_last);
+ }
+ ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
+ n1.erase(n1.begin() + i + j + 1, n1.end());
+ n1[i + j] = n2[j];
+ }
+ if (remainderDir != 1)
+ {
+ n1[i] = n2[0];
+ nb.insert(nb.end(), n1.begin(), n1.begin() + i);
+ n1.erase(n1.begin(), n1.begin() + i);
+ if (!n1rb_first.isNull())
+ {
+ nb.push_back(n1rb_first);
+ }
+ }
+ }
+ return i;
+ }
+ else
+ {
+ break;
+ }
+ }
+ else if (n1[i + j] != n2[j])
+ {
+ break;
+ }
+ }
+ }
+ }
+ }
+ return -1;
+}
+
+bool StringsEntail::componentContainsBase(
+ Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
+{
+ Assert(n1rb.isNull());
+ Assert(n1re.isNull());
+
+ NodeManager* nm = NodeManager::currentNM();
+
+ if (n1 == n2)
+ {
+ return true;
+ }
+ else
+ {
+ if (n1.isConst() && n2.isConst())
+ {
+ size_t len1 = Word::getLength(n1);
+ size_t len2 = Word::getLength(n2);
+ if (len2 < len1)
+ {
+ if (dir == 1)
+ {
+ if (Word::suffix(n1, len2) == n2)
+ {
+ if (computeRemainder)
+ {
+ n1rb = Word::prefix(n1, len1 - len2);
+ }
+ return true;
+ }
+ }
+ else if (dir == -1)
+ {
+ if (Word::prefix(n1, len2) == n2)
+ {
+ if (computeRemainder)
+ {
+ n1re = Word::suffix(n1, len1 - len2);
+ }
+ return true;
+ }
+ }
+ else
+ {
+ size_t f = Word::find(n1, n2);
+ if (f != std::string::npos)
+ {
+ if (computeRemainder)
+ {
+ if (f > 0)
+ {
+ n1rb = Word::prefix(n1, f);
+ }
+ if (len1 > f + len2)
+ {
+ n1re = Word::suffix(n1, len1 - (f + len2));
+ }
+ }
+ return true;
+ }
+ }
+ }
+ }
+ else
+ {
+ // cases for:
+ // n1 = x containing n2 = substr( x, n2[1], n2[2] )
+ if (n2.getKind() == STRING_SUBSTR)
+ {
+ if (n2[0] == n1)
+ {
+ bool success = true;
+ Node start_pos = n2[1];
+ Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]);
+ Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]);
+ if (dir == 1)
+ {
+ // To be a suffix, start + length must be greater than
+ // or equal to the length of the string.
+ success = ArithEntail::check(end_pos, len_n2s);
+ }
+ else if (dir == -1)
+ {
+ // To be a prefix, must literally start at 0, since
+ // if we knew it started at <0, it should be rewritten to "",
+ // if we knew it started at 0, then n2[1] should be rewritten to
+ // 0.
+ success = start_pos.isConst()
+ && start_pos.getConst<Rational>().sgn() == 0;
+ }
+ if (success)
+ {
+ if (computeRemainder)
+ {
+ // we can only compute the remainder if start_pos and end_pos
+ // are known to be non-negative.
+ if (!ArithEntail::check(start_pos)
+ || !ArithEntail::check(end_pos))
+ {
+ return false;
+ }
+ if (dir != 1)
+ {
+ n1rb = nm->mkNode(
+ STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos);
+ }
+ if (dir != -1)
+ {
+ n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
+ }
+ }
+ return true;
+ }
+ }
+ }
+
+ if (!computeRemainder && dir == 0)
+ {
+ if (n1.getKind() == STRING_STRREPL)
+ {
+ // (str.contains (str.replace x y z) w) ---> true
+ // if (str.contains x w) --> true and (str.contains z w) ---> true
+ Node xCtnW = StringsEntail::checkContains(n1[0], n2);
+ if (!xCtnW.isNull() && xCtnW.getConst<bool>())
+ {
+ Node zCtnW = StringsEntail::checkContains(n1[2], n2);
+ if (!zCtnW.isNull() && zCtnW.getConst<bool>())
+ {
+ return true;
+ }
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
+bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
+ std::vector<Node>& n2,
+ std::vector<Node>& nb,
+ std::vector<Node>& ne,
+ int dir)
+{
+ Assert(nb.empty());
+ Assert(ne.empty());
+
+ NodeManager* nm = NodeManager::currentNM();
+ bool changed = false;
+ // for ( forwards, backwards ) direction
+ for (unsigned r = 0; r < 2; r++)
+ {
+ if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
+ {
+ unsigned index0 = r == 0 ? 0 : n1.size() - 1;
+ unsigned index1 = r == 0 ? 0 : n2.size() - 1;
+ bool removeComponent = false;
+ Node n1cmp = n1[index0];
+
+ if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0)
+ {
+ return false;
+ }
+
+ std::vector<Node> sss;
+ std::vector<Node> sls;
+ n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
+ Trace("strings-rewrite-debug2")
+ << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
+ << ", dir = " << dir << std::endl;
+ if (n1cmp.isConst())
+ {
+ CVC4::String s = n1cmp.getConst<String>();
+ // overlap is an overapproximation of the number of characters
+ // n2[index1] can match in s
+ unsigned overlap = s.size();
+ if (n2[index1].isConst())
+ {
+ CVC4::String t = n2[index1].getConst<String>();
+ std::size_t ret = r == 0 ? s.find(t) : s.rfind(t);
+ if (ret == std::string::npos)
+ {
+ if (n1.size() == 1)
+ {
+ // can remove everything
+ // e.g. str.contains( "abc", str.++( "ba", x ) ) -->
+ // str.contains( "", str.++( "ba", x ) )
+ removeComponent = true;
+ }
+ else if (sss.empty()) // only if not substr
+ {
+ // check how much overlap there is
+ // This is used to partially strip off the endpoint
+ // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
+ // str.contains( str.++( "c", x ), str.++( "cd", y ) )
+ overlap = r == 0 ? s.overlap(t) : t.overlap(s);
+ }
+ else
+ {
+ // if we are looking at a substring, we can remove the component
+ // if there is no overlap
+ // e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
+ // --> str.contains( x, "a" )
+ removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0);
+ }
+ }
+ else if (sss.empty()) // only if not substr
+ {
+ Assert(ret < s.size());
+ // can strip off up to the find position, e.g.
+ // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
+ // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
+ // and
+ // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
+ // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
+ overlap = s.size() - ret;
+ }
+ }
+ else
+ {
+ // inconclusive
+ }
+ // process the overlap
+ if (overlap < s.size())
+ {
+ changed = true;
+ if (overlap == 0)
+ {
+ removeComponent = true;
+ }
+ else
+ {
+ // can drop the prefix (resp. suffix) from the first (resp. last)
+ // component
+ if (r == 0)
+ {
+ nb.push_back(nm->mkConst(s.prefix(s.size() - overlap)));
+ n1[index0] = nm->mkConst(s.suffix(overlap));
+ }
+ else
+ {
+ ne.push_back(nm->mkConst(s.suffix(s.size() - overlap)));
+ n1[index0] = nm->mkConst(s.prefix(overlap));
+ }
+ }
+ }
+ }
+ else if (n1cmp.getKind() == STRING_ITOS)
+ {
+ if (n2[index1].isConst())
+ {
+ CVC4::String t = n2[index1].getConst<String>();
+
+ if (n1.size() == 1)
+ {
+ // if n1.size()==1, then if n2[index1] is not a number, we can drop
+ // the entire component
+ // e.g. str.contains( int.to.str(x), "123a45") --> false
+ if (!t.isNumber())
+ {
+ removeComponent = true;
+ }
+ }
+ else
+ {
+ const std::vector<unsigned>& tvec = t.getVec();
+ Assert(tvec.size() > 0);
+
+ // if n1.size()>1, then if the first (resp. last) character of
+ // n2[index1]
+ // is not a digit, we can drop the entire component, e.g.:
+ // str.contains( str.++( int.to.str(x), y ), "a12") -->
+ // str.contains( y, "a12" )
+ // str.contains( str.++( y, int.to.str(x) ), "a0b") -->
+ // str.contains( y, "a0b" )
+ unsigned i = r == 0 ? 0 : (tvec.size() - 1);
+ if (!String::isDigit(tvec[i]))
+ {
+ removeComponent = true;
+ }
+ }
+ }
+ }
+ if (removeComponent)
+ {
+ // can drop entire first (resp. last) component
+ if (r == 0)
+ {
+ nb.push_back(n1[index0]);
+ n1.erase(n1.begin(), n1.begin() + 1);
+ }
+ else
+ {
+ ne.push_back(n1[index0]);
+ n1.pop_back();
+ }
+ if (n1.empty())
+ {
+ // if we've removed everything, just return (we will rewrite to false)
+ return true;
+ }
+ else
+ {
+ changed = true;
+ }
+ }
+ }
+ }
+ // TODO (#1180) : computing the maximal overlap in this function may be
+ // important.
+ // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
+ // false
+ // ...since str.to.int(x) can contain at most 1 character from "2aaaa",
+ // leaving 4 characters
+ // which is larger that the upper bound for length of str.substr(y,0,3),
+ // which is 3.
+ return changed;
+}
+
+Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ Node ctn = nm->mkNode(STRING_STRCTN, a, b);
+
+ if (fullRewriter)
+ {
+ ctn = Rewriter::rewrite(ctn);
+ }
+ else
+ {
+ Node prev;
+ do
+ {
+ prev = ctn;
+ ctn = SequencesRewriter::rewriteContains(ctn);
+ } while (prev != ctn && ctn.getKind() == STRING_STRCTN);
+ }
+
+ Assert(ctn.getType().isBoolean());
+ return ctn.isConst() ? ctn : Node::null();
+}
+
+bool StringsEntail::checkNonEmpty(Node a)
+{
+ Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
+ len = Rewriter::rewrite(len);
+ return ArithEntail::check(len, true);
+}
+
+bool StringsEntail::checkLengthOne(Node s, bool strict)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ Node one = nm->mkConst(Rational(1));
+ Node len = nm->mkNode(STRING_LENGTH, s);
+ len = Rewriter::rewrite(len);
+ return ArithEntail::check(one, len)
+ && (!strict || ArithEntail::check(len, true));
+}
+
+bool StringsEntail::checkMultisetSubset(Node a, Node b)
+{
+ std::vector<Node> avec;
+ utils::getConcat(getMultisetApproximation(a), avec);
+ std::vector<Node> bvec;
+ utils::getConcat(b, bvec);
+
+ std::map<Node, unsigned> num_nconst[2];
+ std::map<Node, unsigned> num_const[2];
+ for (unsigned j = 0; j < 2; j++)
+ {
+ std::vector<Node>& jvec = j == 0 ? avec : bvec;
+ for (const Node& cc : jvec)
+ {
+ if (cc.isConst())
+ {
+ num_const[j][cc]++;
+ }
+ else
+ {
+ num_nconst[j][cc]++;
+ }
+ }
+ }
+ bool ms_success = true;
+ for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
+ {
+ if (nncp.second > num_nconst[1][nncp.first])
+ {
+ ms_success = false;
+ break;
+ }
+ }
+ if (ms_success)
+ {
+ // count the number of constant characters in the first argument
+ std::map<Node, unsigned> count_const[2];
+ std::vector<Node> chars;
+ for (unsigned j = 0; j < 2; j++)
+ {
+ for (std::pair<const Node, unsigned>& ncp : num_const[j])
+ {
+ Node cn = ncp.first;
+ Assert(cn.isConst());
+ std::vector<Node> cnChars = Word::getChars(cn);
+ for (const Node& ch : cnChars)
+ {
+ count_const[j][ch] += ncp.second;
+ if (std::find(chars.begin(), chars.end(), ch) == chars.end())
+ {
+ chars.push_back(ch);
+ }
+ }
+ }
+ }
+ Trace("strings-entail-ms-ss")
+ << "For " << a << " and " << b << " : " << std::endl;
+ for (const Node& ch : chars)
+ {
+ Trace("strings-entail-ms-ss") << " # occurrences of substring ";
+ Trace("strings-entail-ms-ss") << ch << " in arguments is ";
+ Trace("strings-entail-ms-ss")
+ << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
+ if (count_const[0][ch] < count_const[1][ch])
+ {
+ return true;
+ }
+ }
+
+ // TODO (#1180): count the number of 2,3,4,.. character substrings
+ // for example:
+ // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
+ // since the second argument contains more occurrences of "bb".
+ // note this is orthogonal reasoning to inductive reasoning
+ // via regular membership reduction in Liang et al CAV 2015.
+ }
+ return false;
+}
+
+Node StringsEntail::checkHomogeneousString(Node a)
+{
+ std::vector<Node> avec;
+ utils::getConcat(getMultisetApproximation(a), avec);
+
+ bool cValid = false;
+ Node c;
+ for (const Node& ac : avec)
+ {
+ if (ac.isConst())
+ {
+ std::vector<Node> acv = Word::getChars(ac);
+ for (const Node& cc : acv)
+ {
+ if (!cValid)
+ {
+ cValid = true;
+ c = cc;
+ }
+ else if (c != cc)
+ {
+ // Found a different character
+ return Node::null();
+ }
+ }
+ }
+ else
+ {
+ // Could produce a different character
+ return Node::null();
+ }
+ }
+
+ if (!cValid)
+ {
+ return Word::mkEmptyWord(a.getType());
+ }
+
+ return c;
+}
+
+Node StringsEntail::getMultisetApproximation(Node a)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ if (a.getKind() == STRING_SUBSTR)
+ {
+ return a[0];
+ }
+ else if (a.getKind() == STRING_STRREPL)
+ {
+ return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
+ }
+ else if (a.getKind() == STRING_CONCAT)
+ {
+ NodeBuilder<> nb(STRING_CONCAT);
+ for (const Node& ac : a)
+ {
+ nb << getMultisetApproximation(ac);
+ }
+ return nb.constructNode();
+ }
+ else
+ {
+ return a;
+ }
+}
+
+Node StringsEntail::getStringOrEmpty(Node n)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ Node res;
+ while (res.isNull())
+ {
+ switch (n.getKind())
+ {
+ case STRING_STRREPL:
+ {
+ Node empty = nm->mkConst(::CVC4::String(""));
+ if (n[0] == empty)
+ {
+ // (str.replace "" x y) --> y
+ n = n[2];
+ break;
+ }
+
+ if (checkLengthOne(n[0]) && n[2] == empty)
+ {
+ // (str.replace "A" x "") --> "A"
+ res = n[0];
+ break;
+ }
+
+ res = n;
+ break;
+ }
+ case STRING_SUBSTR:
+ {
+ if (checkLengthOne(n[0]))
+ {
+ // (str.substr "A" x y) --> "A"
+ res = n[0];
+ break;
+ }
+ res = n;
+ break;
+ }
+ default:
+ {
+ res = n;
+ break;
+ }
+ }
+ }
+ return res;
+}
+
+Node StringsEntail::inferEqsFromContains(Node x, Node y)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ Node emp = nm->mkConst(String(""));
+ Assert(x.getType() == y.getType());
+ TypeNode stype = x.getType();
+
+ Node xLen = nm->mkNode(STRING_LENGTH, x);
+ std::vector<Node> yLens;
+ if (y.getKind() != STRING_CONCAT)
+ {
+ yLens.push_back(nm->mkNode(STRING_LENGTH, y));
+ }
+ else
+ {
+ for (const Node& yi : y)
+ {
+ yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
+ }
+ }
+
+ std::vector<Node> zeroLens;
+ if (x == emp)
+ {
+ // If x is the empty string, then all ys must be empty, too, and we can
+ // skip the expensive checks. Note that this is just a performance
+ // optimization.
+ zeroLens.swap(yLens);
+ }
+ else
+ {
+ // Check if we can infer that str.len(x) <= str.len(y). If that is the
+ // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
+ // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
+ // true. The terms that can have length zero without making the inequality
+ // false must be all be empty if (str.contains x y) is true.
+ if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens))
+ {
+ // We could not prove that the inequality holds
+ return Node::null();
+ }
+ else if (yLens.size() == y.getNumChildren())
+ {
+ // We could only prove that the inequality holds but not that any of the
+ // ys must be empty
+ return nm->mkNode(EQUAL, x, y);
+ }
+ }
+
+ if (y.getKind() != STRING_CONCAT)
+ {
+ if (zeroLens.size() == 1)
+ {
+ // y is not a concatenation and we found that it must be empty, so just
+ // return (= y "")
+ Assert(zeroLens[0][0] == y);
+ return nm->mkNode(EQUAL, y, emp);
+ }
+ else
+ {
+ Assert(yLens.size() == 1 && yLens[0][0] == y);
+ return nm->mkNode(EQUAL, x, y);
+ }
+ }
+
+ std::vector<Node> cs;
+ for (const Node& yiLen : yLens)
+ {
+ Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
+ cs.push_back(yiLen[0]);
+ }
+
+ NodeBuilder<> nb(AND);
+ // (= x (str.++ y1' ... ym'))
+ if (!cs.empty())
+ {
+ nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
+ }
+ // (= y1'' "") ... (= yk'' "")
+ for (const Node& zeroLen : zeroLens)
+ {
+ Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
+ nb << nm->mkNode(EQUAL, zeroLen[0], emp);
+ }
+
+ // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
+ return nb.constructNode();
+}
+
+} // namespace strings
+} // namespace theory
+} // namespace CVC4
diff --git a/src/theory/strings/strings_entail.h b/src/theory/strings/strings_entail.h
new file mode 100644
index 000000000..d4993faf4
--- /dev/null
+++ b/src/theory/strings/strings_entail.h
@@ -0,0 +1,371 @@
+/********************* */
+/*! \file strings_entail.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ ** Andrew Reynolds, Andres Noetzli
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved. See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Entailment tests involving strings
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__STRINGS__STRING_ENTAIL_H
+#define CVC4__THEORY__STRINGS__STRING_ENTAIL_H
+
+#include <vector>
+
+#include "expr/node.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+/**
+ * Entailment tests involving strings.
+ * Some of these techniques are described in Reynolds et al, "High Level
+ * Abstractions for Simplifying Extended String Constraints in SMT", CAV 2019.
+ */
+class StringsEntail
+{
+ public:
+ /** can constant contain list
+ * return true if constant c can contain the list l in order
+ * firstc/lastc store which indices in l were used to determine the return
+ * value.
+ * (This is typically used when this function returns false, for minimizing
+ * explanations)
+ *
+ * For example:
+ * canConstantContainList( "abc", { x, "c", y } ) returns true
+ * firstc/lastc are updated to 1/1
+ * canConstantContainList( "abc", { x, "d", y } ) returns false
+ * firstc/lastc are updated to 1/1
+ * canConstantContainList( "abcdef", { x, "b", y, "a", z, "c", w }
+ * returns false
+ * firstc/lastc are updated to 1/3
+ * canConstantContainList( "abcdef", { x, "b", y, "e", z, "c", w }
+ * returns false
+ * firstc/lastc are updated to 1/5
+ */
+ static bool canConstantContainList(Node c,
+ std::vector<Node>& l,
+ int& firstc,
+ int& lastc);
+ /** can constant contain concat
+ * same as above but with n = str.++( l ) instead of l
+ */
+ static bool canConstantContainConcat(Node c, Node n, int& firstc, int& lastc);
+
+ /** strip symbolic length
+ *
+ * This function strips off components of n1 whose length is less than
+ * or equal to argument curr, and stores them in nr. The direction
+ * dir determines whether the components are removed from the start
+ * or end of n1.
+ *
+ * In detail, this function updates n1 to n1' such that:
+ * If dir=1,
+ * n1 = str.++( nr, n1' )
+ * If dir=-1
+ * n1 = str.++( n1', nr )
+ * It updates curr to curr' such that:
+ * curr' = curr - str.len( str.++( nr ) ), and
+ * curr' >= 0
+ * where the latter fact is determined by checkArithEntail.
+ *
+ * This function returns true if n1 is modified.
+ *
+ * For example:
+ *
+ * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)+1 )
+ * returns true
+ * n1 is updated to { "bc", y }
+ * nr is updated to { x, "a" }
+ * curr is updated to 0 *
+ *
+ * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)-1 )
+ * returns false
+ *
+ * stripSymbolicLength( { y, "abc", x }, {}, 1, str.len(x)+1 )
+ * returns false
+ *
+ * stripSymbolicLength( { x, "abc", y }, {}, -1, 2*str.len(y)+4 )
+ * returns true
+ * n1 is updated to { x }
+ * nr is updated to { "abc", y }
+ * curr is updated to str.len(y)+1
+ */
+ static bool stripSymbolicLength(std::vector<Node>& n1,
+ std::vector<Node>& nr,
+ int dir,
+ Node& curr);
+ /** component contains
+ * This function is used when rewriting str.contains( t1, t2 ), where
+ * n1 is the vector form of t1
+ * n2 is the vector form of t2
+ *
+ * If this function returns n>=0 for some n, then
+ * n1 = { x1...x{n-1} xn...x{n+s} x{n+s+1}...xm },
+ * n2 = { y1...ys },
+ * y1 is a suffix of xn,
+ * y2...y{s-1} = x{n+1}...x{n+s-1}, and
+ * ys is a prefix of x{n+s}
+ * Otherwise it returns -1.
+ *
+ * This function may update n1 if computeRemainder = true.
+ * We maintain the invariant that the resulting value n1'
+ * of n1 after this function is such that:
+ * n1 = str.++( nb, n1', ne )
+ * The vectors nb and ne have the following properties.
+ * If computeRemainder = true, then
+ * If remainderDir != -1, then
+ * ne is { x{n+s}' x{n+s+1}...xm }
+ * where x{n+s} = str.++( ys, x{n+s}' ).
+ * If remainderDir != 1, then
+ * nb is { x1, ..., x{n-1}, xn' }
+ * where xn = str.++( xn', y1 ).
+ *
+ * For example:
+ *
+ * componentContains({ x, "abc", x }, { "b" }, {}, true, 0)
+ * returns 1,
+ * n1 is updated to { "b" },
+ * nb is updated to { x, "a" },
+ * ne is updated to { "c", x }
+ *
+ * componentContains({ x, "abc", x }, { "b" }, {}, true, 1)
+ * returns 1,
+ * n1 is updated to { x, "ab" },
+ * ne is updated to { "c", x }
+ *
+ * componentContains({ y, z, "abc", x, "def" }, { "c", x, "de" }, {}, true, 1)
+ * returns 2,
+ * n1 is updated to { y, z, "abc", x, "de" },
+ * ne is updated to { "f" }
+ *
+ * componentContains({ y, "abc", x, "def" }, { "c", x, "de" }, {}, true, -1)
+ * returns 1,
+ * n1 is updated to { "c", x, "def" },
+ * nb is updated to { y, "ab" }
+ */
+ static int componentContains(std::vector<Node>& n1,
+ std::vector<Node>& n2,
+ std::vector<Node>& nb,
+ std::vector<Node>& ne,
+ bool computeRemainder = false,
+ int remainderDir = 0);
+ /** strip constant endpoints
+ * This function is used when rewriting str.contains( t1, t2 ), where
+ * n1 is the vector form of t1
+ * n2 is the vector form of t2
+ *
+ * It modifies n1 to a new vector n1' such that:
+ * (1) str.contains( str.++( n1 ), str.++( n2 ) ) is equivalent to
+ * str.contains( str.++( n1' ), str.++( n2 ) )
+ * (2) str.++( n1 ) = str.++( nb, n1', ne )
+ *
+ * "dir" is the direction in which we can modify n1:
+ * if dir = 1, then we allow dropping components from the front of n1,
+ * if dir = -1, then we allow dropping components from the back of n1,
+ * if dir = 0, then we allow dropping components from either.
+ *
+ * It returns true if n1 is modified.
+ *
+ * For example:
+ * stripConstantEndpoints({ "ab", x, "de" }, { "c" }, {}, {}, 1)
+ * returns true,
+ * n1 is updated to { x, "de" }
+ * nb is updated to { "ab" }
+ * stripConstantEndpoints({ "ab", x, "de" }, { "bd" }, {}, {}, 0)
+ * returns true,
+ * n1 is updated to { "b", x, "d" }
+ * nb is updated to { "a" }
+ * ne is updated to { "e" }
+ * stripConstantEndpoints({ "ad", substr("ccc",x,y) }, { "d" }, {}, {}, -1)
+ * returns true,
+ * n1 is updated to {"ad"}
+ * ne is updated to { substr("ccc",x,y) }
+ */
+ static bool stripConstantEndpoints(std::vector<Node>& n1,
+ std::vector<Node>& n2,
+ std::vector<Node>& nb,
+ std::vector<Node>& ne,
+ int dir = 0);
+
+ /**
+ * Checks whether a string term `a` is entailed to contain or not contain a
+ * string term `b`.
+ *
+ * @param a The string that is checked whether it contains `b`
+ * @param b The string that is checked whether it is contained in `a`
+ * @param fullRewriter Determines whether the function can use the full
+ * rewriter or only `rewriteContains()` (useful for avoiding loops)
+ * @return true node if it can be shown that `a` contains `b`, false node if
+ * it can be shown that `a` does not contain `b`, null node otherwise
+ */
+ static Node checkContains(Node a, Node b, bool fullRewriter = true);
+
+ /** entail non-empty
+ *
+ * Checks whether string a is entailed to be non-empty. Is equivalent to
+ * the call checkArithEntail( len( a ), true ).
+ */
+ static bool checkNonEmpty(Node a);
+
+ /**
+ * Checks whether string has at most/exactly length one. Length one strings
+ * can be used for more aggressive rewriting because there is guaranteed that
+ * it cannot be overlap multiple components in a string concatenation.
+ *
+ * @param s The string to check
+ * @param strict If true, the string must have exactly length one, otherwise
+ * at most length one
+ * @return True if the string has at most/exactly length one, false otherwise
+ */
+ static bool checkLengthOne(Node s, bool strict = false);
+
+ /**
+ * Checks whether it is always true that `a` is a strict subset of `b` in the
+ * multiset domain.
+ *
+ * Examples:
+ *
+ * a = (str.++ "A" x), b = (str.++ "A" x "B") ---> true
+ * a = (str.++ "A" x), b = (str.++ "B" x "AA") ---> true
+ * a = (str.++ "A" x), b = (str.++ "B" y "AA") ---> false
+ *
+ * @param a The term for which it should be checked if it is a strict subset
+ * of `b` in the multiset domain
+ * @param b The term for which it should be checked if it is a strict
+ * superset of `a` in the multiset domain
+ * @return True if it is always the case that `a` is a strict subset of `b`,
+ * false otherwise.
+ */
+ static bool checkMultisetSubset(Node a, Node b);
+
+ /**
+ * Returns a character `c` if it is always the case that str.in.re(a, c*),
+ * i.e. if all possible values of `a` only consist of `c` characters, and the
+ * null node otherwise. If `a` is the empty string, the function returns an
+ * empty string.
+ *
+ * @param a The node to check for homogeneity
+ * @return If `a` is homogeneous, the only character that it may contain, the
+ * empty string if `a` is empty, and the null node otherwise
+ */
+ static Node checkHomogeneousString(Node a);
+
+ /**
+ * Overapproximates the possible values of node n. This overapproximation
+ * assumes that n can return a value x or the empty string and tries to find
+ * the simplest x such that this holds. In the general case, x is the same as
+ * the input n. This overapproximation can be used to sort terms with the
+ * same possible values in string concatenation for example.
+ *
+ * Example:
+ *
+ * getStringOrEmpty( (str.replace "" x y) ) --> y because (str.replace "" x y)
+ * either returns y or ""
+ *
+ * getStringOrEmpty( (str.substr "ABC" x y) ) --> (str.substr "ABC" x y)
+ * because the function could not compute a simpler
+ */
+ static Node getStringOrEmpty(Node n);
+
+ /**
+ * Infers a conjunction of equalities that correspond to (str.contains x y)
+ * if it can show that the length of y is greater or equal to the length of
+ * x. If y is a concatentation, we get x = y1 ++ ... ++ yn, the conjunction
+ * is of the form:
+ *
+ * (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
+ *
+ * where each yi'' are yi that must be empty for (= x y) to hold and yi' are
+ * yi that the function could not infer anything about. Returns a null node
+ * if the function cannot infer that str.len(y) >= str.len(x). Returns (= x
+ * y) if the function can infer that str.len(y) >= str.len(x) but cannot
+ * infer that any of the yi must be empty.
+ */
+ static Node inferEqsFromContains(Node x, Node y);
+
+ private:
+ /** component contains base
+ *
+ * This function is a helper for the above function.
+ *
+ * It returns true if n2 is contained in n1 with the following
+ * restrictions:
+ * If dir=1, then n2 must be a suffix of n1.
+ * If dir=-1, then n2 must be a prefix of n1.
+ *
+ * If computeRemainder is true, then n1rb and n1re are
+ * updated such that :
+ * n1 = str.++( n1rb, n2, n1re )
+ * where a null value of n1rb and n1re indicates the
+ * empty string.
+ *
+ * For example:
+ *
+ * componentContainsBase("cabe", "ab", n1rb, n1re, 1, false)
+ * returns false.
+ *
+ * componentContainsBase("cabe", "ab", n1rb, n1re, 0, true)
+ * returns true,
+ * n1rb is set to "c",
+ * n1re is set to "e".
+ *
+ * componentContainsBase(y, str.substr(y,0,5), n1rb, n1re, -1, true)
+ * returns true,
+ * n1re is set to str.substr(y,5,str.len(y)).
+ *
+ *
+ * Notice that this function may return false when it cannot compute a
+ * remainder when it otherwise would have returned true. For example:
+ *
+ * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, false)
+ * returns true.
+ *
+ * Hence, we know that str.substr(y,x,z) is contained in y. However:
+ *
+ * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, true)
+ * returns false.
+ *
+ * The reason is since computeRemainder=true, it must be that
+ * y = str.++( n1rb, str.substr(y,x,z), n1re )
+ * for some n1rb, n1re. However, to construct such n1rb, n1re would require
+ * e.g. the terms:
+ * y = str.++( ite( x+z < 0 OR x < 0, "", str.substr(y,0,x) ),
+ * str.substr(y,x,z),
+ * ite( x+z < 0 OR x < 0, y, str.substr(y,x+z,len(y)) ) )
+ *
+ * Since we do not wish to introduce ITE terms in the rewriter, we instead
+ * return false, indicating that we cannot compute the remainder.
+ */
+ static bool componentContainsBase(
+ Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder);
+ /**
+ * Simplifies a given node `a` s.t. the result is a concatenation of string
+ * terms that can be interpreted as a multiset and which contains all
+ * multisets that `a` could form.
+ *
+ * Examples:
+ *
+ * (str.substr "AA" 0 n) ---> "AA"
+ * (str.replace "AAA" x "BB") ---> (str.++ "AAA" "BB")
+ *
+ * @param a The node to simplify
+ * @return A concatenation that can be interpreted as a multiset
+ */
+ static Node getMultisetApproximation(Node a);
+};
+
+} // namespace strings
+} // namespace theory
+} // namespace CVC4
+
+#endif /* CVC4__THEORY__STRINGS__STRING_ENTAIL_H */
diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp
index 7777b9bd7..097cef235 100644
--- a/src/theory/strings/theory_strings_preprocess.cpp
+++ b/src/theory/strings/theory_strings_preprocess.cpp
@@ -22,6 +22,7 @@
#include "options/strings_options.h"
#include "proof/proof_manager.h"
#include "smt/logic_exception.h"
+#include "theory/strings/arith_entail.h"
#include "theory/strings/sequences_rewriter.h"
#include "theory/strings/word.h"
@@ -75,7 +76,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
Node sk1 = n == d_zero ? emp
: d_sc->mkSkolemCached(
s, n, SkolemCache::SK_PREFIX, "sspre");
- Node sk2 = SequencesRewriter::checkEntailArith(t12, lt0)
+ Node sk2 = ArithEntail::check(t12, lt0)
? emp
: d_sc->mkSkolemCached(
s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr");
diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp
index 3b4c757f2..b2a62cc62 100644
--- a/src/theory/strings/theory_strings_utils.cpp
+++ b/src/theory/strings/theory_strings_utils.cpp
@@ -18,6 +18,8 @@
#include "options/strings_options.h"
#include "theory/rewriter.h"
+#include "theory/strings/arith_entail.h"
+#include "theory/strings/strings_entail.h"
#include "theory/strings/word.h"
using namespace CVC4::kind;
@@ -178,6 +180,84 @@ Node getConstantEndpoint(Node e, bool isSuf)
return getConstantComponent(e);
}
+Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls)
+{
+ Assert(ss.empty());
+ Assert(ls.empty());
+ while (s.getKind() == STRING_SUBSTR)
+ {
+ ss.push_back(s[1]);
+ ls.push_back(s[2]);
+ s = s[0];
+ }
+ std::reverse(ss.begin(), ss.end());
+ std::reverse(ls.begin(), ls.end());
+ return s;
+}
+
+Node mkSubstrChain(Node base,
+ const std::vector<Node>& ss,
+ const std::vector<Node>& ls)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ for (unsigned i = 0, size = ss.size(); i < size; i++)
+ {
+ base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]);
+ }
+ return base;
+}
+
+std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x)
+{
+ // Collect the equalities of the form (= x "") (sorted)
+ std::set<TNode> emptyNodes;
+ bool allEmptyEqs = true;
+ if (x.getKind() == EQUAL)
+ {
+ if (Word::isEmpty(x[0]))
+ {
+ emptyNodes.insert(x[1]);
+ }
+ else if (Word::isEmpty(x[1]))
+ {
+ emptyNodes.insert(x[0]);
+ }
+ else
+ {
+ allEmptyEqs = false;
+ }
+ }
+ else if (x.getKind() == AND)
+ {
+ for (const Node& c : x)
+ {
+ if (c.getKind() == EQUAL)
+ {
+ if (Word::isEmpty(c[0]))
+ {
+ emptyNodes.insert(c[1]);
+ }
+ else if (Word::isEmpty(c[1]))
+ {
+ emptyNodes.insert(c[0]);
+ }
+ }
+ else
+ {
+ allEmptyEqs = false;
+ }
+ }
+ }
+
+ if (emptyNodes.size() == 0)
+ {
+ allEmptyEqs = false;
+ }
+
+ return std::make_pair(
+ allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
+}
+
bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
{
size_t i = start;
@@ -270,6 +350,15 @@ bool isStringKind(Kind k)
|| k == STRING_FROM_CODE || k == STRING_TO_CODE;
}
+bool isRegExpKind(Kind k)
+{
+ return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
+ || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
+ || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
+ || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
+ || k == REGEXP_COMPLEMENT;
+}
+
TypeNode getOwnerStringType(Node n)
{
TypeNode tn;
diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h
index 846d3d563..9fc23499a 100644
--- a/src/theory/strings/theory_strings_utils.h
+++ b/src/theory/strings/theory_strings_utils.h
@@ -102,6 +102,41 @@ Node getConstantComponent(Node t);
*/
Node getConstantEndpoint(Node e, bool isSuf);
+/** decompose substr chain
+ *
+ * If s is substr( ... substr( base, x1, y1 ) ..., xn, yn ), then this
+ * function returns base, adds { x1 ... xn } to ss, and { y1 ... yn } to ls.
+ */
+Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls);
+/** make substr chain
+ *
+ * If ss is { x1 ... xn } and ls is { y1 ... yn }, this returns the term
+ * substr( ... substr( base, x1, y1 ) ..., xn, yn ).
+ */
+Node mkSubstrChain(Node base,
+ const std::vector<Node>& ss,
+ const std::vector<Node>& ls);
+
+/**
+ * Collects equal-to-empty nodes from a conjunction or a single
+ * node. Returns a list of nodes that are compared to empty nodes
+ * and a boolean that indicates whether all nodes in the
+ * conjunction were a comparison with the empty node. The nodes in
+ * the list are sorted and duplicates removed.
+ *
+ * Examples:
+ *
+ * collectEmptyEqs( (= "" x) ) = { true, [x] }
+ * collectEmptyEqs( (and (= "" x) (= "" y)) ) = { true, [x, y] }
+ * collectEmptyEqs( (and (= "A" x) (= "" y) (= "" y)) ) = { false, [y] }
+ *
+ * @param x The conjunction of equalities or a single equality
+ * @return A pair of a boolean that indicates whether the
+ * conjunction consists only of comparisons to the empty string
+ * and the list of nodes that are compared to the empty string
+ */
+std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x);
+
/**
* Given a vector of regular expression nodes and a start index that points to
* a wildcard, returns true if the wildcard is unbounded (i.e. it is followed
@@ -142,6 +177,8 @@ void printConcatTrace(std::vector<Node>& n, const char* c);
/** Is k a string-specific kind? */
bool isStringKind(Kind k);
+/** is k a native operator whose return type is a regular expression? */
+bool isRegExpKind(Kind k);
/** Get owner string type
*
diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp
index b42cf3160..085078dea 100644
--- a/src/theory/strings/word.cpp
+++ b/src/theory/strings/word.cpp
@@ -99,7 +99,7 @@ std::vector<Node> Word::getChars(TNode x)
return ret;
}
-bool Word::isEmpty(TNode x) { return getLength(x) == 0; }
+bool Word::isEmpty(TNode x) { return x.isConst() && getLength(x) == 0; }
bool Word::strncmp(TNode x, TNode y, std::size_t n)
{
@@ -283,6 +283,31 @@ std::size_t Word::roverlap(TNode x, TNode y)
return 0;
}
+Node Word::splitConstant(Node x, Node y, size_t& index, bool isRev)
+{
+ Assert(x.isConst() && y.isConst());
+ size_t lenA = getLength(x);
+ size_t lenB = getLength(y);
+ index = lenA <= lenB ? 1 : 0;
+ size_t lenShort = index == 1 ? lenA : lenB;
+ bool cmp = isRev ? rstrncmp(x, y, lenShort) : strncmp(x, y, lenShort);
+ if (cmp)
+ {
+ Node l = index == 0 ? x : y;
+ if (isRev)
+ {
+ size_t new_len = getLength(l) - lenShort;
+ return substr(l, 0, new_len);
+ }
+ else
+ {
+ return substr(l, lenShort);
+ }
+ }
+ // not the same prefix/suffix
+ return Node::null();
+}
+
} // namespace strings
} // namespace theory
} // namespace CVC4
diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h
index 8e6e7876e..454710c98 100644
--- a/src/theory/strings/word.h
+++ b/src/theory/strings/word.h
@@ -139,6 +139,19 @@ class Word
* Notice that x.overlap(y) = y.roverlap(x)
*/
static std::size_t roverlap(TNode x, TNode y);
+ /** Split constant
+ *
+ * This returns the suffix remainder (resp. prefix remainder when isRev is
+ * true) of words a and b, call it r, such that:
+ * (1) a = b ++ r , or
+ * (2) a ++ r = b
+ * when isRev = false. The argument index is set to 1 if we are in the second
+ * case, and 0 otherwise.
+ *
+ * If a and b do not share a common prefix (resp. suffix), then this method
+ * returns the null node.
+ */
+ static Node splitConstant(Node a, Node b, size_t& index, bool isRev);
};
// ------------------------------ end for words (string or sequence constants)
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback