diff options
Diffstat (limited to 'src/theory/strings')
60 files changed, 4034 insertions, 2050 deletions
diff --git a/src/theory/strings/arith_entail.cpp b/src/theory/strings/arith_entail.cpp index adcbe590e..3c58767b3 100644 --- a/src/theory/strings/arith_entail.cpp +++ b/src/theory/strings/arith_entail.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -15,6 +15,7 @@ #include "theory/strings/arith_entail.h" #include "expr/attribute.h" +#include "expr/node_algorithm.h" #include "theory/arith/arith_msum.h" #include "theory/rewriter.h" #include "theory/strings/theory_strings_utils.h" @@ -555,6 +556,8 @@ bool ArithEntail::checkWithEqAssumption(Node assumption, Node a, bool strict) { Assert(assumption.getKind() == kind::EQUAL); Assert(Rewriter::rewrite(assumption) == assumption); + Trace("strings-entail") << "checkWithEqAssumption: " << assumption << " " << a + << ", strict=" << strict << std::endl; // Find candidates variables to compute substitutions for std::unordered_set<Node, NodeHashFunction> candVars; @@ -615,8 +618,11 @@ bool ArithEntail::checkWithEqAssumption(Node assumption, Node a, bool strict) // Could not solve for v return false; } + Trace("strings-entail") << "checkWithEqAssumption: subs " << v << " -> " + << solution << std::endl; - a = a.substitute(TNode(v), TNode(solution)); + // use capture avoiding substitution + a = expr::substituteCaptureAvoiding(a, v, solution); return check(a, strict); } diff --git a/src/theory/strings/arith_entail.h b/src/theory/strings/arith_entail.h index e3dd7e2e5..a0117a456 100644 --- a/src/theory/strings/arith_entail.h +++ b/src/theory/strings/arith_entail.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the 
CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/base_solver.cpp b/src/theory/strings/base_solver.cpp index 00658d08b..451c01f8c 100644 --- a/src/theory/strings/base_solver.cpp +++ b/src/theory/strings/base_solver.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -638,9 +638,8 @@ void BaseSolver::checkCardinalityType(TypeNode tn, ei->d_cardinalityLemK.set(int_k + 1); if (!cons.isConst() || !cons.getConst<bool>()) { - std::vector<Node> emptyVec; d_im.sendInference( - emptyVec, expn, cons, Inference::CARDINALITY, true); + expn, expn, cons, Inference::CARDINALITY, false, true); return; } } @@ -675,7 +674,7 @@ Node BaseSolver::explainConstantEqc(Node n, Node eqc, std::vector<Node>& exp) } if (!bei.d_exp.isNull()) { - exp.push_back(bei.d_exp); + utils::flattenOp(AND, bei.d_exp, exp); } if (!bei.d_base.isNull()) { @@ -695,7 +694,7 @@ Node BaseSolver::explainBestContentEqc(Node n, Node eqc, std::vector<Node>& exp) Assert(!bei.d_bestContent.isNull()); if (!bei.d_exp.isNull()) { - exp.push_back(bei.d_exp); + utils::flattenOp(AND, bei.d_exp, exp); } if (!bei.d_base.isNull()) { diff --git a/src/theory/strings/base_solver.h b/src/theory/strings/base_solver.h index 5dcb75560..87f136dd0 100644 --- a/src/theory/strings/base_solver.h +++ 
b/src/theory/strings/base_solver.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index 89a286a06..48116bc24 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -32,13 +32,15 @@ namespace strings { CoreInferInfo::CoreInferInfo() : d_index(0), d_rev(false) {} -CoreSolver::CoreSolver(context::Context* c, - context::UserContext* u, - SolverState& s, +CoreSolver::CoreSolver(SolverState& s, InferenceManager& im, TermRegistry& tr, BaseSolver& bs) - : d_state(s), d_im(im), d_termReg(tr), d_bsolver(bs), d_nfPairs(c) + : d_state(s), + d_im(im), + d_termReg(tr), + d_bsolver(bs), + d_nfPairs(s.getSatContext()) { d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) ); d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); @@ -171,7 +173,6 @@ void CoreSolver::checkFlatForms() // conflict, explanation is n = base ^ base = c ^ relevant portion // of ( n = f[n] ) std::vector<Node> exp; - d_bsolver.explainConstantEqc(n,eqc,exp); for (int e = firstc; e <= lastc; e++) { if (d_flat_form[n][e].isConst()) @@ -180,9 +181,10 @@ void CoreSolver::checkFlatForms() Assert(d_flat_form_index[n][e] >= 0 && d_flat_form_index[n][e] < (int)n.getNumChildren()); d_im.addToExplanation( - d_flat_form[n][e], n[d_flat_form_index[n][e]], exp); + n[d_flat_form_index[n][e]], d_flat_form[n][e], exp); } } + d_bsolver.explainConstantEqc(n, eqc, exp); Node conc = d_false; d_im.sendInference(exp, conc, Inference::F_NCTN); return; @@ -239,6 +241,8 @@ void CoreSolver::checkFlatForm(std::vector<Node>& eqc, << "Check flat form for a = " << a << ", whose flat form is " << d_flat_form[a] << ")" << std::endl; Node b; + // the length explanation + Node lant; do { std::vector<Node> exp; @@ -370,10 +374,11 @@ void CoreSolver::checkFlatForm(std::vector<Node>& eqc, Trace("strings-ff-debug") << lexp2[j] << std::endl; } } - - exp.insert(exp.end(), lexp.begin(), lexp.end()); - exp.insert(exp.end(), lexp2.begin(), lexp2.end()); - d_im.addToExplanation(lcurr, lcc, exp); + std::vector<Node> lexpc; + lexpc.insert(lexpc.end(), lexp.begin(), lexp.end()); + 
lexpc.insert(lexpc.end(), lexp2.begin(), lexp2.end()); + d_im.addToExplanation(lcurr, lcc, lexpc); + lant = utils::mkAnd(lexpc); conc = ac.eqNode(bc); infType = Inference::F_UNIFY; break; @@ -388,7 +393,6 @@ void CoreSolver::checkFlatForm(std::vector<Node>& eqc, Trace("strings-ff-debug") << "Found inference (" << infType << "): " << conc << " based on equality " << a << " == " << b << ", " << isRev << std::endl; - d_im.addToExplanation(a, b, exp); // explain why prefixes up to now were the same for (size_t j = 0; j < count; j++) { @@ -425,12 +429,18 @@ void CoreSolver::checkFlatForm(std::vector<Node>& eqc, } } } + d_im.addToExplanation(a, b, exp); + if (!lant.isNull()) + { + // add the length explanation + exp.push_back(lant); + } // Notice that F_EndpointEmp is not typically applied, since // strict prefix equality ( a.b = a ) where a,b non-empty // is conflicting by arithmetic len(a.b)=len(a)+len(b)!=len(a) // when len(b)!=0. Although if we do not infer this conflict eagerly, // it may be applied (see #3272). 
- d_im.sendInference(exp, conc, infType); + d_im.sendInference(exp, conc, infType, isRev); if (d_state.isInConflict()) { return; @@ -549,9 +559,12 @@ void CoreSolver::checkNormalFormsEq() { NormalForm& nfe_eq = getNormalForm(itn->second); // two equivalence classes have same normal form, merge - std::vector<Node> nf_exp; - nf_exp.push_back(utils::mkAnd(nfe.d_exp)); - nf_exp.push_back(eqc_to_exp[itn->second]); + std::vector<Node> nf_exp(nfe.d_exp.begin(), nfe.d_exp.end()); + Node eexp = eqc_to_exp[itn->second]; + if (eexp != d_true) + { + nf_exp.push_back(eexp); + } Node eq = nfe.d_base.eqNode(nfe_eq.d_base); d_im.sendInference(nf_exp, eq, Inference::NORMAL_FORM); if (d_im.hasProcessed()) @@ -693,6 +706,162 @@ Node CoreSolver::getNormalString(Node x, std::vector<Node>& nf_exp) return x; } +Node CoreSolver::getConclusion(Node x, + Node y, + PfRule rule, + bool isRev, + SkolemCache* skc, + std::vector<Node>& newSkolems) +{ + Trace("strings-csolver") << "CoreSolver::getConclusion: " << x << " " << y + << " " << rule << " " << isRev << std::endl; + NodeManager* nm = NodeManager::currentNM(); + Node conc; + if (rule == PfRule::CONCAT_SPLIT || rule == PfRule::CONCAT_LPROP) + { + Node sk1; + Node sk2; + if (options::stringUnifiedVSpt()) + { + // must compare so that we are agnostic to order of x/y + Node ux = x < y ? x : y; + Node uy = x < y ? y : x; + Node sk = skc->mkSkolemCached(ux, + uy, + isRev ? SkolemCache::SK_ID_V_UNIFIED_SPT_REV + : SkolemCache::SK_ID_V_UNIFIED_SPT, + "v_spt"); + newSkolems.push_back(sk); + sk1 = sk; + sk2 = sk; + } + else + { + sk1 = skc->mkSkolemCached( + x, + y, + isRev ? SkolemCache::SK_ID_V_SPT_REV : SkolemCache::SK_ID_V_SPT, + "v_spt1"); + sk2 = skc->mkSkolemCached( + y, + x, + isRev ? SkolemCache::SK_ID_V_SPT_REV : SkolemCache::SK_ID_V_SPT, + "v_spt2"); + newSkolems.push_back(sk1); + newSkolems.push_back(sk2); + } + Node eq1 = x.eqNode(isRev ? 
nm->mkNode(STRING_CONCAT, sk1, y) + : nm->mkNode(STRING_CONCAT, y, sk1)); + + if (rule == PfRule::CONCAT_LPROP) + { + conc = eq1; + } + else + { + Node eq2 = y.eqNode(isRev ? nm->mkNode(STRING_CONCAT, sk2, x) + : nm->mkNode(STRING_CONCAT, x, sk2)); + // make agnostic to x/y + conc = x < y ? nm->mkNode(OR, eq1, eq2) : nm->mkNode(OR, eq2, eq1); + } + if (options::stringUnifiedVSpt() && options::stringLenConc()) + { + // we can assume its length is greater than zero + Node emp = Word::mkEmptyWord(sk1.getType()); + conc = nm->mkNode( + AND, + conc, + sk1.eqNode(emp).negate(), + nm->mkNode( + GT, nm->mkNode(STRING_LENGTH, sk1), nm->mkConst(Rational(0)))); + } + } + else if (rule == PfRule::CONCAT_CSPLIT) + { + Assert(y.isConst()); + size_t yLen = Word::getLength(y); + Node firstChar = + yLen == 1 ? y : (isRev ? Word::suffix(y, 1) : Word::prefix(y, 1)); + Node sk = skc->mkSkolemCached( + x, + isRev ? SkolemCache::SK_ID_VC_SPT_REV : SkolemCache::SK_ID_VC_SPT, + "c_spt"); + newSkolems.push_back(sk); + conc = x.eqNode(isRev ? nm->mkNode(STRING_CONCAT, sk, firstChar) + : nm->mkNode(STRING_CONCAT, firstChar, sk)); + } + else if (rule == PfRule::CONCAT_CPROP) + { + // expect (str.++ z d) and c + Assert(x.getKind() == STRING_CONCAT && x.getNumChildren() == 2); + Node z = x[isRev ? 1 : 0]; + Node d = x[isRev ? 0 : 1]; + Assert(d.isConst()); + Node c = y; + Assert(c.isConst()); + size_t cLen = Word::getLength(c); + size_t p = getSufficientNonEmptyOverlap(c, d, isRev); + Node preC = + p == cLen ? c : (isRev ? Word::suffix(c, p) : Word::prefix(c, p)); + Node sk = skc->mkSkolemCached( + z, + preC, + isRev ? SkolemCache::SK_ID_C_SPT_REV : SkolemCache::SK_ID_C_SPT, + "c_spt"); + newSkolems.push_back(sk); + conc = z.eqNode(isRev ? 
nm->mkNode(STRING_CONCAT, sk, preC) + : nm->mkNode(STRING_CONCAT, preC, sk)); + } + + return conc; +} + +size_t CoreSolver::getSufficientNonEmptyOverlap(Node c, Node d, bool isRev) +{ + Assert(c.isConst() && c.getType().isStringLike()); + Assert(d.isConst() && d.getType().isStringLike()); + size_t p; + size_t p2; + size_t cLen = Word::getLength(c); + if (isRev) + { + // Since non-empty, we start with character 1 + Node c1 = Word::prefix(c, cLen - 1); + p = cLen - Word::roverlap(c1, d); + p2 = Word::rfind(c1, d); + } + else + { + Node c1 = Word::substr(c, 1); + p = cLen - Word::overlap(c1, d); + p2 = Word::find(c1, d); + } + return p2 == std::string::npos ? p : (p > p2 + 1 ? p2 + 1 : p); +} + +Node CoreSolver::getDecomposeConclusion(Node x, + Node l, + bool isRev, + bool addLenConc, + SkolemCache* skc, + std::vector<Node>& newSkolems) +{ + Assert(l.getType().isInteger()); + NodeManager* nm = NodeManager::currentNM(); + Node n = isRev ? nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, x), l) : l; + Node sk1 = skc->mkSkolemCached(x, n, SkolemCache::SK_PREFIX, "dc_spt1"); + newSkolems.push_back(sk1); + Node sk2 = skc->mkSkolemCached(x, n, SkolemCache::SK_SUFFIX_REM, "dc_spt2"); + newSkolems.push_back(sk2); + Node conc = x.eqNode(nm->mkNode(STRING_CONCAT, sk1, sk2)); + if (addLenConc) + { + Node lc = nm->mkNode(STRING_LENGTH, isRev ? 
sk2 : sk1).eqNode(l); + conc = nm->mkNode(AND, conc, lc); + } + return conc; +} + void CoreSolver::getNormalForms(Node eqc, std::vector<NormalForm>& normal_forms, std::map<Node, unsigned>& term_to_nf_index, @@ -920,16 +1089,16 @@ void CoreSolver::processNEqc(Node eqc, if (!StringsEntail::canConstantContainList(c, nfi.d_nf, firstc, lastc)) { Node n = nfi.d_base; + std::vector<Node> exp(nfi.d_exp.begin(), nfi.d_exp.end()); //conflict Trace("strings-solve") << "Normal form for " << n << " cannot be contained in constant " << c << std::endl; // conflict, explanation is: // n = base ^ base = c ^ relevant porition of ( n = N[n] ) - std::vector< Node > exp; - d_bsolver.explainConstantEqc(n,eqc,exp); // Notice although not implemented, this can be minimized based on // firstc/lastc, normal_forms_exp_depend. - exp.insert(exp.end(), nfi.d_exp.begin(), nfi.d_exp.end()); + d_bsolver.explainConstantEqc(n, eqc, exp); d_im.sendInference(exp, d_false, Inference::N_NCTN); + // conflict, finished return; } } @@ -1023,6 +1192,10 @@ void CoreSolver::processNEqc(Node eqc, return; } } + if (d_im.hasProcessed()) + { + break; + } } if (d_im.hasProcessed() || pinfer.empty()) { @@ -1096,7 +1269,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // can infer that this string must be empty Node eq = nfkv[index_k].eqNode(emp); Assert(!d_state.areEqual(emp, nfkv[index_k])); - d_im.sendInference(curr_exp, eq, Inference::N_ENDPOINT_EMP); + d_im.sendInference(curr_exp, eq, Inference::N_ENDPOINT_EMP, isRev); index_k++; } break; @@ -1134,6 +1307,14 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, if (d_state.areEqual(xLenTerm, yLenTerm)) { + std::vector<Node> ant; + NormalForm::getExplanationForPrefixEq(nfi, nfj, index, index, ant); + if (x.isConst() && y.isConst()) + { + // if both are constant, it's just a constant conflict + d_im.sendInference(ant, d_false, Inference::N_CONST, isRev, true); + return; + } // `x` and `y` have the same length. 
We infer that the two components // have to be the same. // @@ -1142,9 +1323,11 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, << "Simple Case 2 : string lengths are equal" << std::endl; Node eq = x.eqNode(y); Node leneq = xLenTerm.eqNode(yLenTerm); - NormalForm::getExplanationForPrefixEq(nfi, nfj, index, index, lenExp); lenExp.push_back(leneq); - d_im.sendInference(lenExp, eq, Inference::N_UNIFY); + // set the explanation for length + Node lant = utils::mkAnd(lenExp); + ant.push_back(lant); + d_im.sendInference(ant, eq, Inference::N_UNIFY, isRev); break; } else if ((!x.isConst() && index == nfiv.size() - rproc - 1) @@ -1180,8 +1363,11 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, { std::vector<Node> antec; NormalForm::getExplanationForPrefixEq(nfi, nfj, -1, -1, antec); - d_im.sendInference( - antec, eqn[0].eqNode(eqn[1]), Inference::N_ENDPOINT_EQ, true); + d_im.sendInference(antec, + eqn[0].eqNode(eqn[1]), + Inference::N_ENDPOINT_EQ, + isRev, + true); } else { @@ -1241,7 +1427,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // E.g. "abc" ++ ... = "bc" ++ ... ---> conflict std::vector<Node> antec; NormalForm::getExplanationForPrefixEq(nfi, nfj, index, index, antec); - d_im.sendInference(antec, d_false, Inference::N_CONST, true); + d_im.sendInference(antec, d_false, Inference::N_CONST, isRev, true); break; } } @@ -1283,8 +1469,11 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, if (detectLoop(nfi, nfj, index, lhsLoopIdx, rhsLoopIdx, rproc)) { // We are dealing with a looping word equation. + // Note we could make this code also run in the reverse direction, but + // this is not implemented currently. if (!isRev) - { // FIXME + { + // add temporarily to the antecedant of iinfo. NormalForm::getExplanationForPrefixEq(nfi, nfj, -1, -1, iinfo.d_ant); ProcessLoopResult plr = processLoop(lhsLoopIdx != -1 ? 
nfi : nfj, @@ -1302,6 +1491,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, break; } Assert(plr == ProcessLoopResult::SKIPPED); + // not processing an inference here, undo changes to ant + iinfo.d_ant.clear(); } } @@ -1356,9 +1547,9 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, break; } - // At this point, we know that `nc` is non-empty, so we add that to our - // explanation. - iinfo.d_ant.push_back(expNonEmpty); + // At this point, we know that `nc` is non-empty, so we add expNonEmpty + // to our explanation below. We do this after adding other parts of the + // explanation for consistency with other inferences. size_t ncIndex = index + 1; Node nextConstStr = nfnc.collectConstantStringAt(ncIndex); @@ -1370,35 +1561,11 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // E.g. "abc" ++ ... = nc ++ "b" ++ ... ---> nc = "a" ++ k size_t cIndex = index; Node stra = nfc.collectConstantStringAt(cIndex); - size_t straLen = Word::getLength(stra); Assert(!stra.isNull()); Node strb = nextConstStr; - // Since `nc` is non-empty, we start with character 1 - size_t p; - if (isRev) - { - Node stra1 = Word::prefix(stra, straLen - 1); - p = straLen - Word::roverlap(stra1, strb); - Trace("strings-csp-debug") - << "Compute roverlap : " << stra1 << " " << strb << std::endl; - size_t p2 = Word::rfind(stra1, strb); - p = p2 == std::string::npos ? p : (p > p2 + 1 ? p2 + 1 : p); - Trace("strings-csp-debug") - << "roverlap : " << stra1 << " " << strb << " returned " << p - << " " << p2 << " " << (p2 == std::string::npos) << std::endl; - } - else - { - Node stra1 = Word::substr(stra, 1); - p = straLen - Word::overlap(stra1, strb); - Trace("strings-csp-debug") - << "Compute overlap : " << stra1 << " " << strb << std::endl; - size_t p2 = Word::find(stra1, strb); - p = p2 == std::string::npos ? p : (p > p2 + 1 ? 
p2 + 1 : p); - Trace("strings-csp-debug") - << "overlap : " << stra1 << " " << strb << " returned " << p - << " " << p2 << " " << (p2 == std::string::npos) << std::endl; - } + + // Since `nc` is non-empty, we use the non-empty overlap + size_t p = getSufficientNonEmptyOverlap(stra, strb, isRev); // If we can't split off more than a single character from the // constant, we might as well do regular constant/non-constant @@ -1407,22 +1574,18 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, { NormalForm::getExplanationForPrefixEq( nfc, nfnc, cIndex, ncIndex, iinfo.d_ant); - Node prea = p == straLen ? stra - : (isRev ? Word::suffix(stra, p) - : Word::prefix(stra, p)); + iinfo.d_ant.push_back(expNonEmpty); + // make the conclusion SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk = skc->mkSkolemCached( - nc, - prea, - isRev ? SkolemCache::SK_ID_C_SPT_REV : SkolemCache::SK_ID_C_SPT, - "c_spt"); - Trace("strings-csp") - << "Const Split: " << prea << " is removed from " << stra - << " due to " << strb << ", p=" << p << std::endl; - iinfo.d_conc = nc.eqNode(isRev ? utils::mkNConcat(sk, prea) - : utils::mkNConcat(prea, sk)); - iinfo.d_new_skolem[LENGTH_SPLIT].push_back(sk); + Node xcv = + nm->mkNode(STRING_CONCAT, isRev ? strb : nc, isRev ? nc : strb); + std::vector<Node> newSkolems; + iinfo.d_conc = getConclusion( + xcv, stra, PfRule::CONCAT_CPROP, isRev, skc, newSkolems); + Assert(newSkolems.size() == 1); + iinfo.d_new_skolem[LENGTH_SPLIT].push_back(newSkolems[0]); iinfo.d_id = Inference::SSPLIT_CST_PROP; + iinfo.d_idRev = isRev; pinfer.push_back(info); break; } @@ -1432,25 +1595,17 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // to start with the first character of the constant. // // E.g. "abc" ++ ... = nc ++ ... ---> nc = "a" ++ k - Node stra = nfcv[index]; - size_t straLen = Word::getLength(stra); - Node firstChar = straLen == 1 ? stra - : (isRev ? 
Word::suffix(stra, 1) - : Word::prefix(stra, 1)); SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk = skc->mkSkolemCached( - nc, - isRev ? SkolemCache::SK_ID_VC_SPT_REV : SkolemCache::SK_ID_VC_SPT, - "c_spt"); - Trace("strings-csp") << "Const Split: " << firstChar - << " is removed from " << stra << " (serial) " - << std::endl; + std::vector<Node> newSkolems; + iinfo.d_conc = getConclusion( + nc, nfcv[index], PfRule::CONCAT_CSPLIT, isRev, skc, newSkolems); NormalForm::getExplanationForPrefixEq( nfi, nfj, index, index, iinfo.d_ant); - iinfo.d_conc = nc.eqNode(isRev ? utils::mkNConcat(sk, firstChar) - : utils::mkNConcat(firstChar, sk)); - iinfo.d_new_skolem[LENGTH_SPLIT].push_back(sk); + iinfo.d_ant.push_back(expNonEmpty); + Assert(newSkolems.size() == 1); + iinfo.d_new_skolem[LENGTH_SPLIT].push_back(newSkolems[0]); iinfo.d_id = Inference::SSPLIT_CST; + iinfo.d_idRev = isRev; pinfer.push_back(info); break; } @@ -1465,7 +1620,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, Assert(!y.isConst()); int32_t lentTestSuccess = -1; - Node lentTestExp; + Node lenConstraint; if (options::stringCheckEntailLen()) { // If length entailment checks are enabled, we can save the case split by @@ -1489,54 +1644,81 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, Trace("strings-entail") << " explanation was : " << et.second << std::endl; lentTestSuccess = e; - lentTestExp = et.second; + lenConstraint = entLit; + // Its not explained by the equality engine of this class, so its + // marked as not being explained. The length constraint is + // additionally being saved and added to the length constraint + // vector lcVec below, which is added to iinfo.d_ant below. Length + // constraints are being added as the last antecedant for the sake + // of proof reconstruction, which expect length constraints to come + // last. + iinfo.d_noExplain.push_back(lenConstraint); break; } } } } + // lcVec stores the length constraint portion of the antecedant. 
+ std::vector<Node> lcVec; + if (lenConstraint.isNull()) + { + // will do split on length + lenConstraint = nm->mkNode(EQUAL, xLenTerm, yLenTerm).negate(); + lcVec.push_back(lenConstraint); + } + else + { + utils::flattenOp(AND, lenConstraint, lcVec); + } NormalForm::getExplanationForPrefixEq(nfi, nfj, index, index, iinfo.d_ant); // Add premises for x != "" ^ y != "" for (unsigned xory = 0; xory < 2; xory++) { Node t = xory == 0 ? x : y; - Node tnz = d_state.explainNonEmpty(x); + Node tnz = d_state.explainNonEmpty(t); if (!tnz.isNull()) { - iinfo.d_ant.push_back(tnz); + lcVec.push_back(tnz); } else { tnz = x.eqNode(emp).negate(); - iinfo.d_antn.push_back(tnz); + lcVec.push_back(tnz); + iinfo.d_noExplain.push_back(tnz); } } SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk = skc->mkSkolemCached(x, - y, - isRev ? SkolemCache::SK_ID_V_UNIFIED_SPT_REV - : SkolemCache::SK_ID_V_UNIFIED_SPT, - "v_spt"); - iinfo.d_new_skolem[LENGTH_GEQ_ONE].push_back(sk); - Node eq1 = - x.eqNode(isRev ? utils::mkNConcat(sk, y) : utils::mkNConcat(y, sk)); - Node eq2 = - y.eqNode(isRev ? utils::mkNConcat(sk, x) : utils::mkNConcat(x, sk)); - - if (lentTestSuccess != -1) - { - iinfo.d_antn.push_back(lentTestExp); - iinfo.d_conc = lentTestSuccess == 0 ? 
eq1 : eq2; + std::vector<Node> newSkolems; + // make the conclusion + if (lentTestSuccess == -1) + { + iinfo.d_id = Inference::SSPLIT_VAR; + iinfo.d_conc = + getConclusion(x, y, PfRule::CONCAT_SPLIT, isRev, skc, newSkolems); + if (options::stringUnifiedVSpt() && !options::stringLenConc()) + { + Assert(newSkolems.size() == 1); + iinfo.d_new_skolem[LENGTH_GEQ_ONE].push_back(newSkolems[0]); + } + } + else if (lentTestSuccess == 0) + { iinfo.d_id = Inference::SSPLIT_VAR_PROP; + iinfo.d_conc = + getConclusion(x, y, PfRule::CONCAT_LPROP, isRev, skc, newSkolems); } else { - Node ldeq = nm->mkNode(EQUAL, xLenTerm, yLenTerm).negate(); - iinfo.d_ant.push_back(ldeq); - iinfo.d_conc = nm->mkNode(OR, eq1, eq2); - iinfo.d_id = Inference::SSPLIT_VAR; + Assert(lentTestSuccess == 1); + iinfo.d_id = Inference::SSPLIT_VAR_PROP; + iinfo.d_conc = + getConclusion(y, x, PfRule::CONCAT_LPROP, isRev, skc, newSkolems); } + // add the length constraint(s) as the last antecedant + Node lc = utils::mkAnd(lcVec); + iinfo.d_ant.push_back(lc); + iinfo.d_idRev = isRev; pinfer.push_back(info); break; } @@ -1642,7 +1824,8 @@ CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi, { Trace("strings-loop") << "Strings::Loop: tails are different." 
<< std::endl; - d_im.sendInference(iinfo.d_ant, conc, Inference::FLOOP_CONFLICT, true); + d_im.sendInference( + iinfo.d_ant, conc, Inference::FLOOP_CONFLICT, false, true); return ProcessLoopResult::CONFLICT; } } @@ -1659,6 +1842,8 @@ CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi, Node expNonEmpty = d_state.explainNonEmpty(t); if (expNonEmpty.isNull()) { + // no antecedants necessary + iinfo.d_ant.clear(); // try to make t equal to empty to avoid loop iinfo.d_conc = nm->mkNode(kind::OR, split_eq, split_eq.negate()); iinfo.d_id = Inference::LEN_SPLIT_EMP; @@ -1675,10 +1860,6 @@ CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi, } } - Node ant = d_im.mkExplain(iinfo.d_ant); - iinfo.d_ant.clear(); - iinfo.d_antn.push_back(ant); - Node str_in_re; if (s_zy == t_yz && r == emp && s_zy.isConst() && s_zy.getConst<String>().isRepeated()) @@ -1922,32 +2103,30 @@ void CoreSolver::processDeq(Node ni, Node nj) { // Either `x` or `y` is a constant and it is not know whether the // non-empty non-constant is of length one. We split the non-constant - // into a string of length one and the remainder and split on whether - // the first character of the constant and the non-constant are - // equal. + // into a string of length one and the remainder. // - // E.g. x ++ x' ++ ... != "abc" ++ y' ++ ... 
^ len(x) != "" ---> - // x = k1 ++ k2 ^ len(k1) = 1 ^ (k1 != "a" v x = "a" ++ k2) + // len(x)>=1 => x = k1 ++ k2 ^ len(k1) = 1 SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk = - skc->mkSkolemCached(nck, SkolemCache::SK_ID_DC_SPT, "dc_spt"); - d_termReg.registerTermAtomic(sk, LENGTH_ONE); - Node skr = skc->mkSkolemCached( - nck, SkolemCache::SK_ID_DC_SPT_REM, "dc_spt_rem"); - Node eq1 = nck.eqNode(nm->mkNode(kind::STRING_CONCAT, sk, skr)); - eq1 = Rewriter::rewrite(eq1); - Node eq2 = - nck.eqNode(nm->mkNode(kind::STRING_CONCAT, firstChar, skr)); - std::vector<Node> antec(nfni.d_exp.begin(), nfni.d_exp.end()); - antec.insert(antec.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); - antec.push_back(expNonEmpty); - d_im.sendInference( - antec, - nm->mkNode( - OR, nm->mkNode(AND, eq1, sk.eqNode(firstChar).negate()), eq2), - Inference::DEQ_DISL_FIRST_CHAR_STRING_SPLIT, - true); - d_im.sendPhaseRequirement(eq1, true); + std::vector<Node> newSkolems; + Node conc = getDecomposeConclusion( + nck, d_one, false, options::stringLenConc(), skc, newSkolems); + Assert(newSkolems.size() == 2); + if (options::stringLenConc()) + { + d_termReg.registerTermAtomic(newSkolems[0], LENGTH_IGNORE); + } + else + { + d_termReg.registerTermAtomic(newSkolems[0], LENGTH_ONE); + } + std::vector<Node> antecLen; + antecLen.push_back(nm->mkNode(GEQ, nckLenTerm, d_one)); + d_im.sendInference(antecLen, + antecLen, + conc, + Inference::DEQ_DISL_FIRST_CHAR_STRING_SPLIT, + false, + true); return; } } @@ -1957,47 +2136,44 @@ void CoreSolver::processDeq(Node ni, Node nj) // are both non-constants. We split them into parts that have the same // lengths. // - // E.g. x ++ x' ++ ... != y ++ y' ++ ... 
^ len(x) != len(y) ---> - // len(k1) = len(x) ^ len(k2) = len(y) ^ - // (y = k1 ++ k3 v x = k1 ++ k2) - Trace("strings-solve") << "Non-Simple Case 1 : add lemma " << std::endl; - std::vector<Node> antec(nfni.d_exp.begin(), nfni.d_exp.end()); - antec.insert(antec.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); - std::vector<Node> antecNewLits; - - if (d_state.areDisequal(ni, nj)) - { - antec.push_back(ni.eqNode(nj).negate()); - } - else + // len(x) > len(y) => x = k1 ++ k2 ^ len(k1) = len(y) + // len(y) > len(x) => y = k3 ++ k4 ^ len(k3) = len(x) + Trace("strings-solve") + << "Non-Simple Case 1 : add lemmas " << std::endl; + SkolemCache* skc = d_termReg.getSkolemCache(); + + for (unsigned r = 0; r < 2; r++) { - antecNewLits.push_back(ni.eqNode(nj).negate()); + Node ux = r == 0 ? x : y; + Node uy = r == 0 ? y : x; + Node uxLen = nm->mkNode(STRING_LENGTH, ux); + Node uyLen = nm->mkNode(STRING_LENGTH, uy); + // We always request the length constraint in the conclusion here + // because the skolem needs to have length `uyLen`. If we only assert + // that the skolem's length is greater or equal to one, we can end up + // in a loop: + // + // 1. Split: x = k1 ++ k2 ^ len(k1) >= 1 + // 2. Assume: k2 = "" + // 3. Deduce: x = k1 + // + // After step 3, `k1` is marked congruent because `x` is the older + // variable. So we get `x` in the normal form again. 
+ std::vector<Node> newSkolems; + Node conc = + getDecomposeConclusion(ux, uyLen, false, true, skc, newSkolems); + Assert(newSkolems.size() == 2); + d_termReg.registerTermAtomic(newSkolems[1], LENGTH_GEQ_ONE); + std::vector<Node> antecLen; + antecLen.push_back(nm->mkNode(GT, uxLen, uyLen)); + d_im.sendInference(antecLen, + antecLen, + conc, + Inference::DEQ_DISL_STRINGS_SPLIT, + false, + true); } - antecNewLits.push_back(xLenTerm.eqNode(yLenTerm).negate()); - std::vector<Node> conc; - SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk1 = - skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_X, "x_dsplit"); - Node sk2 = - skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_Y, "y_dsplit"); - Node sk3 = - skc->mkSkolemCached(y, x, SkolemCache::SK_ID_V_SPT, "z_dsplit"); - Node sk4 = - skc->mkSkolemCached(x, y, SkolemCache::SK_ID_V_SPT, "w_dsplit"); - d_termReg.registerTermAtomic(sk3, LENGTH_GEQ_ONE); - Node sk1Len = utils::mkNLength(sk1); - conc.push_back(sk1Len.eqNode(xLenTerm)); - Node sk2Len = utils::mkNLength(sk2); - conc.push_back(sk2Len.eqNode(yLenTerm)); - conc.push_back(nm->mkNode(OR, - y.eqNode(utils::mkNConcat(sk1, sk3)), - x.eqNode(utils::mkNConcat(sk2, sk4)))); - d_im.sendInference(antec, - antecNewLits, - nm->mkNode(AND, conc), - Inference::DEQ_DISL_STRINGS_SPLIT, - true); return; } } @@ -2090,7 +2266,7 @@ bool CoreSolver::processSimpleDeq(std::vector<Node>& nfi, Node conc = cc.size() == 1 ? 
cc[0] : NodeManager::currentNM()->mkNode(kind::AND, cc); - d_im.sendInference(ant, conc, Inference::DEQ_NORM_EMP, true); + d_im.sendInference(ant, conc, Inference::DEQ_NORM_EMP, isRev, true); return true; } @@ -2366,16 +2542,16 @@ void CoreSolver::checkLengthsEqc() { // if not, add the lemma std::vector<Node> ant; ant.insert(ant.end(), nfi.d_exp.begin(), nfi.d_exp.end()); - ant.push_back(nfi.d_base.eqNode(lt)); + ant.push_back(lt.eqNode(nfi.d_base)); Node lc = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, nf); Node lcr = Rewriter::rewrite(lc); Trace("strings-process-debug") << "Rewrote length " << lc << " to " << lcr << std::endl; if (!d_state.areEqual(llt, lcr)) { - Node eq = llt.eqNode(lcr); + Node eq = llt.eqNode(lc); ei->d_normalizedLength.set(eq); - d_im.sendInference(ant, eq, Inference::LEN_NORM, true); + d_im.sendInference(ant, eq, Inference::LEN_NORM, false, true); } } } @@ -2385,9 +2561,9 @@ bool CoreSolver::processInferInfo(CoreInferInfo& cii) { InferInfo& ii = cii.d_infer; // rewrite the conclusion, ensure non-trivial - ii.d_conc = Rewriter::rewrite(ii.d_conc); + Node concr = Rewriter::rewrite(ii.d_conc); - if (ii.isTrivial()) + if (concr == d_true) { // conclusion rewrote to true return false; @@ -2401,7 +2577,8 @@ bool CoreSolver::processInferInfo(CoreInferInfo& cii) // send phase requirements for (const std::pair<const Node, bool>& pp : cii.d_pendingPhase) { - d_im.sendPhaseRequirement(pp.first, pp.second); + Node ppr = Rewriter::rewrite(pp.first); + d_im.addPendingPhaseRequirement(ppr, pp.second); } // send the inference, which is a lemma diff --git a/src/theory/strings/core_solver.h b/src/theory/strings/core_solver.h index db1f5ecf6..b1c302935 100644 --- a/src/theory/strings/core_solver.h +++ b/src/theory/strings/core_solver.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. 
** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -81,9 +81,7 @@ class CoreSolver typedef context::CDHashMap<Node, int, NodeHashFunction> NodeIntMap; public: - CoreSolver(context::Context* c, - context::UserContext* u, - SolverState& s, + CoreSolver(SolverState& s, InferenceManager& im, TermRegistry& tr, BaseSolver& bs); @@ -219,6 +217,67 @@ class CoreSolver */ Node getNormalString(Node x, std::vector<Node>& nf_exp); //-------------------------- end query functions + + /** + * This returns the conclusion of the proof rule corresponding to splitting + * on the arrangement of terms x and y appearing in an equation of the form + * x ++ x' = y ++ y' or x' ++ x = y' ++ y + * where we are in the second case if isRev is true. This method is called + * both by the core solver and by the strings proof checker. + * + * @param x The first term + * @param y The second term + * @param rule The proof rule whose conclusion we are asking for + * @param isRev Whether the equation is in a reverse direction + * @param skc The skolem cache (to allocate fresh variables if necessary) + * @param newSkolems The vector to add new variables to + * @return The conclusion of the inference. + */ + static Node getConclusion(Node x, + Node y, + PfRule rule, + bool isRev, + SkolemCache* skc, + std::vector<Node>& newSkolems); + /** + * Get sufficient non-empty overlap of string constants c and d. + * + * This is called when handling equations of the form: + * x ++ d ++ ... = c ++ ... + * when x is non-empty and non-constant. + * + * This returns the maximal index in c which x must have as a prefix, which + * notice is an integer >= 1 since x is non-empty. 
+ * + * @param c The first constant + * @param d The second constant + * @param isRev Whether the equation is in the reverse direction + * @return The position in c. + */ + static size_t getSufficientNonEmptyOverlap(Node c, Node d, bool isRev); + /** + * This returns the conclusion of the decompose proof rule. This returns + * a conjunction of splitting string x into pieces based on length l, e.g.: + * x = k_1 ++ k_2 + * where k_1 (resp. k_2) is a skolem corresponding to a substring of x of + * length l if isRev is false (resp. true). The function optionally adds a + * length constraint len(k_1) = l (resp. len(k_2) = l). + * + * @param x The string term + * @param l The length term + * @param isRev Whether the equation is in a reverse direction + * @param addLenConc Whether to add the length constraint + * @param skc The skolem cache (to allocate fresh variables if necessary) + * @param newSkolems The vector to add new variables to + * @return The conclusion of the inference. + */ + static Node getDecomposeConclusion(Node x, + Node l, + bool isRev, + bool addLenConc, + SkolemCache* skc, + std::vector<Node>& newSkolems); + private: /** * This processes the infer info ii as an inference. In more detail, it calls diff --git a/src/theory/strings/eqc_info.cpp b/src/theory/strings/eqc_info.cpp index 31d7f8b01..45062c59a 100644 --- a/src/theory/strings/eqc_info.cpp +++ b/src/theory/strings/eqc_info.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Tianyi Liang ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/eqc_info.h b/src/theory/strings/eqc_info.h index 108264969..c76102cbc 100644 --- a/src/theory/strings/eqc_info.h +++ b/src/theory/strings/eqc_info.h @@ -5,7 +5,7 @@ ** Andrew Reynolds ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index 9b1b0e6dd..7e416d132 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -2,10 +2,10 @@ /*! \file extf_solver.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** Andrew Reynolds, Tianyi Liang, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -27,9 +27,7 @@ namespace CVC4 { namespace theory { namespace strings { -ExtfSolver::ExtfSolver(context::Context* c, - context::UserContext* u, - SolverState& s, +ExtfSolver::ExtfSolver(SolverState& s, InferenceManager& im, TermRegistry& tr, StringsRewriter& rewriter, @@ -45,10 +43,10 @@ ExtfSolver::ExtfSolver(context::Context* c, d_csolver(cs), d_extt(et), d_statistics(statistics), - d_preproc(d_termReg.getSkolemCache(), u, statistics), - d_hasExtf(c, false), - d_extfInferCache(c), - d_reduced(u) + d_preproc(d_termReg.getSkolemCache(), s.getUserContext(), statistics), + d_hasExtf(s.getSatContext(), false), + d_extfInferCache(s.getSatContext()), + d_reduced(s.getUserContext()) { d_extt.addFunctionKind(kind::STRING_SUBSTR); d_extt.addFunctionKind(kind::STRING_UPDATE); @@ -67,6 +65,7 @@ ExtfSolver::ExtfSolver(context::Context* c, d_extt.addFunctionKind(kind::STRING_TOUPPER); d_extt.addFunctionKind(kind::STRING_REV); d_extt.addFunctionKind(kind::SEQ_UNIT); + d_extt.addFunctionKind(kind::SEQ_NTH); d_true = NodeManager::currentNM()->mkConst(true); d_false = NodeManager::currentNM()->mkConst(false); @@ -74,17 +73,21 @@ ExtfSolver::ExtfSolver(context::Context* c, ExtfSolver::~ExtfSolver() {} +void ExtfSolver::addSharedTerm(TNode n) { d_extt.registerTermRec(n); } + bool ExtfSolver::doReduction(int effort, Node n) { Assert(d_extfInfoTmp.find(n) != d_extfInfoTmp.end()); if (!d_extfInfoTmp[n].d_modelActive) { // n is not active in the model, no need to reduce + Trace("strings-extf-debug") << "...skip due to model active" << std::endl; return false; } if (d_reduced.find(n)!=d_reduced.end()) { // already sent a reduction lemma + Trace("strings-extf-debug") << "...skip due to reduced" << std::endl; return false; } // determine the effort level to process the extf at @@ -126,7 +129,8 @@ bool ExtfSolver::doReduction(int effort, Node n) lexp.push_back(lenx.eqNode(lens)); 
lexp.push_back(n.negate()); Node xneqs = x.eqNode(s).negate(); - d_im.sendInference(lexp, xneqs, Inference::CTN_NEG_EQUAL, true); + d_im.sendInference( + lexp, xneqs, Inference::CTN_NEG_EQUAL, false, true); } // this depends on the current assertions, so this // inference is context-dependent @@ -155,6 +159,8 @@ bool ExtfSolver::doReduction(int effort, Node n) } if (effort != r_effort) { + + Trace("strings-extf-debug") << "...skip due to effort" << std::endl; // not the right effort level to reduce return false; } @@ -167,12 +173,13 @@ bool ExtfSolver::doReduction(int effort, Node n) Node s = n[1]; // positive contains reduces to a equality SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk1 = skc->mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_PRE, "sc1"); - Node sk2 = skc->mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_POST, "sc2"); - Node eq = Rewriter::rewrite(x.eqNode(utils::mkNConcat(sk1, s, sk2))); - std::vector<Node> exp_vec; - exp_vec.push_back(n); - d_im.sendInference(d_emptyVec, exp_vec, eq, Inference::CTN_POS, true); + Node eq = d_termReg.eagerReduce(n, skc); + Assert(!eq.isNull()); + Assert(eq.getKind() == ITE && eq[0] == n); + eq = eq[1]; + std::vector<Node> expn; + expn.push_back(n); + d_im.sendInference(expn, expn, eq, Inference::CTN_POS, false, true); Trace("strings-extf-debug") << " resolve extf : " << n << " based on positive contain reduction." 
<< std::endl; @@ -186,21 +193,22 @@ bool ExtfSolver::doReduction(int effort, Node n) NodeManager* nm = NodeManager::currentNM(); Assert(k == STRING_SUBSTR || k == STRING_UPDATE || k == STRING_STRCTN || k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI - || k == STRING_STRREPL || k == STRING_STRREPLALL + || k == STRING_STRREPL || k == STRING_STRREPLALL || k == SEQ_NTH || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV); std::vector<Node> new_nodes; Node res = d_preproc.simplify(n, new_nodes); Assert(res != n); - new_nodes.push_back(res.eqNode(n)); + new_nodes.push_back(n.eqNode(res)); Node nnlem = new_nodes.size() == 1 ? new_nodes[0] : nm->mkNode(AND, new_nodes); - nnlem = Rewriter::rewrite(nnlem); Trace("strings-red-lemma") << "Reduction_" << effort << " lemma : " << nnlem << std::endl; Trace("strings-red-lemma") << "...from " << n << std::endl; - d_im.sendInference(d_emptyVec, nnlem, Inference::REDUCTION, true); + Trace("strings-red-lemma") + << "Reduction_" << effort << " rewritten : " << Rewriter::rewrite(nnlem) << std::endl; + d_im.sendInference(d_emptyVec, nnlem, Inference::REDUCTION, false, true); Trace("strings-extf-debug") << " resolve extf : " << n << " based on reduction." << std::endl; // add as reduction lemma @@ -275,7 +283,8 @@ void ExtfSolver::checkExtfEval(int effort) } // If there is information involving the children, attempt to do an // inference and/or mark n as reduced. - Node to_reduce; + bool reduced = false; + Node to_reduce = n; if (schanged) { Node sn = nm->mkNode(n.getKind(), schildren); @@ -381,13 +390,8 @@ void ExtfSolver::checkExtfEval(int effort) Trace("strings-extf") << " resolve extf : " << sn << " -> " << nrc << std::endl; Inference inf = effort == 0 ? 
Inference::EXTF : Inference::EXTF_N; - d_im.sendInference(einfo.d_exp, conc, inf, true); + d_im.sendInference(einfo.d_exp, conc, inf, false, true); d_statistics.d_cdSimplifications << n.getKind(); - if (d_state.isInConflict()) - { - Trace("strings-extf-debug") << " conflict, return." << std::endl; - return; - } } } else @@ -402,6 +406,7 @@ void ExtfSolver::checkExtfEval(int effort) einfo.d_modelActive = false; } } + reduced = true; } else { @@ -425,28 +430,26 @@ void ExtfSolver::checkExtfEval(int effort) effort == 0 ? Inference::EXTF_D : Inference::EXTF_D_N; d_im.sendInternalInference(einfo.d_exp, nrcAssert, infer); } - // We must use the original n here to avoid circular justifications for - // why extended functions are reduced below. In particular, to_reduce - // should never be a duplicate of another term considered in the block - // of code for checkExtfInference below. - to_reduce = n; + to_reduce = nrc; } } - else - { - to_reduce = n; - } + // We must use the original n here to avoid circular justifications for + // why extended functions are reduced. In particular, n should never be a + // duplicate of another term considered in the block of code for + // checkExtfInference below. // if not reduced and not processed - if (!to_reduce.isNull() - && inferProcessed.find(to_reduce) == inferProcessed.end()) + if (!reduced && !n.isNull() + && inferProcessed.find(n) == inferProcessed.end()) { - inferProcessed.insert(to_reduce); + inferProcessed.insert(n); Assert(effort < 3); if (effort == 1) { Trace("strings-extf") << " cannot rewrite extf : " << to_reduce << std::endl; } + // we take to_reduce to be the (partially) reduced version of n, which + // is justified by the explanation in einfo. checkExtfInference(n, to_reduce, einfo, effort); if (Trace.isOn("strings-extf-list")) { @@ -466,6 +469,11 @@ void ExtfSolver::checkExtfEval(int effort) has_nreduce = true; } } + if (d_state.isInConflict()) + { + Trace("strings-extf-debug") << " conflict, return." 
<< std::endl; + return; + } } d_hasExtf = has_nreduce; } @@ -623,13 +631,13 @@ void ExtfSolver::checkExtfInference(Node n, } else { - // If we already know that s (does not) contain t, then n is redundant. - // For example, if str.contains( x, y ), str.contains( z, y ), and x=z - // are asserted in the current context, then str.contains( z, y ) is - // satisfied by all models of str.contains( x, y ) ^ x=z and thus can - // be ignored. + // If we already know that s (does not) contain t, then n may be + // redundant. However, we do not mark n as reduced here, since strings + // reductions may require dependencies between extended functions. + // Marking reduced here could lead to incorrect models if an + // extended function is marked reduced based on an assignment to + // something that depends on n. Trace("strings-extf-debug") << " redundant." << std::endl; - d_extt.markReduced(n); } } return; @@ -699,6 +707,23 @@ std::vector<Node> ExtfSolver::getActive(Kind k) const return d_extt.getActive(k); } +bool StringsExtfCallback::getCurrentSubstitution( + int effort, + const std::vector<Node>& vars, + std::vector<Node>& subs, + std::map<Node, std::vector<Node> >& exp) +{ + Trace("strings-subs") << "getCurrentSubstitution, effort = " << effort + << std::endl; + for (const Node& v : vars) + { + Trace("strings-subs") << " get subs for " << v << "..." << std::endl; + Node s = d_esolver->getCurrentSubstitutionFor(effort, v, exp[v]); + subs.push_back(s); + } + return true; +} + } // namespace strings } // namespace theory } // namespace CVC4 diff --git a/src/theory/strings/extf_solver.h b/src/theory/strings/extf_solver.h index d99a881f6..df0a7ccb5 100644 --- a/src/theory/strings/extf_solver.h +++ b/src/theory/strings/extf_solver.h @@ -2,10 +2,10 @@ /*! \file extf_solver.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli + ** Andrew Reynolds, Andres Noetzli, Tim King ** This file is part of the CVC4 project. 
** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -83,9 +83,7 @@ class ExtfSolver typedef context::CDHashSet<Node, NodeHashFunction> NodeSet; public: - ExtfSolver(context::Context* c, - context::UserContext* u, - SolverState& s, + ExtfSolver(SolverState& s, InferenceManager& im, TermRegistry& tr, StringsRewriter& rewriter, @@ -95,6 +93,11 @@ class ExtfSolver SequencesStatistics& statistics); ~ExtfSolver(); + /** + * Called when a shared term is added to theory of strings, this registers + * n with the extended theory utility for context-dependent simplification. + */ + void addSharedTerm(TNode n); /** check extended functions evaluation * * This applies "context-dependent simplification" for all active extended @@ -211,6 +214,23 @@ class ExtfSolver NodeSet d_reduced; }; +/** An extended theory callback */ +class StringsExtfCallback : public ExtTheoryCallback +{ + public: + StringsExtfCallback() : d_esolver(nullptr) {} + /** + * Get current substitution based on the underlying extended function + * solver. + */ + bool getCurrentSubstitution(int effort, + const std::vector<Node>& vars, + std::vector<Node>& subs, + std::map<Node, std::vector<Node> >& exp) override; + /** The extended function solver */ + ExtfSolver* d_esolver; +}; + } // namespace strings } // namespace theory } // namespace CVC4 diff --git a/src/theory/strings/infer_info.cpp b/src/theory/strings/infer_info.cpp index c75e03440..0d2f94f91 100644 --- a/src/theory/strings/infer_info.cpp +++ b/src/theory/strings/infer_info.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. 
** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -14,6 +14,9 @@ #include "theory/strings/infer_info.h" +#include "theory/strings/inference_manager.h" +#include "theory/strings/theory_strings_utils.h" + namespace CVC4 { namespace theory { namespace strings { @@ -85,6 +88,7 @@ const char* toString(Inference i) case Inference::CTN_NEG_EQUAL: return "CTN_NEG_EQUAL"; case Inference::CTN_POS: return "CTN_POS"; case Inference::REDUCTION: return "REDUCTION"; + case Inference::PREFIX_CONFLICT: return "PREFIX_CONFLICT"; default: return "?"; } } @@ -95,7 +99,18 @@ std::ostream& operator<<(std::ostream& out, Inference i) return out; } -InferInfo::InferInfo() : d_id(Inference::NONE) {} +InferInfo::InferInfo() : d_sim(nullptr), d_id(Inference::NONE), d_idRev(false) +{ +} + +bool InferInfo::process(TheoryInferenceManager* im, bool asLemma) +{ + if (asLemma) + { + return d_sim->processLemma(*this); + } + return d_sim->processFact(*this); +} bool InferInfo::isTrivial() const { @@ -106,26 +121,36 @@ bool InferInfo::isTrivial() const bool InferInfo::isConflict() const { Assert(!d_conc.isNull()); - return d_conc.isConst() && !d_conc.getConst<bool>() && d_antn.empty(); + return d_conc.isConst() && !d_conc.getConst<bool>() && d_noExplain.empty(); } bool InferInfo::isFact() const { Assert(!d_conc.isNull()); TNode atom = d_conc.getKind() == kind::NOT ? 
d_conc[0] : d_conc; - return !atom.isConst() && atom.getKind() != kind::OR && d_antn.empty(); + return !atom.isConst() && atom.getKind() != kind::OR && d_noExplain.empty(); +} + +Node InferInfo::getAntecedant() const +{ + // d_noExplain is a subset of d_ant + return utils::mkAnd(d_ant); } std::ostream& operator<<(std::ostream& out, const InferInfo& ii) { out << "(infer " << ii.d_id << " " << ii.d_conc; + if (ii.d_idRev) + { + out << " :rev"; + } if (!ii.d_ant.empty()) { out << " :ant (" << ii.d_ant << ")"; } - if (!ii.d_antn.empty()) + if (!ii.d_noExplain.empty()) { - out << " :antn (" << ii.d_antn << ")"; + out << " :no-explain (" << ii.d_noExplain << ")"; } out << ")"; return out; diff --git a/src/theory/strings/infer_info.h b/src/theory/strings/infer_info.h index 2a42b9fab..4c5674d2b 100644 --- a/src/theory/strings/infer_info.h +++ b/src/theory/strings/infer_info.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -21,6 +21,7 @@ #include <vector> #include "expr/node.h" +#include "theory/theory_inference.h" #include "util/safe_print.h" namespace CVC4 { @@ -35,9 +36,17 @@ namespace strings { * Note: The order in this enum matters in certain cases (e.g. inferences * related to normal forms), inferences that come first are generally * preferred. + * + * Notice that an inference is intentionally distinct from PfRule. An + * inference captures *why* we performed a reasoning step, and a PfRule + * rule captures *what* reasoning step was used. For instance, the inference + * LEN_SPLIT translates to PfRule::SPLIT. 
The use of stats on inferences allows + * us to know that we performed N splits (PfRule::SPLIT) because we wanted + * to split on lengths for string equalities (Inference::LEN_SPLIT). */ enum class Inference : uint32_t { + BEGIN, //-------------------------------------- base solver // initial normalize singular // x1 = "" ^ ... ^ x_{i-1} = "" ^ x_{i+1} = "" ^ ... ^ xn = "" => @@ -295,6 +304,10 @@ enum class Inference : uint32_t // (see theory_strings_preprocess). REDUCTION, //-------------------------------------- end extended function solver + //-------------------------------------- prefix conflict + // prefix conflict (coarse-grained) + PREFIX_CONFLICT, + //-------------------------------------- end prefix conflict NONE, }; @@ -335,18 +348,42 @@ enum LengthStatus LENGTH_GEQ_ONE }; +class InferenceManager; + /** * An inference. This is a class to track an unprocessed call to either * send a fact, lemma, or conflict that is waiting to be asserted to the * equality engine or sent on the output channel. + * + * For the sake of proofs, the antecedants in InferInfo have a particular + * ordering for many of the core strings rules, which is expected by + * InferProofCons for constructing proofs of F_CONST, F_UNIFY, N_CONST, etc. + * which apply to a pair of string terms t and s. At a high level, the ordering + * expected in d_ant is: + * (1) (multiple) literals that explain why t and s have the same prefix/suffix, + * (2) t = s, + * (3) (optionally) a length constraint. + * For example, say we have: + * { x ++ y ++ v1 = z ++ w ++ v2, x = z ++ u, u = "", len(y) = len(w) } + * We can conclude y = w by the N_UNIFY rule from the left side. The antecedant + * has the following form: + * - (prefix up to y/w equal) x = z ++ u, u = "", + * - (main equality) x ++ y ++ v1 = z ++ w ++ v2, + * - (length constraint) len(y) = len(w). 
*/ -class InferInfo +class InferInfo : public TheoryInference { public: InferInfo(); ~InferInfo() {} + /** Process this inference */ + bool process(TheoryInferenceManager* im, bool asLemma) override; + /** Pointer to the class used for processing this info */ + InferenceManager* d_sim; /** The inference identifier */ Inference d_id; + /** Whether it is the reverse form of the above id */ + bool d_idRev; /** The conclusion */ Node d_conc; /** @@ -357,9 +394,11 @@ class InferInfo /** * The "new literal" antecedant(s) of the inference, interpreted * conjunctively. These are literals that were needed to show the conclusion - * but do not currently hold in the equality engine. + * but do not currently hold in the equality engine. These should be a subset + * of d_ant. In other words, antecedants that are not explained are stored + * in *both* d_ant and d_noExplain. */ - std::vector<Node> d_antn; + std::vector<Node> d_noExplain; /** * A list of new skolems introduced as a result of this inference. They * are mapped to by a length status, indicating the length constraint that @@ -370,15 +409,17 @@ class InferInfo bool isTrivial() const; /** * Does this infer info correspond to a conflict? True if d_conc is false - * and it has no new antecedants (d_antn). + * and it has no new antecedants (d_noExplain). */ bool isConflict() const; /** * Does this infer info correspond to a "fact". A fact is an inference whose * conclusion should be added as an equality or predicate to the equality - * engine with no new external antecedants (d_antn). + * engine with no new external antecedants (d_noExplain). */ bool isFact() const; + /** Get antecedant */ + Node getAntecedant() const; }; /** diff --git a/src/theory/strings/infer_proof_cons.cpp b/src/theory/strings/infer_proof_cons.cpp new file mode 100644 index 000000000..66f71bf14 --- /dev/null +++ b/src/theory/strings/infer_proof_cons.cpp @@ -0,0 +1,1016 @@ +/********************* */ +/*! 
\file infer_proof_cons.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of inference to proof conversion + **/ + +#include "theory/strings/infer_proof_cons.h" + +#include "expr/skolem_manager.h" +#include "options/smt_options.h" +#include "options/strings_options.h" +#include "theory/builtin/proof_checker.h" +#include "theory/rewriter.h" +#include "theory/strings/regexp_operation.h" +#include "theory/strings/theory_strings_utils.h" + +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +InferProofCons::InferProofCons(context::Context* c, + ProofNodeManager* pnm, + SequencesStatistics& statistics) + : d_pnm(pnm), d_lazyFactMap(c), d_statistics(statistics) +{ + Assert(d_pnm != nullptr); +} + +void InferProofCons::notifyFact(const InferInfo& ii) +{ + Node fact = ii.d_conc; + Trace("strings-ipc-debug") + << "InferProofCons::notifyFact: " << ii << std::endl; + if (d_lazyFactMap.find(fact) != d_lazyFactMap.end()) + { + Trace("strings-ipc-debug") << "...duplicate!" << std::endl; + return; + } + Node symFact = CDProof::getSymmFact(fact); + if (!symFact.isNull() && d_lazyFactMap.find(symFact) != d_lazyFactMap.end()) + { + Trace("strings-ipc-debug") << "...duplicate (sym)!" 
<< std::endl; + return; + } + std::shared_ptr<InferInfo> iic = std::make_shared<InferInfo>(ii); + d_lazyFactMap.insert(ii.d_conc, iic); +} + +void InferProofCons::convert(Inference infer, + bool isRev, + Node conc, + const std::vector<Node>& exp, + ProofStep& ps, + TheoryProofStepBuffer& psb, + bool& useBuffer) +{ + // by default, don't use the buffer + useBuffer = false; + // Must flatten children with respect to AND to be ready to explain. + // We store the index where each flattened vector begins, since some + // explanations are grouped together using AND. + std::vector<size_t> startExpIndex; + for (const Node& ec : exp) + { + // store the index in the flattened vector + startExpIndex.push_back(ps.d_children.size()); + utils::flattenOp(AND, ec, ps.d_children); + } + // debug print + if (Trace.isOn("strings-ipc-debug")) + { + Trace("strings-ipc-debug") << "InferProofCons::convert: " << infer + << (isRev ? " :rev " : " ") << conc << std::endl; + for (const Node& ec : exp) + { + Trace("strings-ipc-debug") << " e: " << ec << std::endl; + } + } + // try to find a set of proof steps to incorporate into the buffer + psb.clear(); + NodeManager* nm = NodeManager::currentNM(); + Node nodeIsRev = nm->mkConst(isRev); + switch (infer) + { + // ========================== equal by substitution+rewriting + case Inference::I_NORM_S: + case Inference::I_CONST_MERGE: + case Inference::I_NORM: + case Inference::LEN_NORM: + case Inference::NORMAL_FORM: + case Inference::CODE_PROXY: + { + ps.d_args.push_back(conc); + // will attempt this rule + ps.d_rule = PfRule::MACRO_SR_PRED_INTRO; + } + break; + // ========================== substitution + rewriting + case Inference::RE_NF_CONFLICT: + case Inference::EXTF: + case Inference::EXTF_N: + case Inference::EXTF_D: + case Inference::EXTF_D_N: + case Inference::I_CONST_CONFLICT: + case Inference::UNIT_CONST_CONFLICT: + { + if (!ps.d_children.empty()) + { + std::vector<Node> exps(ps.d_children.begin(), ps.d_children.end() - 1); + Node src 
= ps.d_children[ps.d_children.size() - 1]; + if (psb.applyPredTransform(src, conc, exps)) + { + useBuffer = true; + } + } + if (!useBuffer) + { + // use the predicate version? + ps.d_args.push_back(conc); + ps.d_rule = PfRule::MACRO_SR_PRED_INTRO; + } + } + break; + // ========================== rewrite pred + case Inference::EXTF_EQ_REW: + case Inference::INFER_EMP: + { + // the last child is the predicate we are operating on, move to front + Node src = ps.d_children[ps.d_children.size() - 1]; + std::vector<Node> expe(ps.d_children.begin(), ps.d_children.end() - 1); + // start with a default rewrite + Node mainEqSRew = psb.applyPredElim(src, expe); + if (mainEqSRew == conc) + { + useBuffer = true; + break; + } + // may need the "extended equality rewrite" + Node mainEqSRew2 = psb.applyPredElim( + mainEqSRew, {}, MethodId::SB_DEFAULT, MethodId::RW_REWRITE_EQ_EXT); + if (mainEqSRew2 == conc) + { + useBuffer = true; + break; + } + // rewrite again with default rewriter + Node mainEqSRew3 = psb.applyPredElim(mainEqSRew2, {}); + useBuffer = (mainEqSRew3 == conc); + } + break; + // ========================== substitution+rewriting, CONCAT_EQ, ... + case Inference::F_CONST: + case Inference::F_UNIFY: + case Inference::F_ENDPOINT_EMP: + case Inference::F_ENDPOINT_EQ: + case Inference::F_NCTN: + case Inference::N_EQ_CONF: + case Inference::N_CONST: + case Inference::N_UNIFY: + case Inference::N_ENDPOINT_EMP: + case Inference::N_ENDPOINT_EQ: + case Inference::N_NCTN: + case Inference::SSPLIT_CST_PROP: + case Inference::SSPLIT_VAR_PROP: + case Inference::SSPLIT_CST: + case Inference::SSPLIT_VAR: + { + Trace("strings-ipc-core") << "Generate core rule for " << infer + << " (rev=" << isRev << ")" << std::endl; + // All of the above inferences have the form: + // (explanation for why t and s have the same prefix/suffix) ^ + // t = s ^ + // (length constraint)? + // We call t=s the "main equality" below. The length constraint is + // optional, which we split on below. 
+ size_t nchild = ps.d_children.size(); + size_t mainEqIndex = 0; + bool mainEqIndexSet = false; + // the length constraint + std::vector<Node> lenConstraint; + // these inferences have a length constraint as the last explain + if (infer == Inference::N_UNIFY || infer == Inference::F_UNIFY + || infer == Inference::SSPLIT_CST || infer == Inference::SSPLIT_VAR + || infer == Inference::SSPLIT_VAR_PROP + || infer == Inference::SSPLIT_CST_PROP) + { + if (exp.size() >= 2) + { + Assert(exp.size() <= startExpIndex.size()); + // The index of the "main" equality is the last equality before + // the length explanation. + mainEqIndex = startExpIndex[exp.size() - 1] - 1; + mainEqIndexSet = true; + // the remainder is the length constraint + lenConstraint.insert(lenConstraint.end(), + ps.d_children.begin() + mainEqIndex + 1, + ps.d_children.end()); + } + } + else if (nchild >= 1) + { + // The index of the main equality is the last child. + mainEqIndex = nchild - 1; + mainEqIndexSet = true; + } + Node mainEq; + if (mainEqIndexSet) + { + mainEq = ps.d_children[mainEqIndex]; + Trace("strings-ipc-core") << "Main equality " << mainEq << " at index " + << mainEqIndex << std::endl; + } + if (mainEq.isNull() || mainEq.getKind() != EQUAL) + { + Trace("strings-ipc-core") + << "...failed to find main equality" << std::endl; + break; + } + // apply MACRO_SR_PRED_ELIM using equalities up to the main eq + std::vector<Node> childrenSRew; + childrenSRew.push_back(mainEq); + childrenSRew.insert(childrenSRew.end(), + ps.d_children.begin(), + ps.d_children.begin() + mainEqIndex); + Node mainEqSRew = + psb.tryStep(PfRule::MACRO_SR_PRED_ELIM, childrenSRew, {}); + if (CDProof::isSame(mainEqSRew, mainEq)) + { + Trace("strings-ipc-core") << "...undo step" << std::endl; + // the rule added above was not necessary + psb.popStep(); + } + else if (mainEqSRew == conc) + { + Trace("strings-ipc-core") << "...success after rewrite!" 
<< std::endl; + useBuffer = true; + break; + } + Trace("strings-ipc-core") + << "Main equality after subs+rewrite " << mainEqSRew << std::endl; + // now, apply CONCAT_EQ to get the remainder + std::vector<Node> childrenCeq; + childrenCeq.push_back(mainEqSRew); + std::vector<Node> argsCeq; + argsCeq.push_back(nodeIsRev); + Node mainEqCeq = psb.tryStep(PfRule::CONCAT_EQ, childrenCeq, argsCeq); + Trace("strings-ipc-core") + << "Main equality after CONCAT_EQ " << mainEqCeq << std::endl; + if (mainEqCeq.isNull() || mainEqCeq.getKind() != EQUAL) + { + // fail + break; + } + else if (mainEqCeq == mainEqSRew) + { + Trace("strings-ipc-core") << "...undo step" << std::endl; + // not necessary, probably first component of equality + psb.popStep(); + } + // Now, mainEqCeq is an equality t ++ ... == s ++ ... where the + // inference involved t and s. + if (infer == Inference::N_ENDPOINT_EQ + || infer == Inference::N_ENDPOINT_EMP + || infer == Inference::F_ENDPOINT_EQ + || infer == Inference::F_ENDPOINT_EMP) + { + // Should be equal to conclusion already, or rewrite to it. + // Notice that this step is necessary to handle the "rproc" + // optimization in processSimpleNEq. Alternatively, this could + // possibly be done by CONCAT_EQ with !isRev. + std::vector<Node> cexp; + if (psb.applyPredTransform(mainEqCeq, + conc, + cexp, + MethodId::SB_DEFAULT, + MethodId::RW_REWRITE_EQ_EXT)) + { + Trace("strings-ipc-core") << "Transformed to " << conc + << " via pred transform" << std::endl; + // success + useBuffer = true; + Trace("strings-ipc-core") << "...success!" << std::endl; + } + // Otherwise, note that EMP rules conclude ti = "" where + // t1 ++ ... ++ tn == "". However, these are very rarely applied, let + // alone for 2+ children. This case is intentionally unhandled here. 
+ } + else if (infer == Inference::N_CONST || infer == Inference::F_CONST + || infer == Inference::N_EQ_CONF) + { + // should be a constant conflict + std::vector<Node> childrenC; + childrenC.push_back(mainEqCeq); + std::vector<Node> argsC; + argsC.push_back(nodeIsRev); + Node mainEqC = psb.tryStep(PfRule::CONCAT_CONFLICT, childrenC, argsC); + if (mainEqC == conc) + { + useBuffer = true; + Trace("strings-ipc-core") << "...success!" << std::endl; + } + } + else + { + std::vector<Node> tvec; + std::vector<Node> svec; + utils::getConcat(mainEqCeq[0], tvec); + utils::getConcat(mainEqCeq[1], svec); + Node t0 = tvec[isRev ? tvec.size() - 1 : 0]; + Node s0 = svec[isRev ? svec.size() - 1 : 0]; + bool applySym = false; + // may need to apply symmetry + if ((infer == Inference::SSPLIT_CST + || infer == Inference::SSPLIT_CST_PROP) + && t0.isConst()) + { + Assert(!s0.isConst()); + applySym = true; + std::swap(t0, s0); + } + if (infer == Inference::N_UNIFY || infer == Inference::F_UNIFY) + { + if (conc.getKind() != EQUAL) + { + break; + } + // one side should match, the other side could be a split constant + if (conc[0] != t0 && conc[1] != s0) + { + applySym = true; + std::swap(t0, s0); + } + Assert(conc[0].isConst() == t0.isConst()); + Assert(conc[1].isConst() == s0.isConst()); + } + PfRule rule = PfRule::UNKNOWN; + // the form of the required length constraint expected by the proof + Node lenReq; + bool lenSuccess = false; + if (infer == Inference::N_UNIFY || infer == Inference::F_UNIFY) + { + // the required premise for unify is always len(x) = len(y), + // however the explanation may not be literally this. Thus, we + // need to reconstruct a proof from the given explanation. + // it should be the case that lenConstraint => lenReq. + // We use terms in the conclusion equality, not t0, s0 here. 
+ lenReq = nm->mkNode(STRING_LENGTH, conc[0]) + .eqNode(nm->mkNode(STRING_LENGTH, conc[1])); + lenSuccess = convertLengthPf(lenReq, lenConstraint, psb); + rule = PfRule::CONCAT_UNIFY; + } + else if (infer == Inference::SSPLIT_VAR) + { + // it should be the case that lenConstraint => lenReq + lenReq = nm->mkNode(STRING_LENGTH, t0) + .eqNode(nm->mkNode(STRING_LENGTH, s0)) + .notNode(); + lenSuccess = convertLengthPf(lenReq, lenConstraint, psb); + rule = PfRule::CONCAT_SPLIT; + } + else if (infer == Inference::SSPLIT_CST) + { + // it should be the case that lenConstraint => lenReq + lenReq = nm->mkNode(STRING_LENGTH, t0) + .eqNode(nm->mkConst(Rational(0))) + .notNode(); + lenSuccess = convertLengthPf(lenReq, lenConstraint, psb); + rule = PfRule::CONCAT_CSPLIT; + } + else if (infer == Inference::SSPLIT_VAR_PROP) + { + // it should be the case that lenConstraint => lenReq + for (unsigned r = 0; r < 2; r++) + { + lenReq = nm->mkNode(GT, + nm->mkNode(STRING_LENGTH, t0), + nm->mkNode(STRING_LENGTH, s0)); + if (convertLengthPf(lenReq, lenConstraint, psb)) + { + lenSuccess = true; + break; + } + if (r == 0) + { + // may be the other direction + applySym = true; + std::swap(t0, s0); + } + } + rule = PfRule::CONCAT_LPROP; + } + else if (infer == Inference::SSPLIT_CST_PROP) + { + // it should be the case that lenConstraint => lenReq + lenReq = nm->mkNode(STRING_LENGTH, t0) + .eqNode(nm->mkConst(Rational(0))) + .notNode(); + lenSuccess = convertLengthPf(lenReq, lenConstraint, psb); + rule = PfRule::CONCAT_CPROP; + } + if (!lenSuccess) + { + Trace("strings-ipc-core") + << "...failed due to length constraint" << std::endl; + break; + } + // apply symmetry if necessary + if (applySym) + { + std::vector<Node> childrenSymm; + childrenSymm.push_back(mainEqCeq); + // note this explicit step may not be necessary + mainEqCeq = psb.tryStep(PfRule::SYMM, childrenSymm, {}); + Trace("strings-ipc-core") + << "Main equality after SYMM " << mainEqCeq << std::endl; + } + if (rule != 
PfRule::UNKNOWN) + { + Trace("strings-ipc-core") + << "Core rule length requirement is " << lenReq << std::endl; + // apply the given rule + std::vector<Node> childrenMain; + childrenMain.push_back(mainEqCeq); + childrenMain.push_back(lenReq); + std::vector<Node> argsMain; + argsMain.push_back(nodeIsRev); + Node mainEqMain = psb.tryStep(rule, childrenMain, argsMain); + Trace("strings-ipc-core") << "Main equality after " << rule << " " + << mainEqMain << std::endl; + if (mainEqMain == mainEqCeq) + { + Trace("strings-ipc-core") << "...undo step" << std::endl; + // not necessary, probably first component of equality + psb.popStep(); + } + // either equal or rewrites to it + std::vector<Node> cexp; + if (psb.applyPredTransform(mainEqMain, conc, cexp)) + { + // requires that length success is also true + useBuffer = true; + Trace("strings-ipc-core") << "...success" << std::endl; + } + else + { + Trace("strings-ipc-core") << "...fail" << std::endl; + } + } + else + { + // should always have given a rule to try above + Assert(false) << "No reconstruction rule given for " << infer; + } + } + } + break; + // ========================== Disequalities + case Inference::DEQ_DISL_FIRST_CHAR_STRING_SPLIT: + case Inference::DEQ_DISL_STRINGS_SPLIT: + { + if (conc.getKind() != AND || conc.getNumChildren() != 2 + || conc[0].getKind() != EQUAL || !conc[0][0].getType().isStringLike() + || conc[1].getKind() != EQUAL + || conc[1][0].getKind() != STRING_LENGTH) + { + Trace("strings-ipc-deq") << "malformed application" << std::endl; + Assert(false) << "unexpected conclusion " << conc << " for " << infer; + } + else + { + Node lenReq = + nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, conc[0][0]), conc[1][1]); + Trace("strings-ipc-deq") + << "length requirement is " << lenReq << std::endl; + if (convertLengthPf(lenReq, ps.d_children, psb)) + { + Trace("strings-ipc-deq") << "...success length" << std::endl; + // make the proof + std::vector<Node> childrenMain; + childrenMain.push_back(lenReq); + 
std::vector<Node> argsMain; + argsMain.push_back(nodeIsRev); + Node mainConc = + psb.tryStep(PfRule::STRING_DECOMPOSE, childrenMain, argsMain); + Trace("strings-ipc-deq") + << "...main conclusion is " << mainConc << std::endl; + useBuffer = (mainConc == conc); + Trace("strings-ipc-deq") + << "...success is " << useBuffer << std::endl; + } + else + { + Trace("strings-ipc-deq") << "...fail length" << std::endl; + } + } + } + break; + // ========================== Boolean split + case Inference::CARD_SP: + case Inference::LEN_SPLIT: + case Inference::LEN_SPLIT_EMP: + case Inference::DEQ_DISL_EMP_SPLIT: + case Inference::DEQ_DISL_FIRST_CHAR_EQ_SPLIT: + case Inference::DEQ_STRINGS_EQ: + case Inference::DEQ_LENS_EQ: + case Inference::DEQ_LENGTH_SP: + { + if (conc.getKind() != OR) + { + // This should never happen. If it does, we resort to using + // STRING_TRUST below (in production mode). + Assert(false) << "Expected OR conclusion for " << infer; + } + else + { + ps.d_rule = PfRule::SPLIT; + Assert(ps.d_children.empty()); + ps.d_args.push_back(conc[0]); + } + } + break; + // ========================== Regular expression unfolding + case Inference::RE_UNFOLD_POS: + case Inference::RE_UNFOLD_NEG: + { + if (infer == Inference::RE_UNFOLD_POS) + { + ps.d_rule = PfRule::RE_UNFOLD_POS; + } + else + { + ps.d_rule = PfRule::RE_UNFOLD_NEG; + // it may be an optimized form of concat simplification + Assert(ps.d_children.size() == 1); + Node mem = ps.d_children[0]; + Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP); + if (mem[0][1].getKind() == REGEXP_CONCAT) + { + size_t index; + Node reLen = RegExpOpr::getRegExpConcatFixed(mem[0][1], index); + // if we can find a fixed length for a component, use the optimized + // version + if (!reLen.isNull()) + { + ps.d_rule = PfRule::RE_UNFOLD_NEG_CONCAT_FIXED; + } + } + } + } + break; + // ========================== Reduction + case Inference::CTN_POS: + case Inference::CTN_NEG_EQUAL: + { + if (ps.d_children.size() != 1) 
+ { + break; + } + bool polarity = ps.d_children[0].getKind() != NOT; + Node atom = polarity ? ps.d_children[0] : ps.d_children[0][0]; + std::vector<Node> args; + args.push_back(atom); + Node res = psb.tryStep(PfRule::STRING_EAGER_REDUCTION, {}, args); + if (res.isNull()) + { + break; + } + // ite( contains(x,t), x = k1 ++ t ++ k2, x != t ) + std::vector<Node> tiChildren; + tiChildren.push_back(ps.d_children[0]); + Node ctnt = psb.tryStep( + polarity ? PfRule::TRUE_INTRO : PfRule::FALSE_INTRO, tiChildren, {}); + if (ctnt.isNull() || ctnt.getKind() != EQUAL) + { + break; + } + std::vector<Node> tchildren; + tchildren.push_back(ctnt); + // apply substitution { contains(x,t) -> true|false } and rewrite to get + // conclusion x = k1 ++ t ++ k2 or x != t. + if (psb.applyPredTransform(res, conc, tchildren)) + { + useBuffer = true; + } + } + break; + + case Inference::REDUCTION: + { + size_t nchild = conc.getNumChildren(); + Node mainEq; + if (conc.getKind() == EQUAL) + { + mainEq = conc; + } + else if (conc.getKind() == AND && conc[nchild - 1].getKind() == EQUAL) + { + mainEq = conc[nchild - 1]; + } + if (mainEq.isNull()) + { + Trace("strings-ipc-red") << "Bad Reduction: " << conc << std::endl; + Assert(false) << "Unexpected reduction " << conc; + break; + } + std::vector<Node> argsRed; + // the left hand side of the last conjunct is the term we are reducing + argsRed.push_back(mainEq[0]); + Node red = psb.tryStep(PfRule::STRING_REDUCTION, {}, argsRed); + Trace("strings-ipc-red") << "Reduction : " << red << std::endl; + if (!red.isNull()) + { + // either equal or rewrites to it + std::vector<Node> cexp; + if (psb.applyPredTransform(red, conc, cexp)) + { + Trace("strings-ipc-red") << "...success!" 
<< std::endl; + useBuffer = true; + } + else + { + Trace("strings-ipc-red") << "...failed to reduce" << std::endl; + } + } + } + break; + // ========================== code injectivity + case Inference::CODE_INJ: + { + ps.d_rule = PfRule::STRING_CODE_INJ; + Assert(conc.getKind() == OR && conc.getNumChildren() == 3 + && conc[2].getKind() == EQUAL); + ps.d_args.push_back(conc[2][0]); + ps.d_args.push_back(conc[2][1]); + } + break; + // ========================== unit injectivity + case Inference::UNIT_INJ: { ps.d_rule = PfRule::STRING_SEQ_UNIT_INJ; + } + break; + // ========================== prefix conflict + case Inference::PREFIX_CONFLICT: + { + Trace("strings-ipc-prefix") << "Prefix conflict..." << std::endl; + std::vector<Node> eqs; + for (const Node& e : ps.d_children) + { + Kind ek = e.getKind(); + if (ek == EQUAL) + { + Trace("strings-ipc-prefix") << "- equality : " << e << std::endl; + eqs.push_back(e); + } + else if (ek == STRING_IN_REGEXP) + { + // unfold it and extract the equality + std::vector<Node> children; + children.push_back(e); + std::vector<Node> args; + Node eunf = psb.tryStep(PfRule::RE_UNFOLD_POS, children, args); + Trace("strings-ipc-prefix") + << "--- " << e << " unfolds to " << eunf << std::endl; + if (eunf.isNull()) + { + continue; + } + else if (eunf.getKind() == AND) + { + // equality is the last conjunct + std::vector<Node> childrenAE; + childrenAE.push_back(eunf); + std::vector<Node> argsAE; + argsAE.push_back(nm->mkConst(Rational(eunf.getNumChildren() - 1))); + Node eunfAE = psb.tryStep(PfRule::AND_ELIM, childrenAE, argsAE); + Trace("strings-ipc-prefix") + << "--- and elim to " << eunfAE << std::endl; + if (eunfAE.isNull() || eunfAE.getKind() != EQUAL) + { + Assert(false) + << "Unexpected unfolded premise " << eunf << " for " << infer; + continue; + } + Trace("strings-ipc-prefix") + << "- equality : " << eunfAE << std::endl; + eqs.push_back(eunfAE); + } + else if (eunf.getKind() == EQUAL) + { + Trace("strings-ipc-prefix") << "- 
equality : " << eunf << std::endl; + eqs.push_back(eunf); + } + } + else + { + // not sure how to use this assumption + Assert(false) << "Unexpected premise " << e << " for " << infer; + } + } + if (eqs.empty()) + { + break; + } + // connect via transitivity: starting from eqs[0], chain in each + // remaining collected equality in order + Node curr = eqs[0]; + for (size_t i = 1, esize = eqs.size(); i < esize; i++) + { + // use the i-th equality (not always eqs[1]) so that every collected + // equality takes part in the chain + curr = convertTrans(curr, eqs[i], psb); + if (curr.isNull()) + { + break; + } + Trace("strings-ipc-prefix") << "- Via trans: " << curr << std::endl; + } + if (curr.isNull()) + { + break; + } + Trace("strings-ipc-prefix") + << "- Possible conflicting equality : " << curr << std::endl; + std::vector<Node> emp; + Node concE = psb.applyPredElim(curr, emp); + Trace("strings-ipc-prefix") + << "- After pred elim: " << concE << std::endl; + if (concE == conc) + { + Trace("strings-ipc-prefix") << "...success!" << std::endl; + useBuffer = true; + } + } + break; + // ========================== regular expressions + case Inference::RE_INTER_INCLUDE: + case Inference::RE_INTER_CONF: + case Inference::RE_INTER_INFER: + { + std::vector<Node> reiExp; + std::vector<Node> reis; + std::vector<Node> reiChildren; + std::vector<Node> reiChildrenOrig; + Node x; + // make the regular expression intersection that summarizes all + // memberships in the explanation + for (const Node& c : ps.d_children) + { + bool polarity = c.getKind() != NOT; + Node catom = polarity ? c : c[0]; + if (catom.getKind() != STRING_IN_REGEXP) + { + Assert(c.getKind() == EQUAL); + if (c.getKind() == EQUAL) + { + reiExp.push_back(c); + } + continue; + } + if (x.isNull()) + { + // just take the first LHS; others should be equated to it by exp + x = catom[0]; + } + Node rcurr = + polarity ?
catom[1] : nm->mkNode(REGEXP_COMPLEMENT, catom[1]); + reis.push_back(rcurr); + Node mem = nm->mkNode(STRING_IN_REGEXP, catom[0], rcurr); + reiChildren.push_back(mem); + reiChildrenOrig.push_back(c); + } + // go back and justify each premise + bool successChildren = true; + for (size_t i = 0, nchild = reiChildren.size(); i < nchild; i++) + { + if (!psb.applyPredTransform(reiChildrenOrig[i], reiChildren[i], reiExp)) + { + Trace("strings-ipc-re") + << "... failed to justify child " << reiChildren[i] << " from " + << reiChildrenOrig[i] << std::endl; + successChildren = false; + break; + } + } + if (!successChildren) + { + break; + } + Node mem = psb.tryStep(PfRule::RE_INTER, reiChildren, {}); + Trace("strings-ipc-re") + << "Regular expression summary: " << mem << std::endl; + // the conclusion is rewritable to the premises via rewriting? + if (psb.applyPredTransform(mem, conc, {})) + { + Trace("strings-ipc-re") << "... success!" << std::endl; + useBuffer = true; + } + else + { + Trace("strings-ipc-re") + << "...failed to rewrite to conclusion" << std::endl; + } + } + break; + // ========================== unknown and currently unsupported + case Inference::CARDINALITY: + case Inference::I_CYCLE_E: + case Inference::I_CYCLE: + case Inference::RE_DELTA: + case Inference::RE_DELTA_CONF: + case Inference::RE_DERIVE: + case Inference::FLOOP: + case Inference::FLOOP_CONFLICT: + case Inference::DEQ_NORM_EMP: + case Inference::CTN_TRANS: + case Inference::CTN_DECOMPOSE: + default: + // do nothing, these will be converted to STRING_TRUST below since the + // rule is unknown. 
+ break; + } + + // now see if we would succeed with the checker-to-try + bool success = false; + if (ps.d_rule != PfRule::UNKNOWN) + { + Trace("strings-ipc") << "For " << infer << ", try proof rule " << ps.d_rule + << "..."; + Assert(ps.d_rule != PfRule::UNKNOWN); + Node pconc = psb.tryStep(ps.d_rule, ps.d_children, ps.d_args); + if (pconc.isNull() || pconc != conc) + { + Trace("strings-ipc") << "failed, pconc is " << pconc << " (expected " + << conc << ")" << std::endl; + ps.d_rule = PfRule::UNKNOWN; + } + else + { + // successfully set up a single step proof in ps + success = true; + Trace("strings-ipc") << "success!" << std::endl; + } + } + else if (useBuffer) + { + // successfully set up a multi step proof in psb + success = true; + } + else + { + Trace("strings-ipc") << "For " << infer << " " << conc + << ", no proof rule, failed" << std::endl; + } + if (!success) + { + // debug print + if (Trace.isOn("strings-ipc-fail")) + { + Trace("strings-ipc-fail") + << "InferProofCons::convert: Failed " << infer + << (isRev ? 
" :rev " : " ") << conc << std::endl; + for (const Node& ec : exp) + { + Trace("strings-ipc-fail") << " e: " << ec << std::endl; + } + } + // untrustworthy conversion, the argument of STRING_TRUST is its conclusion + ps.d_args.clear(); + ps.d_args.push_back(conc); + // use the trust rule + ps.d_rule = PfRule::STRING_TRUST; + // add to stats + d_statistics.d_inferencesNoPf << infer; + } + if (Trace.isOn("strings-ipc-debug")) + { + if (useBuffer) + { + Trace("strings-ipc-debug") + << "InferProofCons::convert returned buffer with " + << psb.getNumSteps() << " steps:" << std::endl; + const std::vector<std::pair<Node, ProofStep>>& steps = psb.getSteps(); + for (const std::pair<Node, ProofStep>& step : steps) + { + Trace("strings-ipc-debug") + << "- " << step.first << " via " << step.second << std::endl; + } + } + else + { + Trace("strings-ipc-debug") + << "InferProofCons::convert returned " << ps << std::endl; + } + } +} + +bool InferProofCons::convertLengthPf(Node lenReq, + const std::vector<Node>& lenExp, + TheoryProofStepBuffer& psb) +{ + for (const Node& le : lenExp) + { + if (lenReq == le) + { + return true; + } + } + Trace("strings-ipc-len") << "Must explain " << lenReq << " by " << lenExp + << std::endl; + for (const Node& le : lenExp) + { + // probably rewrites to it? 
+ std::vector<Node> exp; + if (psb.applyPredTransform(le, lenReq, exp)) + { + Trace("strings-ipc-len") << "...success by rewrite" << std::endl; + return true; + } + // maybe x != "" => len(x) != 0 + std::vector<Node> children; + children.push_back(le); + std::vector<Node> args; + Node res = psb.tryStep(PfRule::STRING_LENGTH_NON_EMPTY, children, args); + if (res == lenReq) + { + Trace("strings-ipc-len") << "...success by LENGTH_NON_EMPTY" << std::endl; + return true; + } + } + Trace("strings-ipc-len") << "...failed" << std::endl; + return false; +} + +Node InferProofCons::convertTrans(Node eqa, + Node eqb, + TheoryProofStepBuffer& psb) +{ + // both arguments must be equalities, otherwise no TRANS step is possible + if (eqa.getKind() != EQUAL || eqb.getKind() != EQUAL) + { + return Node::null(); + } + // look for a side eqa[i] of eqa that is identical to a side eqb[j] of eqb + for (uint32_t i = 0; i < 2; i++) + { + // orient eqa so that the shared term eqa[i] is its right hand side + Node eqaSym = i == 0 ? eqa[1].eqNode(eqa[0]) : eqa; + for (uint32_t j = 0; j < 2; j++) + { + // orient eqb so that the shared term eqb[j] is its left hand side; + // the flipped form of eqb is eqb[1] = eqb[0] + Node eqbSym = j == 0 ? eqb : eqb[1].eqNode(eqb[0]); + if (eqa[i] == eqb[j]) + { + std::vector<Node> children; + children.push_back(eqaSym); + children.push_back(eqbSym); + return psb.tryStep(PfRule::TRANS, children, {}); + } + } + } + // no common term, cannot connect the equalities + return Node::null(); +} + +std::shared_ptr<ProofNode> InferProofCons::getProofFor(Node fact) +{ + // temporary proof + CDProof pf(d_pnm); + // get the inference + NodeInferInfoMap::iterator it = d_lazyFactMap.find(fact); + if (it == d_lazyFactMap.end()) + { + Node factSym = CDProof::getSymmFact(fact); + if (!factSym.isNull()) + { + // Use the symmetric fact. There is no need to explicitly make a + // SYMM proof, as this is handled by CDProof::getProofFor below.
+ it = d_lazyFactMap.find(factSym); + } + } + AlwaysAssert(it != d_lazyFactMap.end()); + // now go back and convert it to proof steps and add to proof + bool useBuffer = false; + ProofStep ps; + TheoryProofStepBuffer psb(d_pnm->getChecker()); + std::shared_ptr<InferInfo> ii = (*it).second; + // run the conversion + convert(ii->d_id, ii->d_idRev, ii->d_conc, ii->d_ant, ps, psb, useBuffer); + // make the proof based on the step or the buffer + if (useBuffer) + { + if (!pf.addSteps(psb)) + { + return nullptr; + } + } + else + { + if (!pf.addStep(fact, ps)) + { + return nullptr; + } + } + return pf.getProofFor(fact); +} + +std::string InferProofCons::identify() const +{ + return "strings::InferProofCons"; +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/infer_proof_cons.h b/src/theory/strings/infer_proof_cons.h new file mode 100644 index 000000000..63e341dfe --- /dev/null +++ b/src/theory/strings/infer_proof_cons.h @@ -0,0 +1,135 @@ +/********************* */ +/*! \file infer_proof_cons.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory and their institutional affiliations. + ** All rights reserved.
See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Inference to proof conversion + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__INFER_PROOF_CONS_H +#define CVC4__THEORY__STRINGS__INFER_PROOF_CONS_H + +#include <vector> + +#include "expr/node.h" +#include "expr/proof_checker.h" +#include "expr/proof_rule.h" +#include "theory/builtin/proof_checker.h" +#include "theory/strings/infer_info.h" +#include "theory/strings/sequences_stats.h" +#include "theory/theory_proof_step_buffer.h" +#include "theory/uf/proof_equality_engine.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** + * Converts between the strings-specific (untrustworthy) InferInfo class and + * information about how to construct a trustworthy proof step + * (PfRule, children, args). It acts as a (lazy) proof generator where the + * former is registered via notifyFact and the latter is asked for in + * getProofFor, typically by the proof equality engine. + * + * The main (private) method of this class is convert below, which is + * called when we need to construct a proof node from an InferInfo. + */ +class InferProofCons : public ProofGenerator +{ + typedef context::CDHashMap<Node, std::shared_ptr<InferInfo>, NodeHashFunction> + NodeInferInfoMap; + + public: + InferProofCons(context::Context* c, + ProofNodeManager* pnm, + SequencesStatistics& statistics); + ~InferProofCons() {} + /** + * This is called to notify that ii is an inference that may need a proof + * in the future. + * + * In detail, this class should be prepared to respond to a call to: + * getProofFor(ii.d_conc) + * in the remainder of the SAT context. This method copies ii and stores it + * in the context-dependent map d_lazyFactMap below. + * + * This is used for lazy proof construction, where proofs are constructed + * only for facts that are explained. 
+ */ + void notifyFact(const InferInfo& ii); + + /** + * This returns the proof for fact. This is required for using this class as + * a lazy proof generator. + * + * It should be the case that a call was made to notifyFact(ii) where + * ii.d_conc is fact in this SAT context. + */ + std::shared_ptr<ProofNode> getProofFor(Node fact) override; + /** Identify this generator (for debugging, etc..) */ + virtual std::string identify() const override; + + private: + /** convert + * + * This method is called when the theory of strings makes an inference + * described by an InferInfo, whose fields are given by the first four + * arguments of this method. + * + * This method converts this call to instructions on what the proof rule + * step(s) are for concluding the conclusion of the inference. This + * information is either: + * + * (A) stored in the argument ps, which consists of: + * - A proof rule identifier (ProofStep::d_rule). + * - The premises of the proof step (ProofStep::d_children). + * - Arguments to the proof step (ProofStep::d_args). + * + * (B) If the proof for the inference cannot be captured by a single + * step, then the d_rule field of ps is not set, and useBuffer is set to + * true. In this case, the argument psb is updated to contain (possibly + * multiple) proof steps for how to construct a proof for the given inference. + * In particular, psb will contain a set of steps that form a proof + * whose conclusion is ii.d_conc and whose free assumptions are ii.d_ant. + */ + void convert(Inference infer, + bool isRev, + Node conc, + const std::vector<Node>& exp, + ProofStep& ps, + TheoryProofStepBuffer& psb, + bool& useBuffer); + /** + * Convert length proof. If this method returns true, it adds proof step(s) + * to the buffer psb that conclude lenReq from premises lenExp. 
+ */ + bool convertLengthPf(Node lenReq, + const std::vector<Node>& lenExp, + TheoryProofStepBuffer& psb); + /** + * Helper method, adds the proof of (TRANS eqa eqb) into the proof step + * buffer psb, where eqa and eqb are flipped as needed. Returns the + * conclusion, or null if we were not able to construct a TRANS step. + */ + Node convertTrans(Node eqa, Node eqb, TheoryProofStepBuffer& psb); + /** the proof node manager */ + ProofNodeManager* d_pnm; + /** The lazy fact map */ + NodeInferInfoMap d_lazyFactMap; + /** Reference to the statistics for the theory of strings/sequences. */ + SequencesStatistics& d_statistics; +}; + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__INFER_PROOF_CONS_H */ diff --git a/src/theory/strings/inference_manager.cpp b/src/theory/strings/inference_manager.cpp index 6d33c8627..e324689f5 100644 --- a/src/theory/strings/inference_manager.cpp +++ b/src/theory/strings/inference_manager.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -28,19 +28,19 @@ namespace CVC4 { namespace theory { namespace strings { -InferenceManager::InferenceManager(context::Context* c, - context::UserContext* u, +InferenceManager::InferenceManager(Theory& t, SolverState& s, TermRegistry& tr, ExtTheory& e, - OutputChannel& out, - SequencesStatistics& statistics) - : d_state(s), + SequencesStatistics& statistics, + ProofNodeManager* pnm) + : InferenceManagerBuffered(t, s, pnm), + d_state(s), d_termReg(tr), d_extt(e), - d_out(out), d_statistics(statistics), - d_keep(c) + d_ipc(pnm ? 
new InferProofCons(d_state.getSatContext(), pnm, d_statistics) + : nullptr) { NodeManager* nm = NodeManager::currentNM(); d_zero = nm->mkConst(Rational(0)); @@ -49,12 +49,18 @@ InferenceManager::InferenceManager(context::Context* c, d_false = nm->mkConst(false); } -void InferenceManager::sendAssumption(TNode lit) +void InferenceManager::doPending() { - bool polarity = lit.getKind() != kind::NOT; - TNode atom = polarity ? lit : lit[0]; - // assert pending fact - assertPendingFact(atom, polarity, lit); + doPendingFacts(); + if (d_state.isInConflict()) + { + // just clear the pending vectors, nothing else to do + clearPendingLemmas(); + clearPendingPhaseRequirements(); + return; + } + doPendingLemmas(); + doPendingPhaseRequirements(); } bool InferenceManager::sendInternalInference(std::vector<Node>& exp, @@ -116,63 +122,67 @@ bool InferenceManager::sendInternalInference(std::vector<Node>& exp, return true; } -void InferenceManager::sendInference(const std::vector<Node>& exp, - const std::vector<Node>& expn, +bool InferenceManager::sendInference(const std::vector<Node>& exp, + const std::vector<Node>& noExplain, Node eq, Inference infer, + bool isRev, bool asLemma) { - eq = eq.isNull() ? 
d_false : Rewriter::rewrite(eq); - if (eq == d_true) + if (eq.isNull()) { - return; + eq = d_false; + } + else if (Rewriter::rewrite(eq) == d_true) + { + // if trivial, return + return false; } // wrap in infer info and send below InferInfo ii; ii.d_id = infer; + ii.d_idRev = isRev; ii.d_conc = eq; ii.d_ant = exp; - ii.d_antn = expn; + ii.d_noExplain = noExplain; sendInference(ii, asLemma); + return true; } -void InferenceManager::sendInference(const std::vector<Node>& exp, +bool InferenceManager::sendInference(const std::vector<Node>& exp, Node eq, Inference infer, + bool isRev, bool asLemma) { - std::vector<Node> expn; - sendInference(exp, expn, eq, infer, asLemma); + std::vector<Node> noExplain; + return sendInference(exp, noExplain, eq, infer, isRev, asLemma); } -void InferenceManager::sendInference(const InferInfo& ii, bool asLemma) +void InferenceManager::sendInference(InferInfo& ii, bool asLemma) { Assert(!ii.isTrivial()); + // set that this inference manager will be processing this inference + ii.d_sim = this; Trace("strings-infer-debug") << "sendInference: " << ii << ", asLemma = " << asLemma << std::endl; // check if we should send a conflict, lemma or a fact - if (asLemma || options::stringInferAsLemmas() || !ii.isFact()) + if (ii.isConflict()) + { + Trace("strings-infer-debug") << "...as conflict" << std::endl; + Trace("strings-lemma") << "Strings::Conflict: " << ii.d_ant << " by " + << ii.d_id << std::endl; + Trace("strings-conflict") << "CONFLICT: inference conflict " << ii.d_ant + << " by " << ii.d_id << std::endl; + ++(d_statistics.d_conflictsInfer); + // process the conflict immediately + processConflict(ii); + return; + } + else if (asLemma || options::stringInferAsLemmas() || !ii.isFact()) { - if (ii.isConflict()) - { - Trace("strings-infer-debug") << "...as conflict" << std::endl; - Trace("strings-lemma") << "Strings::Conflict: " << ii.d_ant << " by " - << ii.d_id << std::endl; - Trace("strings-conflict") << "CONFLICT: inference conflict " << 
ii.d_ant - << " by " << ii.d_id << std::endl; - // we must fully explain it - Node conf = mkExplain(ii.d_ant); - Trace("strings-assert") << "(assert (not " << conf << ")) ; conflict " - << ii.d_id << std::endl; - ++(d_statistics.d_conflictsInfer); - // only keep stats if we process it here - d_statistics.d_inferences << ii.d_id; - d_out.conflict(conf); - d_state.setConflict(); - return; - } Trace("strings-infer-debug") << "...as lemma" << std::endl; - d_pendingLem.push_back(ii); + addPendingLemma(std::unique_ptr<InferInfo>(new InferInfo(ii))); return; } if (options::stringInferSym()) @@ -189,6 +199,7 @@ void InferenceManager::sendInference(const InferInfo& ii, bool asLemma) Node eqs = ii.d_conc.substitute( vars.begin(), vars.end(), subs.begin(), subs.end()); InferInfo iiSubsLem; + iiSubsLem.d_sim = this; // keep the same id for now, since we are transforming the form of the // inference, not the root reason. iiSubsLem.d_id = ii.d_id; @@ -206,7 +217,7 @@ void InferenceManager::sendInference(const InferInfo& ii, bool asLemma) } } Trace("strings-infer-debug") << "...as symbolic lemma" << std::endl; - d_pendingLem.push_back(iiSubsLem); + addPendingLemma(std::unique_ptr<InferInfo>(new InferInfo(iiSubsLem))); return; } if (Trace.isOn("strings-lemma-debug")) @@ -214,13 +225,13 @@ void InferenceManager::sendInference(const InferInfo& ii, bool asLemma) for (const Node& u : unproc) { Trace("strings-lemma-debug") - << " non-trivial exp : " << u << std::endl; + << " non-trivial explanation : " << u << std::endl; } } } Trace("strings-infer-debug") << "...as fact" << std::endl; - // add to pending, to be processed as a fact - d_pending.push_back(ii); + // add to pending to be processed as a fact + addPendingFact(std::unique_ptr<InferInfo>(new InferInfo(ii))); } bool InferenceManager::sendSplit(Node a, Node b, Inference infer, bool preq) @@ -233,19 +244,15 @@ bool InferenceManager::sendSplit(Node a, Node b, Inference infer, bool preq) } NodeManager* nm = NodeManager::currentNM(); 
InferInfo iiSplit; + iiSplit.d_sim = this; iiSplit.d_id = infer; iiSplit.d_conc = nm->mkNode(OR, eq, nm->mkNode(NOT, eq)); - sendPhaseRequirement(eq, preq); - d_pendingLem.push_back(iiSplit); + eq = Rewriter::rewrite(eq); + addPendingPhaseRequirement(eq, preq); + addPendingLemma(std::unique_ptr<InferInfo>(new InferInfo(iiSplit))); return true; } -void InferenceManager::sendPhaseRequirement(Node lit, bool pol) -{ - lit = Rewriter::rewrite(lit); - d_pendingReqPhase[lit] = pol; -} - void InferenceManager::setIncomplete() { d_out.setIncomplete(); } void InferenceManager::addToExplanation(Node a, @@ -265,304 +272,167 @@ void InferenceManager::addToExplanation(Node lit, std::vector<Node>& exp) const { if (!lit.isNull()) { + Assert(!lit.isConst()); exp.push_back(lit); } } -void InferenceManager::doPendingFacts() +bool InferenceManager::hasProcessed() const { - size_t i = 0; - while (!d_state.isInConflict() && i < d_pending.size()) - { - InferInfo& ii = d_pending[i]; - // At this point, ii should be a "fact", i.e. something whose conclusion - // should be added as a normal equality or predicate to the equality engine - // with no new external assumptions (ii.d_antn). - Assert(ii.isFact()); - Node facts = ii.d_conc; - Node exp = utils::mkAnd(ii.d_ant); - Trace("strings-assert") << "(assert (=> " << exp << " " << facts - << ")) ; fact " << ii.d_id << std::endl; - // only keep stats if we process it here - Trace("strings-lemma") << "Strings::Fact: " << facts << " from " << exp - << " by " << ii.d_id << std::endl; - d_statistics.d_inferences << ii.d_id; - // assert it as a pending fact - if (facts.getKind() == AND) - { - for (const Node& fact : facts) - { - bool polarity = fact.getKind() != NOT; - TNode atom = polarity ? 
fact : fact[0]; - // no double negation or double (conjunctive) conclusions - Assert(atom.getKind() != NOT && atom.getKind() != AND); - assertPendingFact(atom, polarity, exp); - } - } - else - { - bool polarity = facts.getKind() != NOT; - TNode atom = polarity ? facts : facts[0]; - // no double negation or double (conjunctive) conclusions - Assert(atom.getKind() != NOT && atom.getKind() != AND); - assertPendingFact(atom, polarity, exp); - } - // Must reference count the equality and its explanation, which is not done - // by the equality engine. Notice that we do not need to do this for - // external assertions, which enter as facts through sendAssumption. - d_keep.insert(facts); - d_keep.insert(exp); - i++; - } - d_pending.clear(); + return d_state.isInConflict() || hasPending(); } -void InferenceManager::doPendingLemmas() +void InferenceManager::markCongruent(Node a, Node b) { - if (d_state.isInConflict()) + Assert(a.getKind() == b.getKind()); + if (d_extt.hasFunctionKind(a.getKind())) { - // just clear the pending vectors, nothing else to do - d_pendingLem.clear(); - d_pendingReqPhase.clear(); - return; + d_extt.markCongruent(a, b); } - NodeManager* nm = NodeManager::currentNM(); - for (unsigned i = 0, psize = d_pendingLem.size(); i < psize; i++) - { - InferInfo& ii = d_pendingLem[i]; - Assert(!ii.isTrivial()); - Assert(!ii.isConflict()); - // get the explanation - Node eqExp; - if (options::stringRExplainLemmas()) - { - eqExp = mkExplain(ii.d_ant, ii.d_antn); - } - else - { - std::vector<Node> ev; - ev.insert(ev.end(), ii.d_ant.begin(), ii.d_ant.end()); - ev.insert(ev.end(), ii.d_antn.begin(), ii.d_antn.end()); - eqExp = utils::mkAnd(ev); - } - // make the lemma node - Node lem = ii.d_conc; - if (eqExp != d_true) - { - lem = nm->mkNode(IMPLIES, eqExp, lem); - } - Trace("strings-pending") << "Process pending lemma : " << lem << std::endl; - Trace("strings-assert") - << "(assert " << lem << ") ; lemma " << ii.d_id << std::endl; - Trace("strings-lemma") << 
"Strings::Lemma: " << lem << " by " << ii.d_id - << std::endl; - // only keep stats if we process it here - d_statistics.d_inferences << ii.d_id; - ++(d_statistics.d_lemmasInfer); +} - // Process the side effects of the inference info. - // Register the new skolems from this inference. We register them here - // (lazily), since this is the moment when we have decided to process the - // inference. - for (const std::pair<const LengthStatus, std::vector<Node> >& sks : - ii.d_new_skolem) - { - for (const Node& n : sks.second) - { - d_termReg.registerTermAtomic(n, sks.first); - } - } +void InferenceManager::markReduced(Node n, bool contextDepend) +{ + d_extt.markReduced(n, contextDepend); +} - d_out.lemma(lem); - } - // process the pending require phase calls - for (const std::pair<const Node, bool>& prp : d_pendingReqPhase) +void InferenceManager::processConflict(const InferInfo& ii) +{ + Assert(!d_state.isInConflict()); + // setup the fact to reproduce the proof in the call below + d_statistics.d_inferences << ii.d_id; + if (d_ipc != nullptr) { - Trace("strings-pending") << "Require phase : " << prp.first - << ", polarity = " << prp.second << std::endl; - d_out.requirePhase(prp.first, prp.second); + d_ipc->notifyFact(ii); } - d_pendingLem.clear(); - d_pendingReqPhase.clear(); + // make the trust node + TrustNode tconf = mkConflictExp(ii.d_ant, d_ipc.get()); + Assert(tconf.getKind() == TrustNodeKind::CONFLICT); + Trace("strings-assert") << "(assert (not " << tconf.getNode() + << ")) ; conflict " << ii.d_id << std::endl; + // send the trusted conflict + trustedConflict(tconf); } -void InferenceManager::assertPendingFact(Node atom, bool polarity, Node exp) +bool InferenceManager::processFact(InferInfo& ii) { - eq::EqualityEngine* ee = d_state.getEqualityEngine(); - Trace("strings-pending") << "Assert pending fact : " << atom << " " - << polarity << " from " << exp << std::endl; - Assert(atom.getKind() != OR) << "Infer error: a split."; - if (atom.getKind() == EQUAL) + 
// Get the fact(s). There are multiple facts if the conclusion is an AND + std::vector<Node> facts; + if (ii.d_conc.getKind() == AND) { - // we must ensure these terms are registered - Trace("strings-pending-debug") << " Register term" << std::endl; - for (const Node& t : atom) + for (const Node& cc : ii.d_conc) { - // terms in the equality engine are already registered, hence skip - // currently done for only string-like terms, but this could potentially - // be avoided. - if (!ee->hasTerm(t) && t.getType().isStringLike()) - { - d_termReg.registerTerm(t, 0); - } + facts.push_back(cc); } - Trace("strings-pending-debug") << " Now assert equality" << std::endl; - ee->assertEquality(atom, polarity, exp); - Trace("strings-pending-debug") << " Finished assert equality" << std::endl; } else { - ee->assertPredicate(atom, polarity, exp); - if (atom.getKind() == STRING_IN_REGEXP) - { - if (polarity && atom[1].getKind() == REGEXP_CONCAT) - { - Node eqc = ee->getRepresentative(atom[0]); - d_state.addEndpointsToEqcInfo(atom, atom[1], eqc); - } - } - } - // process the conflict - if (!d_state.isInConflict()) - { - Node pc = d_state.getPendingConflict(); - if (!pc.isNull()) - { - std::vector<Node> a; - a.push_back(pc); - Trace("strings-pending") - << "Process pending conflict " << pc << std::endl; - Node conflictNode = mkExplain(a); - d_state.setConflict(); - Trace("strings-conflict") - << "CONFLICT: Eager prefix : " << conflictNode << std::endl; - ++(d_statistics.d_conflictsEagerPrefix); - d_out.conflict(conflictNode); - } + facts.push_back(ii.d_conc); } - Trace("strings-pending-debug") << " Now collect terms" << std::endl; - // Collect extended function terms in the atom. Notice that we must register - // all extended functions occurring in assertions and shared terms. We - // make a similar call to registerTermRec in TheoryStrings::addSharedTerm. 
- d_extt.registerTermRec(atom); - Trace("strings-pending-debug") << " Finished collect terms" << std::endl; -} - -bool InferenceManager::hasProcessed() const -{ - return d_state.isInConflict() || !d_pendingLem.empty() || !d_pending.empty(); -} - -Node InferenceManager::mkExplain(const std::vector<Node>& a) const -{ - std::vector<Node> an; - return mkExplain(a, an); -} - -Node InferenceManager::mkExplain(const std::vector<Node>& a, - const std::vector<Node>& an) const -{ - std::vector<TNode> antec_exp; - // copy to processing vector - std::vector<Node> aconj; - for (const Node& ac : a) + Trace("strings-assert") << "(assert (=> " << ii.getAntecedant() << " " + << ii.d_conc << ")) ; fact " << ii.d_id << std::endl; + Trace("strings-lemma") << "Strings::Fact: " << ii.d_conc << " from " + << ii.getAntecedant() << " by " << ii.d_id + << std::endl; + std::vector<Node> exp; + for (const Node& ec : ii.d_ant) { - utils::flattenOp(AND, ac, aconj); + utils::flattenOp(AND, ec, exp); } - eq::EqualityEngine* ee = d_state.getEqualityEngine(); - for (const Node& apc : aconj) + bool ret = false; + // convert for each fact + for (const Node& fact : facts) { - Assert(apc.getKind() != AND); - Debug("strings-explain") << "Add to explanation " << apc << std::endl; - if (apc.getKind() == NOT && apc[0].getKind() == EQUAL) + ii.d_conc = fact; + d_statistics.d_inferences << ii.d_id; + bool polarity = fact.getKind() != NOT; + TNode atom = polarity ? 
fact : fact[0]; + bool curRet = false; + if (d_ipc != nullptr) { - Assert(ee->hasTerm(apc[0][0])); - Assert(ee->hasTerm(apc[0][1])); - // ensure that we are ready to explain the disequality - AlwaysAssert(ee->areDisequal(apc[0][0], apc[0][1], true)); + // ensure the proof generator is ready to explain this fact in the + // current SAT context + d_ipc->notifyFact(ii); + // now, assert the internal fact with d_ipc as proof generator + curRet = assertInternalFact(atom, polarity, exp, d_ipc.get()); } - Assert(apc.getKind() != EQUAL || ee->areEqual(apc[0], apc[1])); - // now, explain - explain(apc, antec_exp); - } - for (const Node& anc : an) - { - if (std::find(antec_exp.begin(), antec_exp.end(), anc) == antec_exp.end()) + else { - Debug("strings-explain") - << "Add to explanation (new literal) " << anc << std::endl; - antec_exp.push_back(anc); + Node cexp = utils::mkAnd(exp); + // without proof generator + curRet = assertInternalFact(atom, polarity, cexp); + } + ret = ret || curRet; + // may be in conflict + if (d_state.isInConflict()) + { + break; } } - Node ant; - if (antec_exp.empty()) - { - ant = d_true; - } - else if (antec_exp.size() == 1) - { - ant = antec_exp[0]; - } - else - { - ant = NodeManager::currentNM()->mkNode(AND, antec_exp); - } - return ant; + return ret; } -void InferenceManager::explain(TNode literal, - std::vector<TNode>& assumptions) const +bool InferenceManager::processLemma(InferInfo& ii) { - Debug("strings-explain") << "Explain " << literal << " " - << d_state.isInConflict() << std::endl; - eq::EqualityEngine* ee = d_state.getEqualityEngine(); - bool polarity = literal.getKind() != NOT; - TNode atom = polarity ? 
literal : literal[0]; - std::vector<TNode> tassumptions; - if (atom.getKind() == EQUAL) + Assert(!ii.isTrivial()); + Assert(!ii.isConflict()); + // set up the explanation and no-explanation + std::vector<Node> exp; + for (const Node& ec : ii.d_ant) { - if (atom[0] != atom[1]) - { - Assert(ee->hasTerm(atom[0])); - Assert(ee->hasTerm(atom[1])); - ee->explainEquality(atom[0], atom[1], polarity, tassumptions); - } + utils::flattenOp(AND, ec, exp); } - else + std::vector<Node> noExplain; + if (!options::stringRExplainLemmas()) { - ee->explainPredicate(atom, polarity, tassumptions); + // if we aren't regressing the explanation, we add all literals to + // noExplain and ignore ii.d_ant. + noExplain.insert(noExplain.end(), exp.begin(), exp.end()); } - for (const TNode& a : tassumptions) + else { - if (std::find(assumptions.begin(), assumptions.end(), a) - == assumptions.end()) + // otherwise, the no-explain literals are those provided + for (const Node& ecn : ii.d_noExplain) { - assumptions.push_back(a); + utils::flattenOp(AND, ecn, noExplain); } } - if (Debug.isOn("strings-explain-debug")) + // ensure that the proof generator is ready to explain the final conclusion + // of the lemma (ii.d_conc). + d_statistics.d_inferences << ii.d_id; + if (d_ipc != nullptr) + { + d_ipc->notifyFact(ii); + } + TrustNode tlem = mkLemmaExp(ii.d_conc, exp, noExplain, d_ipc.get()); + Trace("strings-pending") << "Process pending lemma : " << tlem.getNode() + << std::endl; + + // Process the side effects of the inference info. + // Register the new skolems from this inference. We register them here + // (lazily), since this is the moment when we have decided to process the + // inference. 
+ for (const std::pair<const LengthStatus, std::vector<Node> >& sks : + ii.d_new_skolem) { - Debug("strings-explain-debug") - << "Explanation for " << literal << " was " << std::endl; - for (const TNode& a : tassumptions) + for (const Node& n : sks.second) { - Debug("strings-explain-debug") << " " << a << std::endl; + d_termReg.registerTermAtomic(n, sks.first); } } -} - -void InferenceManager::markCongruent(Node a, Node b) -{ - Assert(a.getKind() == b.getKind()); - if (d_extt.hasFunctionKind(a.getKind())) + LemmaProperty p = LemmaProperty::NONE; + if (ii.d_id == Inference::REDUCTION) { - d_extt.markCongruent(a, b); + p |= LemmaProperty::NEEDS_JUSTIFY; } -} + Trace("strings-assert") << "(assert " << tlem.getNode() << ") ; lemma " + << ii.d_id << std::endl; + Trace("strings-lemma") << "Strings::Lemma: " << tlem.getNode() << " by " + << ii.d_id << std::endl; + ++(d_statistics.d_lemmasInfer); -void InferenceManager::markReduced(Node n, bool contextDepend) -{ - d_extt.markReduced(n, contextDepend); + // call the trusted lemma, without caching + return trustedLemma(tlem, p, false); } } // namespace strings diff --git a/src/theory/strings/inference_manager.h b/src/theory/strings/inference_manager.h index 4e50a6cb7..3280281bd 100644 --- a/src/theory/strings/inference_manager.h +++ b/src/theory/strings/inference_manager.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -23,12 +23,16 @@ #include "context/cdhashset.h" #include "context/context.h" #include "expr/node.h" +#include "expr/proof_node_manager.h" #include "theory/ext_theory.h" +#include "theory/inference_manager_buffered.h" #include "theory/output_channel.h" #include "theory/strings/infer_info.h" +#include "theory/strings/infer_proof_cons.h" #include "theory/strings/sequences_stats.h" #include "theory/strings/solver_state.h" #include "theory/strings/term_registry.h" +#include "theory/theory_inference_manager.h" #include "theory/uf/equality_engine.h" namespace CVC4 { @@ -65,27 +69,29 @@ namespace strings { * theory of strings, e.g. sendPhaseRequirement, setIncomplete, and * with the extended theory object e.g. markCongruent. */ -class InferenceManager +class InferenceManager : public InferenceManagerBuffered { typedef context::CDHashSet<Node, NodeHashFunction> NodeSet; typedef context::CDHashMap<Node, Node, NodeHashFunction> NodeNodeMap; + friend class InferInfo; public: - InferenceManager(context::Context* c, - context::UserContext* u, + InferenceManager(Theory& t, SolverState& s, TermRegistry& tr, ExtTheory& e, - OutputChannel& out, - SequencesStatistics& statistics); + SequencesStatistics& statistics, + ProofNodeManager* pnm); ~InferenceManager() {} - /** send assumption - * - * This is called when a fact is asserted to TheoryStrings. It adds lit - * to the equality engine maintained by this class immediately. + /** + * Do pending method. This processes all pending facts, lemmas and pending + * phase requests based on the policy of this manager. This means that + * we process the pending facts first and abort if in conflict. Otherwise, we + * process the pending lemmas and then the pending phase requirements. + * Notice that we process the pending lemmas even if there were facts. 
*/ - void sendAssumption(TNode lit); + void doPending(); /** send internal inferences * @@ -109,26 +115,27 @@ class InferenceManager bool sendInternalInference(std::vector<Node>& exp, Node conc, Inference infer); + /** send inference * - * This function should be called when ( exp ^ exp_n ) => eq. The set exp + * This function should be called when exp => eq. The set exp * contains literals that are explainable, i.e. those that hold in the * equality engine of the theory of strings. On the other hand, the set - * exp_n ("explanations new") contain nodes that are not explainable by the - * theory of strings. This method may call sendLemma or otherwise add a - * InferInfo to d_pending, indicating a fact should be asserted to the - * equality engine. Overall, the result of this method is one of the - * following: + * noExplain contains nodes that are not explainable by the theory of strings. + * This method may call sendLemma or otherwise add a InferInfo to d_pending, + * indicating a fact should be asserted to the equality engine. Overall, the + * result of this method is one of the following: * - * [1] (No-op) Do nothing if eq is true, + * [1] (No-op) Do nothing if eq is equivalent to true, * * [2] (Infer) Indicate that eq should be added to the equality engine of this * class with explanation exp, where exp is a set of literals that currently * hold in the equality engine. We add this to the pending vector d_pending. * - * [3] (Lemma) Indicate that the lemma ( EXPLAIN(exp) ^ exp_n ) => eq should - * be sent on the output channel of the theory of strings, where EXPLAIN - * returns the explanation of the node in exp in terms of the literals + * [3] (Lemma) Indicate that the lemma + * ( EXPLAIN(exp \ noExplain) ^ noExplain ) => eq + * should be sent on the output channel of the theory of strings, where + * EXPLAIN returns the explanation of the node in exp in terms of the literals * asserted to the theory of strings, as computed by the equality engine. 
* This is also added to a pending vector, d_pendingLem. * @@ -136,25 +143,35 @@ class InferenceManager * channel of the theory of strings. * * Determining which case to apply depends on the form of eq and whether - * exp_n is empty. In particular, lemmas must be used whenever exp_n is - * non-empty, conflicts are used when exp_n is empty and eq is false. + * noExplain is empty. In particular, lemmas must be used whenever noExplain + * is non-empty, conflicts are used when noExplain is empty and eq is false. * - * The argument infer identifies the reason for inference, used for + * @param exp The explanation of eq. + * @param noExplain The subset of exp that cannot be explained by the + * equality engine. This may impact whether we are processing this call as a + * fact or as a lemma. + * @param eq The conclusion. + * @param infer Identifies the reason for inference, used for * debugging. This updates the statistics about the number of inferences made * of each type. - * - * If the flag asLemma is true, then this method will send a lemma instead + * @param isRev Whether this is the "reverse variant" of the inference, which + * is used as a hint for proof reconstruction. + * @param asLemma If true, then this method will send a lemma instead * of a fact whenever applicable. + * @return true if the inference was not trivial (e.g. its conclusion did + * not rewrite to true). 
*/ - void sendInference(const std::vector<Node>& exp, - const std::vector<Node>& exp_n, + bool sendInference(const std::vector<Node>& exp, + const std::vector<Node>& noExplain, Node eq, Inference infer, + bool isRev = false, bool asLemma = false); - /** same as above, but where exp_n is empty */ - void sendInference(const std::vector<Node>& exp, + /** same as above, but where noExplain is empty */ + bool sendInference(const std::vector<Node>& exp, Node eq, Inference infer, + bool isRev = false, bool asLemma = false); /** Send inference @@ -169,7 +186,7 @@ class InferenceManager * If the flag asLemma is true, then this method will send a lemma instead * of a fact whenever applicable. */ - void sendInference(const InferInfo& ii, bool asLemma = false); + void sendInference(InferInfo& ii, bool asLemma = false); /** Send split * * This requests that ( a = b V a != b ) is sent on the output channel as a @@ -184,17 +201,6 @@ class InferenceManager * otherwise. A split is trivial if a=b rewrites to a constant. */ bool sendSplit(Node a, Node b, Inference infer, bool preq = true); - - /** Send phase requirement - * - * This method is called to indicate this class should send a phase - * requirement request to the output channel for literal lit to be - * decided with polarity pol. This requirement is processed at the same time - * lemmas are sent on the output channel of this class during this call to - * check. This means if the current lemmas of this class are abandoned (due - * to a conflict), the phase requirement is not processed. 
- */ - void sendPhaseRequirement(Node lit, bool pol); /** * Set that we are incomplete for the current set of assertions (in other * words, we must answer "unknown" instead of "sat"); this calls the output @@ -211,60 +217,13 @@ class InferenceManager /** Adds lit to the vector exp if it is non-null */ void addToExplanation(Node lit, std::vector<Node>& exp) const; //----------------------------end constructing antecedants - /** Do pending facts - * - * This method asserts pending facts (d_pending) with explanations - * (d_pendingExp) to the equality engine of the theory of strings via calls - * to assertPendingFact. - * - * It terminates early if a conflict is encountered, for instance, by - * equality reasoning within the equality engine. - * - * Regardless of whether a conflict is encountered, the vector d_pending - * and map d_pendingExp are cleared. - */ - void doPendingFacts(); - /** Do pending lemmas - * - * This method flushes all pending lemmas (d_pending_lem) to the output - * channel of theory of strings. - * - * Like doPendingFacts, this function will terminate early if a conflict - * has already been encountered by the theory of strings. The vector - * d_pending_lem is cleared regardless of whether a conflict is discovered. - * - * Notice that as a result of the above design, some lemmas may be "dropped" - * if a conflict is discovered in between when a lemma is added to the - * pending vector of this class (via a sendInference call). Lemmas - * e.g. those that are required for initialization should not be sent via - * this class, since they should never be dropped. - */ - void doPendingLemmas(); /** * Have we processed an inference during this call to check? In particular, * this returns true if we have a pending fact or lemma, or have encountered * a conflict. */ bool hasProcessed() const; - /** Do we have a pending fact to add to the equality engine? 
*/ - bool hasPendingFact() const { return !d_pending.empty(); } - /** Do we have a pending lemma to send on the output channel? */ - bool hasPendingLemma() const { return !d_pendingLem.empty(); } - /** make explanation - * - * This returns a node corresponding to the explanation of formulas in a, - * interpreted conjunctively. The returned node is a conjunction of literals - * that have been asserted to the equality engine. - */ - Node mkExplain(const std::vector<Node>& a) const; - /** Same as above, but the new literals an are append to the result */ - Node mkExplain(const std::vector<Node>& a, const std::vector<Node>& an) const; - /** - * Explain literal l, add conjuncts to assumptions vector instead of making - * the node corresponding to their conjunction. - */ - void explain(TNode literal, std::vector<TNode>& assumptions) const; // ------------------------------------------------- extended theory /** * Mark that terms a and b are congruent in the current context. @@ -281,48 +240,33 @@ class InferenceManager void markReduced(Node n, bool contextDepend = true); // ------------------------------------------------- end extended theory - private: - /** assert pending fact - * - * This asserts atom with polarity to the equality engine of this class, - * where exp is the explanation of why (~) atom holds. - * - * This call may trigger further initialization steps involving the terms - * of atom, including calls to registerTerm. + /** + * Called when ii is ready to be processed as a conflict. This makes a + * trusted node whose generator is the underlying proof equality engine + * (if it exists), and sends it on the output channel. 
*/ - void assertPendingFact(Node atom, bool polarity, Node exp); + void processConflict(const InferInfo& ii); + + private: + /** Called when ii is ready to be processed as a fact */ + bool processFact(InferInfo& ii); + /** Called when ii is ready to be processed as a lemma */ + bool processLemma(InferInfo& ii); /** Reference to the solver state of the theory of strings. */ SolverState& d_state; /** Reference to the term registry of theory of strings */ TermRegistry& d_termReg; /** the extended theory object for the theory of strings */ ExtTheory& d_extt; - /** A reference to the output channel of the theory of strings. */ - OutputChannel& d_out; /** Reference to the statistics for the theory of strings/sequences. */ SequencesStatistics& d_statistics; - + /** Conversion from inferences to proofs */ + std::unique_ptr<InferProofCons> d_ipc; /** Common constants */ Node d_true; Node d_false; Node d_zero; Node d_one; - /** - * The list of pending literals to assert to the equality engine along with - * their explanation. - */ - std::vector<InferInfo> d_pending; - /** A map from literals to their pending phase requirement */ - std::map<Node, bool> d_pendingReqPhase; - /** A list of pending lemmas to be sent on the output channel. */ - std::vector<InferInfo> d_pendingLem; - /** - * The keep set of this class. This set is maintained to ensure that - * facts and their explanations are ref-counted. Since facts and their - * explanations are SAT-context-dependent, this set is also - * SAT-context-dependent. 
- */ - NodeSet d_keep; }; } // namespace strings diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 226dcbd17..020cedb30 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -6,7 +6,7 @@ theory THEORY_STRINGS ::CVC4::theory::strings::TheoryStrings "theory/strings/theory_strings.h" -properties check parametric propagate presolve +properties check parametric presolve rewriter ::CVC4::theory::strings::SequencesRewriter "theory/strings/sequences_rewriter.h" @@ -83,13 +83,14 @@ constant CONST_SEQUENCE \ "a sequence of characters" operator SEQ_UNIT 1 "a sequence of length one" +operator SEQ_NTH 2 "The nth element of a sequence" # equal equal / less than / output operator STRING_TO_REGEXP 1 "convert string to regexp" operator REGEXP_CONCAT 2: "regexp concat" operator REGEXP_UNION 2: "regexp union" operator REGEXP_INTER 2: "regexp intersection" -operator REGEXP_DIFF 2: "regexp difference" +operator REGEXP_DIFF 2 "regexp difference" operator REGEXP_STAR 1 "regexp *" operator REGEXP_PLUS 1 "regexp +" operator REGEXP_OPT 1 "regexp ?" @@ -169,5 +170,6 @@ typerule STRING_TOLOWER "SimpleTypeRule<RString, AString>" typerule CONST_SEQUENCE ::CVC4::theory::strings::ConstSequenceTypeRule typerule SEQ_UNIT ::CVC4::theory::strings::SeqUnitTypeRule +typerule SEQ_NTH ::CVC4::theory::strings::SeqNthTypeRule endtheory diff --git a/src/theory/strings/normal_form.cpp b/src/theory/strings/normal_form.cpp index 02af4904a..7fdf3ff76 100644 --- a/src/theory/strings/normal_form.cpp +++ b/src/theory/strings/normal_form.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -83,6 +83,7 @@ void NormalForm::addToExplanation(Node exp, unsigned new_val, unsigned new_rev_val) { + Assert(!exp.isConst()); if (std::find(d_exp.begin(), d_exp.end(), exp) == d_exp.end()) { d_exp.push_back(exp); @@ -177,11 +178,9 @@ void NormalForm::getExplanationForPrefixEq(NormalForm& nfi, Trace("strings-explain-prefix") << "Included " << curr_exp.size() << " / " << (nfi.d_exp.size() + nfj.d_exp.size()) << std::endl; - if (nfi.d_base != nfj.d_base) - { - Node eq = nfi.d_base.eqNode(nfj.d_base); - curr_exp.push_back(eq); - } + // add explanation for why they are equal + Node eq = nfi.d_base.eqNode(nfj.d_base); + curr_exp.push_back(eq); } } // namespace strings diff --git a/src/theory/strings/normal_form.h b/src/theory/strings/normal_form.h index bd60b0252..568b83319 100644 --- a/src/theory/strings/normal_form.h +++ b/src/theory/strings/normal_form.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli, Mathias Preiner ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/proof_checker.cpp b/src/theory/strings/proof_checker.cpp new file mode 100644 index 000000000..0b6cf6652 --- /dev/null +++ b/src/theory/strings/proof_checker.cpp @@ -0,0 +1,509 @@ +/********************* */ +/*! \file proof_checker.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS + ** in the top-level source directory and their institutional affiliations. + ** All rights reserved. 
See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of strings proof checker + **/ + +#include "theory/strings/proof_checker.h" + +#include "expr/sequence.h" +#include "options/strings_options.h" +#include "theory/rewriter.h" +#include "theory/strings/core_solver.h" +#include "theory/strings/regexp_elim.h" +#include "theory/strings/regexp_operation.h" +#include "theory/strings/term_registry.h" +#include "theory/strings/theory_strings_preprocess.h" +#include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" + +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +void StringProofRuleChecker::registerTo(ProofChecker* pc) +{ + pc->registerChecker(PfRule::CONCAT_EQ, this); + pc->registerChecker(PfRule::CONCAT_UNIFY, this); + pc->registerChecker(PfRule::CONCAT_CONFLICT, this); + pc->registerChecker(PfRule::CONCAT_SPLIT, this); + pc->registerChecker(PfRule::CONCAT_CSPLIT, this); + pc->registerChecker(PfRule::CONCAT_LPROP, this); + pc->registerChecker(PfRule::CONCAT_CPROP, this); + pc->registerChecker(PfRule::STRING_DECOMPOSE, this); + pc->registerChecker(PfRule::STRING_LENGTH_POS, this); + pc->registerChecker(PfRule::STRING_LENGTH_NON_EMPTY, this); + pc->registerChecker(PfRule::STRING_REDUCTION, this); + pc->registerChecker(PfRule::STRING_EAGER_REDUCTION, this); + pc->registerChecker(PfRule::RE_INTER, this); + pc->registerChecker(PfRule::RE_UNFOLD_POS, this); + pc->registerChecker(PfRule::RE_UNFOLD_NEG, this); + pc->registerChecker(PfRule::RE_UNFOLD_NEG_CONCAT_FIXED, this); + pc->registerChecker(PfRule::RE_ELIM, this); + pc->registerChecker(PfRule::STRING_CODE_INJ, this); + pc->registerChecker(PfRule::STRING_SEQ_UNIT_INJ, this); + // trusted rules + pc->registerTrustedChecker(PfRule::STRING_TRUST, this, 2); +} + +Node StringProofRuleChecker::checkInternal(PfRule id, + const std::vector<Node>& children, + const std::vector<Node>& 
args) +{ + NodeManager* nm = NodeManager::currentNM(); + // core rules for word equations + if (id == PfRule::CONCAT_EQ || id == PfRule::CONCAT_UNIFY + || id == PfRule::CONCAT_CONFLICT || id == PfRule::CONCAT_SPLIT + || id == PfRule::CONCAT_CSPLIT || id == PfRule::CONCAT_LPROP + || id == PfRule::CONCAT_CPROP) + { + Trace("strings-pfcheck") << "Checking id " << id << std::endl; + Assert(children.size() >= 1); + Assert(args.size() == 1); + // all rules have an equality + if (children[0].getKind() != EQUAL) + { + return Node::null(); + } + // convert to concatenation form + std::vector<Node> tvec; + std::vector<Node> svec; + utils::getConcat(children[0][0], tvec); + utils::getConcat(children[0][1], svec); + size_t nchildt = tvec.size(); + size_t nchilds = svec.size(); + TypeNode stringType = children[0][0].getType(); + // extract the Boolean corresponding to whether the rule is reversed + bool isRev; + if (!getBool(args[0], isRev)) + { + return Node::null(); + } + if (id == PfRule::CONCAT_EQ) + { + Assert(children.size() == 1); + size_t index = 0; + std::vector<Node> tremVec; + std::vector<Node> sremVec; + // scan the concatenation until we exhaust child proofs + while (index < nchilds && index < nchildt) + { + Node currT = tvec[isRev ? (nchildt - 1 - index) : index]; + Node currS = svec[isRev ? (nchilds - 1 - index) : index]; + if (currT != currS) + { + if (currT.isConst() && currS.isConst()) + { + size_t sindex; + // get the equal prefix/suffix, strip and add the remainders + Node currR = Word::splitConstant(currT, currS, sindex, isRev); + if (!currR.isNull()) + { + // add the constant to remainder vec + std::vector<Node>& rem = sindex == 0 ? tremVec : sremVec; + rem.push_back(currR); + // ignore the current component + index++; + // In other words, if we have (currS,currT) = ("ab","abc"), then + // we proceed to the next component and add currR = "c" to + // tremVec. 
+ } + // otherwise if we are not the same prefix, then both will be added + // Notice that we do not add maximal prefixes, in other words, + // ("abc", "abd") may be added to the remainder vectors, and not + // ("c", "d"). + } + break; + } + index++; + } + Assert(index <= nchildt); + Assert(index <= nchilds); + // the remainders are equal + tremVec.insert(isRev ? tremVec.begin() : tremVec.end(), + tvec.begin() + (isRev ? 0 : index), + tvec.begin() + nchildt - (isRev ? index : 0)); + sremVec.insert(isRev ? sremVec.begin() : sremVec.end(), + svec.begin() + (isRev ? 0 : index), + svec.begin() + nchilds - (isRev ? index : 0)); + // convert back to node + Node trem = utils::mkConcat(tremVec, stringType); + Node srem = utils::mkConcat(sremVec, stringType); + return trem.eqNode(srem); + } + // all remaining rules do something with the first child of each side + Node t0 = tvec[isRev ? nchildt - 1 : 0]; + Node s0 = svec[isRev ? nchilds - 1 : 0]; + if (id == PfRule::CONCAT_UNIFY) + { + Assert(children.size() == 2); + if (children[1].getKind() != EQUAL) + { + return Node::null(); + } + for (size_t i = 0; i < 2; i++) + { + Node l = children[1][i]; + if (l.getKind() != STRING_LENGTH) + { + return Node::null(); + } + Node term = i == 0 ? t0 : s0; + if (l[0] == term) + { + continue; + } + // could be a spliced constant + bool success = false; + if (term.isConst() && l[0].isConst()) + { + size_t lenL = Word::getLength(l[0]); + success = (isRev && l[0] == Word::suffix(term, lenL)) + || (!isRev && l[0] == Word::prefix(term, lenL)); + } + if (!success) + { + return Node::null(); + } + } + return children[1][0][0].eqNode(children[1][1][0]); + } + else if (id == PfRule::CONCAT_CONFLICT) + { + Assert(children.size() == 1); + if (!t0.isConst() || !s0.isConst()) + { + // not constants + return Node::null(); + } + size_t sindex; + Node r0 = Word::splitConstant(t0, s0, sindex, isRev); + if (!r0.isNull()) + { + // Not a conflict due to constants, i.e. s0 is a prefix of t0 or vice + // versa. 
+ return Node::null(); + } + return nm->mkConst(false); + } + else if (id == PfRule::CONCAT_SPLIT) + { + Assert(children.size() == 2); + if (children[1].getKind() != NOT || children[1][0].getKind() != EQUAL + || children[1][0][0].getKind() != STRING_LENGTH + || children[1][0][0][0] != t0 + || children[1][0][1].getKind() != STRING_LENGTH + || children[1][0][1][0] != s0) + { + return Node::null(); + } + } + else if (id == PfRule::CONCAT_CSPLIT) + { + Assert(children.size() == 2); + Node zero = nm->mkConst(Rational(0)); + Node one = nm->mkConst(Rational(1)); + if (children[1].getKind() != NOT || children[1][0].getKind() != EQUAL + || children[1][0][0].getKind() != STRING_LENGTH + || children[1][0][0][0] != t0 || children[1][0][1] != zero) + { + return Node::null(); + } + if (!s0.isConst() || !s0.getType().isStringLike() || Word::isEmpty(s0)) + { + return Node::null(); + } + } + else if (id == PfRule::CONCAT_LPROP) + { + Assert(children.size() == 2); + if (children[1].getKind() != GT + || children[1][0].getKind() != STRING_LENGTH + || children[1][0][0] != t0 + || children[1][1].getKind() != STRING_LENGTH + || children[1][1][0] != s0) + { + return Node::null(); + } + } + else if (id == PfRule::CONCAT_CPROP) + { + Assert(children.size() == 2); + Node zero = nm->mkConst(Rational(0)); + + Trace("pfcheck-strings-cprop") + << "CONCAT_PROP, isRev=" << isRev << std::endl; + if (children[1].getKind() != NOT || children[1][0].getKind() != EQUAL + || children[1][0][0].getKind() != STRING_LENGTH + || children[1][0][0][0] != t0 || children[1][0][1] != zero) + { + Trace("pfcheck-strings-cprop") + << "...failed pattern match" << std::endl; + return Node::null(); + } + if (tvec.size() <= 1) + { + Trace("pfcheck-strings-cprop") + << "...failed adjacent constant" << std::endl; + return Node::null(); + } + Node w1 = tvec[isRev ? 
nchildt - 2 : 1]; + if (!w1.isConst() || !w1.getType().isStringLike() || Word::isEmpty(w1)) + { + Trace("pfcheck-strings-cprop") + << "...failed adjacent constant content" << std::endl; + return Node::null(); + } + Node w2 = s0; + if (!w2.isConst() || !w2.getType().isStringLike() || Word::isEmpty(w2)) + { + Trace("pfcheck-strings-cprop") << "...failed constant" << std::endl; + return Node::null(); + } + // getConclusion expects the adjacent constant to be included + t0 = nm->mkNode(STRING_CONCAT, isRev ? w1 : t0, isRev ? t0 : w1); + } + // use skolem cache + SkolemCache skc(false); + std::vector<Node> newSkolems; + Node conc = CoreSolver::getConclusion(t0, s0, id, isRev, &skc, newSkolems); + return conc; + } + else if (id == PfRule::STRING_DECOMPOSE) + { + Assert(children.size() == 1); + Assert(args.size() == 1); + bool isRev; + if (!getBool(args[0], isRev)) + { + return Node::null(); + } + Node atom = children[0]; + if (atom.getKind() != GEQ || atom[0].getKind() != STRING_LENGTH) + { + return Node::null(); + } + SkolemCache sc(false); + std::vector<Node> newSkolems; + Node conc = CoreSolver::getConclusion( + atom[0][0], atom[1], id, isRev, &sc, newSkolems); + return conc; + } + else if (id == PfRule::STRING_REDUCTION + || id == PfRule::STRING_EAGER_REDUCTION + || id == PfRule::STRING_LENGTH_POS) + { + Assert(children.empty()); + Assert(args.size() >= 1); + // These rules are based on calling a C++ method for returning a valid + // lemma involving a single argument term. + // Must convert to skolem form. 
+ Node t = args[0]; + Node ret; + if (id == PfRule::STRING_REDUCTION) + { + Assert(args.size() == 1); + // we do not use optimizations + SkolemCache skc(false); + std::vector<Node> conj; + ret = StringsPreprocess::reduce(t, conj, &skc); + conj.push_back(t.eqNode(ret)); + ret = nm->mkAnd(conj); + } + else if (id == PfRule::STRING_EAGER_REDUCTION) + { + Assert(args.size() == 1); + SkolemCache skc(false); + ret = TermRegistry::eagerReduce(t, &skc); + } + else if (id == PfRule::STRING_LENGTH_POS) + { + Assert(args.size() == 1); + ret = TermRegistry::lengthPositive(t); + } + if (ret.isNull()) + { + return Node::null(); + } + return ret; + } + else if (id == PfRule::STRING_LENGTH_NON_EMPTY) + { + Assert(children.size() == 1); + Assert(args.empty()); + Node nemp = children[0]; + if (nemp.getKind() != NOT || nemp[0].getKind() != EQUAL + || !nemp[0][1].isConst() || !nemp[0][1].getType().isStringLike()) + { + return Node::null(); + } + if (!Word::isEmpty(nemp[0][1])) + { + return Node::null(); + } + Node zero = nm->mkConst(Rational(0)); + Node clen = nm->mkNode(STRING_LENGTH, nemp[0][0]); + return clen.eqNode(zero).notNode(); + } + else if (id == PfRule::RE_INTER) + { + Assert(children.size() >= 1); + Assert(args.empty()); + std::vector<Node> reis; + Node x; + // make the regular expression intersection that summarizes all + // memberships in the explanation + for (const Node& c : children) + { + bool polarity = c.getKind() != NOT; + Node catom = polarity ? c : c[0]; + if (catom.getKind() != STRING_IN_REGEXP) + { + return Node::null(); + } + if (x.isNull()) + { + x = catom[0]; + } + else if (x != catom[0]) + { + // different LHS + return Node::null(); + } + Node xcurr = catom[0]; + Node rcurr = + polarity ? catom[1] : nm->mkNode(REGEXP_COMPLEMENT, catom[1]); + reis.push_back(rcurr); + } + Node rei = reis.size() == 1 ? 
reis[0] : nm->mkNode(REGEXP_INTER, reis); + return nm->mkNode(STRING_IN_REGEXP, x, rei); + } + else if (id == PfRule::RE_UNFOLD_POS || id == PfRule::RE_UNFOLD_NEG + || id == PfRule::RE_UNFOLD_NEG_CONCAT_FIXED) + { + Assert(children.size() == 1); + Assert(args.empty()); + Node skChild = children[0]; + if (id == PfRule::RE_UNFOLD_NEG || id == PfRule::RE_UNFOLD_NEG_CONCAT_FIXED) + { + if (skChild.getKind() != NOT || skChild[0].getKind() != STRING_IN_REGEXP) + { + Trace("strings-pfcheck") << "...fail, non-neg member" << std::endl; + return Node::null(); + } + } + else if (skChild.getKind() != STRING_IN_REGEXP) + { + Trace("strings-pfcheck") << "...fail, non-pos member" << std::endl; + return Node::null(); + } + Node conc; + if (id == PfRule::RE_UNFOLD_POS) + { + std::vector<Node> newSkolems; + SkolemCache sc; + conc = RegExpOpr::reduceRegExpPos(skChild, &sc, newSkolems); + } + else if (id == PfRule::RE_UNFOLD_NEG) + { + conc = RegExpOpr::reduceRegExpNeg(skChild); + } + else if (id == PfRule::RE_UNFOLD_NEG_CONCAT_FIXED) + { + if (skChild[0][1].getKind() != REGEXP_CONCAT) + { + Trace("strings-pfcheck") << "...fail, no concat regexp" << std::endl; + return Node::null(); + } + size_t index; + Node reLen = RegExpOpr::getRegExpConcatFixed(skChild[0][1], index); + if (reLen.isNull()) + { + Trace("strings-pfcheck") << "...fail, non-fixed lengths" << std::endl; + return Node::null(); + } + conc = RegExpOpr::reduceRegExpNegConcatFixed(skChild, reLen, index); + } + return conc; + } + else if (id == PfRule::RE_ELIM) + { + Assert(children.size() == 1); + Assert(args.empty()); + return RegExpElimination::eliminate(children[0]); + } + else if (id == PfRule::STRING_CODE_INJ) + { + Assert(children.empty()); + Assert(args.size() == 2); + Assert(args[0].getType().isStringLike() + && args[1].getType().isStringLike()); + Node c1 = nm->mkNode(STRING_TO_CODE, args[0]); + Node c2 = nm->mkNode(STRING_TO_CODE, args[1]); + Node eqNegOne = c1.eqNode(nm->mkConst(Rational(-1))); + Node deq = 
c1.eqNode(c2).negate(); + Node eqn = args[0].eqNode(args[1]); + return nm->mkNode(kind::OR, eqNegOne, deq, eqn); + } + else if (id == PfRule::STRING_SEQ_UNIT_INJ) + { + Assert(children.size() == 1); + Assert(args.empty()); + if (children[0].getKind() != EQUAL) + { + return Node::null(); + } + Node t[2]; + for (size_t i = 0; i < 2; i++) + { + if (children[0][i].getKind() == SEQ_UNIT) + { + t[i] = children[0][i][0]; + } + else if (children[0][i].isConst()) + { + // notice that Word::getChars is not the right call here, since it + // gets a vector of sequences of length one. We actually need to + // extract the character, which is a sequence-specific operation. + const Sequence& sx = children[0][i].getConst<Sequence>(); + const std::vector<Node>& vec = sx.getVec(); + if (vec.size() == 1) + { + // the character of the single character sequence + t[i] = vec[0]; + } + } + if (t[i].isNull()) + { + return Node::null(); + } + } + Trace("strings-pfcheck-debug") + << "STRING_SEQ_UNIT_INJ: " << children[0] << " => " << t[0] + << " == " << t[1] << std::endl; + AlwaysAssert(t[0].getType() == t[1].getType()); + return t[0].eqNode(t[1]); + } + else if (id == PfRule::STRING_TRUST) + { + // "trusted" rules + Assert(!args.empty()); + Assert(args[0].getType().isBoolean()); + return args[0]; + } + return Node::null(); +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/proof_checker.h b/src/theory/strings/proof_checker.h new file mode 100644 index 000000000..a6dcd9df2 --- /dev/null +++ b/src/theory/strings/proof_checker.h @@ -0,0 +1,49 @@ +/********************* */ +/*! \file proof_checker.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS + ** in the top-level source directory and their institutional affiliations. + ** All rights reserved. 
See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Strings proof checker utility
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__STRINGS__PROOF_CHECKER_H
+#define CVC4__THEORY__STRINGS__PROOF_CHECKER_H
+
+#include "expr/node.h"
+#include "expr/proof_checker.h"
+#include "expr/proof_node.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+/**
+ * A checker for strings proofs.
+ *
+ * This class specializes ProofRuleChecker for the proof rules of the theory
+ * of strings. It declares no data members of its own.
+ */
+class StringProofRuleChecker : public ProofRuleChecker
+{
+ public:
+  StringProofRuleChecker() {}
+  ~StringProofRuleChecker() {}
+
+  /**
+   * Register all rules owned by this rule checker in pc.
+   *
+   * @param pc the proof checker that this object is registered to
+   */
+  void registerTo(ProofChecker* pc) override;
+
+ protected:
+  /**
+   * Return the conclusion of the given proof step, or null if it is invalid.
+   *
+   * @param id the proof rule of the step
+   * @param children the premises of the step
+   * @param args the arguments of the step
+   * @return the conclusion of the step, or the null node if the step is not
+   * a valid application of rule id
+   */
+  Node checkInternal(PfRule id,
+                     const std::vector<Node>& children,
+                     const std::vector<Node>& args) override;
+};
+
+} // namespace strings
+} // namespace theory
+} // namespace CVC4
+
+#endif /* CVC4__THEORY__STRINGS__PROOF_CHECKER_H */
diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp index 37920d248..1d0db0e4d 100644 --- a/src/theory/strings/regexp_elim.cpp +++ b/src/theory/strings/regexp_elim.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Tianyi Liang, Mathias Preiner ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/regexp_elim.h b/src/theory/strings/regexp_elim.h index e5f2fa854..0c1acd29d 100644 --- a/src/theory/strings/regexp_elim.h +++ b/src/theory/strings/regexp_elim.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Mathias Preiner ** This file is part of the CVC4 project.
** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/regexp_entail.cpp b/src/theory/strings/regexp_entail.cpp index 7e1f42f37..0ab634c88 100644 --- a/src/theory/strings/regexp_entail.cpp +++ b/src/theory/strings/regexp_entail.cpp @@ -2,10 +2,10 @@ /*! \file regexp_entail.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli, Mathias Preiner + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/regexp_entail.h b/src/theory/strings/regexp_entail.h index 9fb797c45..2fe9961de 100644 --- a/src/theory/strings/regexp_entail.h +++ b/src/theory/strings/regexp_entail.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index a91210a7b..24d2e00bd 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -2,10 +2,10 @@ /*! \file regexp_operation.cpp ** \verbatim ** Top contributors (to current version): - ** Tianyi Liang, Andrew Reynolds, Andres Noetzli + ** Tianyi Liang, Andrew Reynolds, Mathias Preiner ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -16,20 +16,20 @@ #include "theory/strings/regexp_operation.h" -#include "expr/kind.h" +#include "expr/node_algorithm.h" #include "options/strings_options.h" +#include "theory/rewriter.h" #include "theory/strings/regexp_entail.h" #include "theory/strings/theory_strings_utils.h" #include "theory/strings/word.h" -using namespace CVC4; using namespace CVC4::kind; namespace CVC4 { namespace theory { namespace strings { -RegExpOpr::RegExpOpr() +RegExpOpr::RegExpOpr(SkolemCache* sc) : d_true(NodeManager::currentNM()->mkConst(true)), d_false(NodeManager::currentNM()->mkConst(false)), d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY, @@ -38,7 +38,9 @@ RegExpOpr::RegExpOpr() d_one(NodeManager::currentNM()->mkConst(::CVC4::Rational(1))), d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA, std::vector<Node>{})), - d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)) + d_sigma_star( + NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)), + d_sc(sc) { d_emptyString = 
Word::mkEmptyWord(NodeManager::currentNM()->stringType()); @@ -117,161 +119,147 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r) // 0-unknown, 1-yes, 2-no int RegExpOpr::delta( Node r, Node &exp ) { - Trace("regexp-delta") << "RegExp-Delta starts with /" << mkString( r ) << "/" << std::endl; + std::map<Node, std::pair<int, Node> >::const_iterator itd = + d_delta_cache.find(r); + if (itd != d_delta_cache.end()) + { + // already computed + exp = itd->second.second; + return itd->second.first; + } + Trace("regexp-delta") << "RegExpOpr::delta: " << r << std::endl; int ret = 0; - if( d_delta_cache.find( r ) != d_delta_cache.end() ) { - ret = d_delta_cache[r].first; - exp = d_delta_cache[r].second; - } else { - Kind k = r.getKind(); - switch( k ) { - case kind::REGEXP_EMPTY: { - ret = 2; - break; - } - case kind::REGEXP_SIGMA: { - ret = 2; - break; - } - case kind::STRING_TO_REGEXP: { - Node tmp = Rewriter::rewrite(r[0]); - if(tmp.isConst()) { - if(tmp == d_emptyString) { - ret = 1; - } else { - ret = 2; - } + NodeManager* nm = NodeManager::currentNM(); + Kind k = r.getKind(); + switch (k) + { + case REGEXP_EMPTY: + case REGEXP_SIGMA: + case REGEXP_RANGE: + { + // does not contain empty string + ret = 2; + break; + } + case STRING_TO_REGEXP: + { + Node tmp = Rewriter::rewrite(r[0]); + if (tmp.isConst()) + { + if (tmp == d_emptyString) + { + ret = 1; } else { - ret = 0; - if(tmp.getKind() == kind::STRING_CONCAT) { - for(unsigned i=0; i<tmp.getNumChildren(); i++) { - if(tmp[i].isConst()) { - ret = 2; break; - } - } - - } - if(ret == 0) { - exp = r[0].eqNode(d_emptyString); - } - } - break; - } - case kind::REGEXP_CONCAT: { - bool flag = false; - std::vector< Node > vec_nodes; - for(unsigned i=0; i<r.getNumChildren(); ++i) { - Node exp2; - int tmp = delta( r[i], exp2 ); - if(tmp == 2) { - ret = 2; - break; - } else if(tmp == 0) { - vec_nodes.push_back( exp2 ); - flag = true; - } - } - if(ret != 2) { - if(!flag) { - ret = 1; - } else { - exp = vec_nodes.size()==1 ? 
vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes); - } + ret = 2; } - break; } - case kind::REGEXP_UNION: { - bool flag = false; - std::vector< Node > vec_nodes; - for(unsigned i=0; i<r.getNumChildren(); ++i) { - Node exp2; - int tmp = delta( r[i], exp2 ); - if(tmp == 1) { - ret = 1; - break; - } else if(tmp == 0) { - vec_nodes.push_back( exp2 ); - flag = true; + else + { + ret = 0; + if (tmp.getKind() == STRING_CONCAT) + { + for (const Node& tmpc : tmp) + { + if (tmpc.isConst()) + { + ret = 2; + break; + } } } - if(ret != 1) { - if(!flag) { - ret = 2; - } else { - exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::OR, vec_nodes); - } + if (ret == 0) + { + exp = r[0].eqNode(d_emptyString); } - break; } - case kind::REGEXP_INTER: { - bool flag = false; - std::vector< Node > vec_nodes; - for(unsigned i=0; i<r.getNumChildren(); ++i) { - Node exp2; - int tmp = delta( r[i], exp2 ); - if(tmp == 2) { - ret = 2; - break; - } else if(tmp == 0) { - vec_nodes.push_back( exp2 ); - flag = true; - } + break; + } + case REGEXP_CONCAT: + case REGEXP_UNION: + case REGEXP_INTER: + { + // has there been an unknown child? + bool hasUnknownChild = false; + std::vector<Node> vec; + int checkTmp = k == REGEXP_UNION ? 1 : 2; + int retTmp = k == REGEXP_UNION ? 2 : 1; + for (const Node& rc : r) + { + Node exp2; + int tmp = delta(rc, exp2); + if (tmp == checkTmp) + { + // return is implied by the child's return value + ret = checkTmp; + break; } - if(ret != 2) { - if(!flag) { - ret = 1; - } else { - exp = vec_nodes.size()==1 ? 
vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes); - } + else if (tmp == 0) + { + // unknown if child contains empty string + Assert(!exp2.isNull()); + vec.push_back(exp2); + hasUnknownChild = true; } - break; - } - case kind::REGEXP_STAR: { - ret = 1; - break; - } - case kind::REGEXP_PLUS: { - ret = delta( r[0], exp ); - break; - } - case kind::REGEXP_OPT: { - ret = 1; - break; - } - case kind::REGEXP_RANGE: { - ret = 2; - break; } - case kind::REGEXP_LOOP: { - uint32_t lo = utils::getLoopMinOccurrences(r); - if (lo == 0) + if (ret != checkTmp) + { + if (!hasUnknownChild) { - ret = 1; + ret = retTmp; } else { - ret = delta(r[0], exp); + Kind kr = k == REGEXP_UNION ? OR : AND; + exp = vec.size() == 1 ? vec[0] : nm->mkNode(kr, vec); } - break; } - case kind::REGEXP_COMPLEMENT: + break; + } + case REGEXP_STAR: + case REGEXP_OPT: + { + // contains empty string + ret = 1; + break; + } + case REGEXP_PLUS: + { + ret = delta(r[0], exp); + break; + } + case REGEXP_LOOP: + { + uint32_t lo = utils::getLoopMinOccurrences(r); + if (lo == 0) { - int tmp = delta(r[0], exp); - // flip the result if known - tmp = tmp == 0 ? 0 : (3 - tmp); - exp = exp.isNull() ? exp : exp.negate(); - break; + ret = 1; } - default: { - Assert(!utils::isRegExpKind(k)); - break; + else + { + ret = delta(r[0], exp); } + break; + } + case REGEXP_COMPLEMENT: + { + int tmp = delta(r[0], exp); + // flip the result if known + ret = tmp == 0 ? 0 : (3 - tmp); + exp = exp.isNull() ? 
exp : exp.negate(); + break; } - if(!exp.isNull()) { - exp = Rewriter::rewrite(exp); + default: + { + Assert(!utils::isRegExpKind(k)); + break; } - std::pair< int, Node > p(ret, exp); - d_delta_cache[r] = p; } - Trace("regexp-delta") << "RegExp-Delta returns : " << ret << std::endl; + if (!exp.isNull()) + { + exp = Rewriter::rewrite(exp); + } + std::pair<int, Node> p(ret, exp); + d_delta_cache[r] = p; + Trace("regexp-delta") << "RegExpOpr::delta returns " << ret << " for " << r + << ", expr = " << exp << std::endl; return ret; } @@ -835,444 +823,265 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset) } } -//simplify -void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) { - Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl; +Node RegExpOpr::simplify(Node t, bool polarity) +{ + Trace("strings-regexp-simpl") + << "RegExpOpr::simplify: " << t << ", polarity=" << polarity << std::endl; Assert(t.getKind() == kind::STRING_IN_REGEXP); - Node str = t[0]; - Node re = t[1]; - if(polarity) { - simplifyPRegExp( str, re, new_nodes ); - } else { - simplifyNRegExp( str, re, new_nodes ); + Node tlit = polarity ? 
t : t.notNode(); + Node conc; + std::map<Node, Node>::const_iterator itr = d_simpCache.find(tlit); + if (itr != d_simpCache.end()) + { + return itr->second; } - Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n"; - for(unsigned i=0; i<new_nodes.size(); i++) { - Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl; + if (polarity) + { + std::vector<Node> newSkolems; + conc = reduceRegExpPos(tlit, d_sc, newSkolems); } -} -void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) { - std::pair < Node, Node > p(s, r); - NodeManager *nm = NodeManager::currentNM(); - std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p); - if(itr != d_simpl_neg_cache.end()) { - new_nodes.push_back( itr->second ); - } else { - Kind k = r.getKind(); - Node conc; - switch( k ) { - case kind::REGEXP_EMPTY: { - conc = d_true; - break; - } - case kind::REGEXP_SIGMA: { - conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate(); - break; - } - case kind::REGEXP_RANGE: { - std::vector< Node > vec; - unsigned a = r[0].getConst<String>().front(); - unsigned b = r[1].getConst<String>().front(); - for (unsigned c = a; c <= b; c++) - { - std::vector<unsigned> tmpVec; - tmpVec.push_back(c); - Node tmp = s.eqNode(nm->mkConst(String(tmpVec))).negate(); - vec.push_back( tmp ); - } - conc = vec.size()==1? vec[0] : NodeManager::currentNM()->mkNode(kind::AND, vec); - break; - } - case kind::STRING_TO_REGEXP: { - conc = s.eqNode(r[0]).negate(); - break; - } - case kind::REGEXP_CONCAT: { - // The following simplification states that - // ~( s in R1 ++ R2 ) - // is equivalent to - // forall x. 
- // 0 <= x <= len(s) => - // ~( substr(s,0,x) in R1 ) OR ~( substr(s,x,len(s)-x) in R2) - Node lens = nm->mkNode(STRING_LENGTH, s); - // the index we are removing from the RE concatenation - unsigned indexRm = 0; - Node b1; - Node b1v; - // As an optimization to the above reduction, if we can determine that - // all strings in the language of R1 have the same length, say n, - // then the conclusion of the reduction is quantifier-free: - // ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2) - Node reLength = RegExpEntail::getFixedLengthForRegexp(r[0]); - if (reLength.isNull()) - { - // try from the opposite end - unsigned indexE = r.getNumChildren() - 1; - reLength = RegExpEntail::getFixedLengthForRegexp(r[indexE]); - if (!reLength.isNull()) - { - indexRm = indexE; - } - } - Node guard; - if (reLength.isNull()) - { - b1 = nm->mkBoundVar(nm->integerType()); - b1v = nm->mkNode(BOUND_VAR_LIST, b1); - guard = nm->mkNode(AND, - nm->mkNode(GEQ, b1, d_zero), - nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1)); - } - else - { - b1 = reLength; - } - Node s1; - Node s2; - if (indexRm == 0) - { - s1 = nm->mkNode(STRING_SUBSTR, s, d_zero, b1); - s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1)); - } - else - { - s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1); - s2 = - nm->mkNode(STRING_SUBSTR, s, d_zero, nm->mkNode(MINUS, lens, b1)); - } - Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[indexRm]).negate(); - std::vector<Node> nvec; - for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++) - { - if (i != indexRm) - { - nvec.push_back( r[i] ); - } - } - Node r2 = nvec.size() == 1 ? 
nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec); - r2 = Rewriter::rewrite(r2); - Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate(); - conc = nm->mkNode(OR, s1r1, s2r2); - if (!b1v.isNull()) - { - conc = nm->mkNode(OR, guard.negate(), conc); - conc = nm->mkNode(FORALL, b1v, conc); - } - break; - } - case kind::REGEXP_UNION: { - std::vector< Node > c_and; - for(unsigned i=0; i<r.getNumChildren(); ++i) { - if(r[i].getKind() == kind::STRING_TO_REGEXP) { - c_and.push_back( r[i][0].eqNode(s).negate() ); - } else if(r[i].getKind() == kind::REGEXP_EMPTY) { - continue; - } else { - c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate()); - } - } - conc = c_and.size() == 0 ? d_true : - c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and); - break; - } - case kind::REGEXP_INTER: { - bool emptyflag = false; - std::vector< Node > c_or; - for(unsigned i=0; i<r.getNumChildren(); ++i) { - if(r[i].getKind() == kind::STRING_TO_REGEXP) { - c_or.push_back( r[i][0].eqNode(s).negate() ); - } else if(r[i].getKind() == kind::REGEXP_EMPTY) { - emptyflag = true; - break; - } else { - c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate()); - } - } - if(emptyflag) { - conc = d_true; - } else { - conc = c_or.size() == 1 ? 
c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or); - } - break; - } - case kind::REGEXP_STAR: { - if(s == d_emptyString) { - conc = d_false; - } else if(r[0].getKind() == kind::REGEXP_EMPTY) { - conc = s.eqNode(d_emptyString).negate(); - } else if(r[0].getKind() == kind::REGEXP_SIGMA) { - conc = d_false; - } else { - Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s); - Node sne = s.eqNode(d_emptyString).negate(); - Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); - Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); - Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one), - NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) ); - //internal - Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, d_zero, b1); - Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1)); - Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate(); - Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate(); - - conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2); - conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc); - conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc); - conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc); - } - break; - } - case kind::REGEXP_LOOP: { - Assert(r.getNumChildren() == 3); - if(r[1] == r[2]) { - if(r[1] == d_zero) { - conc = s.eqNode(d_emptyString).negate(); - } else if(r[1] == d_one) { - conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]).negate(); - } else { - //unroll for now - unsigned l = r[1].getConst<Rational>().getNumerator().toUnsignedInt(); - std::vector<Node> vec; - for(unsigned i=0; i<l; i++) { - vec.push_back(r[0]); - } - Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec); - 
conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r2).negate(); - } - } else { - Assert(r[1] == d_zero); - //unroll for now - unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt(); - std::vector<Node> vec; - vec.push_back(d_emptySingleton); - std::vector<Node> vec2; - for(unsigned i=1; i<=u; i++) { - vec2.push_back(r[0]); - Node r2 = i==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec); - vec.push_back(r2); - } - Node r3 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec); - conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r3).negate(); - } - break; - } - case kind::REGEXP_COMPLEMENT: + else + { + // see if we can use an optimized version of the reduction for re.++. + Node r = t[1]; + if (r.getKind() == REGEXP_CONCAT) + { + // the index we are removing from the RE concatenation + size_t index = 0; + // As an optimization to the reduction, if we can determine that + // all strings in the language of R1 have the same length, say n, + // then the conclusion of the reduction is quantifier-free: + // ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2) + Node reLen = getRegExpConcatFixed(r, index); + if (!reLen.isNull()) { - // ~( s in complement(R) ) ---> s in R - conc = nm->mkNode(STRING_IN_REGEXP, s, r[0]); - break; - } - default: { - Assert(!utils::isRegExpKind(k)); - break; + conc = reduceRegExpNegConcatFixed(tlit, reLen, index); } } - if (!conc.isNull()) + if (conc.isNull()) { - conc = Rewriter::rewrite(conc); - new_nodes.push_back(conc); - d_simpl_neg_cache[p] = conc; + conc = reduceRegExpNeg(tlit); } } + d_simpCache[tlit] = conc; + Trace("strings-regexp-simpl") + << "RegExpOpr::simplify: returns " << conc << std::endl; + return conc; } -void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) { - std::pair < Node, Node > p(s, r); - NodeManager *nm = NodeManager::currentNM(); - std::map < std::pair< Node, Node >, Node >::const_iterator itr = 
d_simpl_cache.find(p); - if(itr != d_simpl_cache.end()) { - new_nodes.push_back( itr->second ); - } else { - Kind k = r.getKind(); - Node conc; - switch( k ) { - case kind::REGEXP_EMPTY: { - conc = d_false; - break; - } - case kind::REGEXP_SIGMA: { - conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)); - break; - } - case kind::REGEXP_RANGE: { - conc = s.eqNode( r[0] ); - if(r[0] != r[1]) { - unsigned a = r[0].getConst<String>().front(); - unsigned b = r[1].getConst<String>().front(); - a += 1; - std::vector<unsigned> anvec; - anvec.push_back(a); - Node an = nm->mkConst(String(anvec)); - Node tmp = a != b - ? nm->mkNode(kind::STRING_IN_REGEXP, - s, - nm->mkNode(kind::REGEXP_RANGE, an, r[1])) - : s.eqNode(r[1]); - conc = NodeManager::currentNM()->mkNode(kind::OR, conc, tmp); - } - break; - } - case kind::STRING_TO_REGEXP: { - conc = s.eqNode(r[0]); - break; - } - case kind::REGEXP_CONCAT: { - std::vector< Node > nvec; - std::vector< Node > cc; - bool emptyflag = false; - for(unsigned i=0; i<r.getNumChildren(); ++i) { - if(r[i].getKind() == kind::STRING_TO_REGEXP) { - cc.push_back( r[i][0] ); - } else if(r[i].getKind() == kind::REGEXP_EMPTY) { - emptyflag = true; - break; - } else { - Node sk = NodeManager::currentNM()->mkSkolem( "rc", s.getType(), "created for regular expression concat" ); - Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]); - nvec.push_back(lem); - cc.push_back(sk); - } - } - if(emptyflag) { - conc = d_false; - } else { - Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) ); - nvec.push_back(lem); - conc = nvec.size() == 1 ? 
nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec); - } - break; - } - case kind::REGEXP_UNION: { - std::vector< Node > c_or; - for(unsigned i=0; i<r.getNumChildren(); ++i) { - if(r[i].getKind() == kind::STRING_TO_REGEXP) { - c_or.push_back( r[i][0].eqNode(s) ); - } else if(r[i].getKind() == kind::REGEXP_EMPTY) { - continue; - } else { - c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i])); - } - } - conc = c_or.size() == 0 ? d_false : - c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or); - break; - } - case kind::REGEXP_INTER: { - std::vector< Node > c_and; - bool emptyflag = false; - for(unsigned i=0; i<r.getNumChildren(); ++i) { - if(r[i].getKind() == kind::STRING_TO_REGEXP) { - c_and.push_back( r[i][0].eqNode(s) ); - } else if(r[i].getKind() == kind::REGEXP_EMPTY) { - emptyflag = true; - break; - } else { - c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i])); - } - } - if(emptyflag) { - conc = d_false; - } else { - conc = c_and.size() == 1 ? 
c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and); - } - break; - } - case kind::REGEXP_STAR: { - if(s == d_emptyString) { - conc = d_true; - } else if(r[0].getKind() == kind::REGEXP_EMPTY) { - conc = s.eqNode(d_emptyString); - } else if(r[0].getKind() == kind::REGEXP_SIGMA) { - conc = d_true; - } else { - Node se = s.eqNode(d_emptyString); - Node sinr = nm->mkNode(kind::STRING_IN_REGEXP, s, r[0]); - Node sk1 = nm->mkSkolem( - "rs", s.getType(), "created for regular expression star"); - Node sk2 = nm->mkSkolem( - "rs", s.getType(), "created for regular expression star"); - Node sk3 = nm->mkSkolem( - "rs", s.getType(), "created for regular expression star"); - NodeBuilder<> nb(kind::AND); - nb << sk1.eqNode(d_emptyString).negate(); - nb << sk3.eqNode(d_emptyString).negate(); - nb << nm->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]); - nb << nm->mkNode(kind::STRING_IN_REGEXP, sk2, r); - nb << nm->mkNode(kind::STRING_IN_REGEXP, sk3, r[0]); - nb << s.eqNode(nm->mkNode(kind::STRING_CONCAT, sk1, sk2, sk3)); - conc = nb; +Node RegExpOpr::getRegExpConcatFixed(Node r, size_t& index) +{ + Assert(r.getKind() == REGEXP_CONCAT); + index = 0; + Node reLen = RegExpEntail::getFixedLengthForRegexp(r[0]); + if (!reLen.isNull()) + { + return reLen; + } + // try from the opposite end + size_t indexE = r.getNumChildren() - 1; + reLen = RegExpEntail::getFixedLengthForRegexp(r[indexE]); + if (!reLen.isNull()) + { + index = indexE; + return reLen; + } + return Node::null(); +} - // We unfold `x in R*` by considering three cases: `x` is empty, `x` - // is matched by `R`, or `x` is matched by two or more `R`s. For the - // last case, we break `x` into three pieces, making the beginning - // and the end each match `R` and the middle match `R*`. Matching the - // beginning and the end with `R` allows us to reason about the - // beginning and the end of `x` simultaneously. 
- // - // x in R* ---> (x = "") v (x in R) v - // (x = x1 ++ x2 ++ x3 ^ x1 != "" ^ x3 != "" ^ - // x1 in R ^ x2 in R* ^ x3 in R) - conc = nm->mkNode(kind::OR, se, sinr, conc); - } - break; - } - case kind::REGEXP_LOOP: { - Assert(r.getNumChildren() == 3); - if(r[1] == d_zero) { - if(r[2] == d_zero) { - conc = s.eqNode( d_emptyString ); - } else { - //R{0,n} - if(s != d_emptyString) { - Node sk1 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" ); - Node sk2 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" ); - Node seq12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2)); - Node sk1ne = sk1.eqNode(d_emptyString).negate(); - Node sk1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]); - unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt(); - Node u1 = NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1)); - Node sk2inru = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, - NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], d_zero, u1)); - conc = NodeManager::currentNM()->mkNode(kind::AND, seq12, sk1ne, sk1inr, sk2inru); - conc = NodeManager::currentNM()->mkNode(kind::OR, - s.eqNode(d_emptyString), conc); - } else { - conc = d_true; - } - } - } else { - //R^n - Node sk1 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" ); - Node sk2 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" ); - Node seq12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2)); - Node sk1ne = sk1.eqNode(d_emptyString).negate(); - Node sk1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]); - unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt(); - Node u1 = NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1)); - Node sk2inru = 
NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, - NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], u1, u1)); - conc = NodeManager::currentNM()->mkNode(kind::AND, seq12, sk1ne, sk1inr, sk2inru); - } - break; - } - case kind::REGEXP_COMPLEMENT: +Node RegExpOpr::reduceRegExpNeg(Node mem) +{ + Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP); + Node s = mem[0][0]; + Node r = mem[0][1]; + NodeManager* nm = NodeManager::currentNM(); + Kind k = r.getKind(); + Node zero = nm->mkConst(Rational(0)); + Node conc; + if (k == REGEXP_CONCAT) + { + // do not use length entailment, call regular expression concat + Node reLen; + size_t i = 0; + conc = reduceRegExpNegConcatFixed(mem, reLen, i); + } + else if (k == REGEXP_STAR) + { + Node emp = Word::mkEmptyWord(s.getType()); + Node lens = nm->mkNode(STRING_LENGTH, s); + Node sne = s.eqNode(emp).negate(); + Node b1 = nm->mkBoundVar(nm->integerType()); + Node b1v = nm->mkNode(BOUND_VAR_LIST, b1); + Node g1 = + nm->mkNode(AND, nm->mkNode(GT, b1, zero), nm->mkNode(GEQ, lens, b1)); + // internal + Node s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1); + Node s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1)); + Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[0]).negate(); + Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r).negate(); + + conc = nm->mkNode(OR, s1r1, s2r2); + conc = nm->mkNode(IMPLIES, g1, conc); + conc = nm->mkNode(FORALL, b1v, conc); + conc = nm->mkNode(AND, sne, conc); + } + else + { + Assert(!utils::isRegExpKind(k)); + } + return conc; +} + +Node RegExpOpr::reduceRegExpNegConcatFixed(Node mem, Node reLen, size_t index) +{ + Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP); + Node s = mem[0][0]; + Node r = mem[0][1]; + NodeManager* nm = NodeManager::currentNM(); + Assert(r.getKind() == REGEXP_CONCAT); + Node zero = nm->mkConst(Rational(0)); + // The following simplification states that + // ~( s in R1 ++ R2 ++... 
++ Rn ) + // is equivalent to + // forall x. + // 0 <= x <= len(s) => + // ~(substr(s,0,x) in R1) OR ~(substr(s,x,len(s)-x) in R2 ++ ... ++ Rn) + // Index is the child index of r that we are stripping off, which is either + // from the beginning or the end. + Assert(index == 0 || index == r.getNumChildren() - 1); + Node lens = nm->mkNode(STRING_LENGTH, s); + Node b1; + Node b1v; + Node guard; + if (reLen.isNull()) + { + b1 = SkolemCache::mkIndexVar(mem); + b1v = nm->mkNode(BOUND_VAR_LIST, b1); + guard = nm->mkNode(AND, + nm->mkNode(GEQ, b1, zero), + nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1)); + } + else + { + b1 = reLen; + } + Node s1; + Node s2; + if (index == 0) + { + s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1); + s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1)); + } + else + { + s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1); + s2 = nm->mkNode(STRING_SUBSTR, s, zero, nm->mkNode(MINUS, lens, b1)); + } + Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[index]).negate(); + std::vector<Node> nvec; + for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++) + { + if (i != index) + { + nvec.push_back(r[i]); + } + } + Node r2 = nvec.size() == 1 ? 
nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec); + r2 = Rewriter::rewrite(r2); + Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate(); + Node conc = nm->mkNode(OR, s1r1, s2r2); + if (!b1v.isNull()) + { + conc = nm->mkNode(OR, guard.negate(), conc); + conc = nm->mkNode(FORALL, b1v, conc); + } + return conc; +} + +Node RegExpOpr::reduceRegExpPos(Node mem, + SkolemCache* sc, + std::vector<Node>& newSkolems) +{ + Assert(mem.getKind() == STRING_IN_REGEXP); + Node s = mem[0]; + Node r = mem[1]; + NodeManager* nm = NodeManager::currentNM(); + Kind k = r.getKind(); + Node conc; + if (k == REGEXP_CONCAT) + { + std::vector<Node> nvec; + std::vector<Node> cc; + // get the (valid) existential for this membership + Node eform = getExistsForRegExpConcatMem(mem); + SkolemManager* sm = nm->getSkolemManager(); + // Notice that this rule does not introduce witness terms, instead it + // uses skolems in the conclusion of the proof rule directly. Thus, + // the existential eform does not need to be explicitly justified by a + // proof here, since it is only being used as an intermediate formula in + // this inference. Hence we do not pass a proof generator to mkSkolemize. + sm->mkSkolemize(eform, newSkolems, "rc", "regexp concat skolem"); + Assert(newSkolems.size() == r.getNumChildren()); + // Look up skolems for each of the components. If sc has optimizations + // enabled, this will return arguments of str.to_re. 
+ for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i) + { + if (r[i].getKind() == STRING_TO_REGEXP) { - // s in complement(R) ---> ~( s in R ) - conc = nm->mkNode(STRING_IN_REGEXP, s, r[0]).negate(); - break; + // optimization, just take the body + newSkolems[i] = r[i][0]; } - default: { - Assert(!utils::isRegExpKind(k)); - break; + else + { + nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i])); } } - if (!conc.isNull()) - { - conc = Rewriter::rewrite(conc); - new_nodes.push_back(conc); - d_simpl_cache[p] = conc; - } + // (str.in_re x (re.++ R1 .... Rn)) => + // (and (str.in_re k1 R1) ... (str.in_re kn Rn) (= x (str.++ k1 ... kn))) + Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems)); + nvec.push_back(lem); + conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec); + } + else if (k == REGEXP_STAR) + { + Node emp = Word::mkEmptyWord(s.getType()); + Node se = s.eqNode(emp); + Node sinr = nm->mkNode(STRING_IN_REGEXP, s, r[0]); + Node reExpand = nm->mkNode(REGEXP_CONCAT, r[0], r, r[0]); + Node sinRExp = nm->mkNode(STRING_IN_REGEXP, s, reExpand); + // We unfold `x in R*` by considering three cases: `x` is empty, `x` + // is matched by `R`, or `x` is matched by two or more `R`s. For the + // last case, `x` will break into three pieces, making the beginning + // and the end each match `R` and the middle match `R*`. Matching the + // beginning and the end with `R` allows us to reason about the + // beginning and the end of `x` simultaneously. + // + // x in R* ---> (x = "") v (x in R) v (x in (re.++ R (re.* R) R)) + + // We also immediately unfold the last disjunct for re.*. The advantage + // of doing this is that we use the same scheme for skolems above. + std::vector<Node> newSkolemsC; + sinRExp = reduceRegExpPos(sinRExp, sc, newSkolemsC); + Assert(newSkolemsC.size() == 3); + // make the return lemma + // can also assume the component match the first and last R are non-empty. 
+ // This means that the overall conclusion is: + // (x = "") v (x in R) v (x = (str.++ k1 k2 k3) ^ + // k1 in R ^ k2 in (re.* R) ^ k3 in R ^ + // k1 != "" ^ k3 != "") + conc = nm->mkNode(OR, + se, + sinr, + nm->mkNode(AND, + sinRExp, + newSkolemsC[0].eqNode(emp).negate(), + newSkolemsC[2].eqNode(emp).negate())); + } + else + { + Assert(!utils::isRegExpKind(k)); } + return conc; } bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) { @@ -1398,24 +1207,6 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) { } } -bool RegExpOpr::testNoRV(Node r) { - std::map< Node, bool >::const_iterator itr = d_norv_cache.find(r); - if(itr != d_norv_cache.end()) { - return itr->second; - } else { - if(r.getKind() == kind::REGEXP_RV) { - return false; - } else if(r.getNumChildren() > 1) { - for(unsigned int i=0; i<r.getNumChildren(); i++) { - if(!testNoRV(r[i])) { - return false; - } - } - } - return true; - } -} - Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) { //Assert(checkConstRegExp(r1) && checkConstRegExp(r2)); if(r1 > r2) { @@ -1424,13 +1215,6 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > r2 = tmpNode; } Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n "<< mkString(r1) << ",\n " << mkString(r2) << std::endl; - //if(Trace.isOn("regexp-debug")) { - // Trace("regexp-debug") << "... with cache:\n"; - // for(std::map< PairNodes, Node >::const_iterator itr=cache.begin(); - // itr!=cache.end();itr++) { - // Trace("regexp-debug") << "(" << itr->first.first << "," << itr->first.second << ")->" << itr->second << std::endl; - // } - //} std::pair < Node, Node > p(r1, r2); std::map < PairNodes, Node >::const_iterator itr = d_inter_cache.find(p); Node rNode; @@ -1542,7 +1326,8 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > } } Trace("regexp-int-debug") << " ... 
try testing no RV of " << mkString(rNode) << std::endl; - if(testNoRV(rNode)) { + if (!expr::hasSubtermKind(REGEXP_RV, rNode)) + { d_inter_cache[p] = rNode; } } @@ -1552,80 +1337,103 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > Node RegExpOpr::removeIntersection(Node r) { Assert(checkConstRegExp(r)); - std::map < Node, Node >::const_iterator itr = d_rm_inter_cache.find(r); - if(itr != d_rm_inter_cache.end()) { - return itr->second; - } - Node retNode; - Kind rk = r.getKind(); - switch (rk) + NodeManager* nm = NodeManager::currentNM(); + std::unordered_map<TNode, Node, TNodeHashFunction> visited; + std::unordered_map<TNode, Node, TNodeHashFunction>::iterator it; + std::vector<TNode> visit; + TNode cur; + visit.push_back(r); + do { - case REGEXP_EMPTY: - case REGEXP_SIGMA: - case REGEXP_RANGE: - case STRING_TO_REGEXP: - { - retNode = r; - break; - } - case REGEXP_CONCAT: - case REGEXP_UNION: - case REGEXP_STAR: - case REGEXP_COMPLEMENT: + cur = visit.back(); + visit.pop_back(); + it = visited.find(cur); + + if (it == visited.end()) { - NodeBuilder<> nb(rk); - for (const Node& rc : r) + visited[cur] = Node::null(); + visit.push_back(cur); + for (const Node& cn : cur) { - nb << removeIntersection(rc); + visit.push_back(cn); } - retNode = Rewriter::rewrite(nb.constructNode()); - break; } - - case REGEXP_INTER: + else if (it->second.isNull()) { - retNode = removeIntersection(r[0]); - for (size_t i = 1, nchild = r.getNumChildren(); i < nchild; i++) + Kind ck = cur.getKind(); + Node ret; + bool childChanged = false; + std::vector<Node> children; + if (cur.getMetaKind() == metakind::PARAMETERIZED) { - bool spflag = false; - Node tmpNode = removeIntersection(r[i]); - retNode = intersect(retNode, tmpNode, spflag); + children.push_back(cur.getOperator()); } - break; - } - case REGEXP_LOOP: - { - retNode = removeIntersection(r[0]); - retNode = Rewriter::rewrite( - NodeManager::currentNM()->mkNode(REGEXP_LOOP, retNode, r[1], r[2])); - 
break; - } - default: - { - Unreachable(); + for (const Node& cn : cur) + { + it = visited.find(cn); + Assert(it != visited.end()); + Assert(!it->second.isNull()); + if (ck == REGEXP_INTER) + { + if (ret.isNull()) + { + ret = it->second; + } + else + { + ret = intersect(ret, it->second); + } + } + else + { + // will construct below + childChanged = childChanged || cn != it->second; + children.push_back(it->second); + } + } + if (ck != REGEXP_INTER) + { + if (childChanged) + { + ret = nm->mkNode(cur.getKind(), children); + } + else + { + ret = cur; + } + } + visited[cur] = ret; } + } while (!visit.empty()); + Assert(visited.find(r) != visited.end()); + Assert(!visited.find(r)->second.isNull()); + if (Trace.isOn("regexp-intersect")) + { + Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r) + << " ) = " << mkString(visited[r]) << std::endl; } - d_rm_inter_cache[r] = retNode; - Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r) << " ) = " << mkString(retNode) << std::endl; - return retNode; + return visited[r]; } -Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) { - if(checkConstRegExp(r1) && checkConstRegExp(r2)) { - Node rr1 = removeIntersection(r1); - Node rr2 = removeIntersection(r2); - std::map< PairNodes, Node > cache; - Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl; - Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl; - Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1) << ",\n\t"<< mkString(r2) << ")" << std::endl; - Node retNode = intersectInternal(rr1, rr2, cache, 1); - Trace("regexp-intersect") << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t"<< mkString(r2) << ") =\n\t" << mkString(retNode) << std::endl; - Trace("regexp-intersect-node") << "Intersect finished." 
<< std::endl; - return retNode; - } else { - spflag = true; +Node RegExpOpr::intersect(Node r1, Node r2) +{ + if (!checkConstRegExp(r1) || !checkConstRegExp(r2)) + { return Node::null(); } + Node rr1 = removeIntersection(r1); + Node rr2 = removeIntersection(r2); + std::map<PairNodes, Node> cache; + Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl; + Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl; + Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1) + << ",\n\t" << mkString(r2) << ")" << std::endl; + Node retNode = intersectInternal(rr1, rr2, cache, 1); + Trace("regexp-intersect") + << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t" << mkString(r2) + << ") =\n\t" << mkString(retNode) << std::endl; + Trace("regexp-intersect-node") << "Intersect finished." << std::endl; + return retNode; } //printing @@ -1709,16 +1517,15 @@ std::string RegExpOpr::mkString( Node r ) { break; } case kind::REGEXP_LOOP: { - retStr += "("; - retStr += mkString(r[0]); - retStr += ")"; - retStr += "{"; - retStr += r[1].getConst<Rational>().toString(); - retStr += ","; + uint32_t l = utils::getLoopMinOccurrences(r); + std::stringstream ss; + ss << "(" << mkString(r[0]) << "){" << l << ","; if(r.getNumChildren() == 3) { - retStr += r[2].getConst<Rational>().toString(); + uint32_t u = utils::getLoopMaxOccurrences(r); + ss << u; } - retStr += "}"; + ss << "}"; + retStr += ss.str(); break; } case kind::REGEXP_RV: { @@ -1760,6 +1567,50 @@ bool RegExpOpr::regExpIncludes(Node r1, Node r2) return result; } +/** + * Associating formulas with their "exists form", or an existentially + * quantified formula that is equivalent to it. This is currently used + * for regular expression memberships in the method below. 
+ */ +struct ExistsFormAttributeId +{ +}; +typedef expr::Attribute<ExistsFormAttributeId, Node> ExistsFormAttribute; + +Node RegExpOpr::getExistsForRegExpConcatMem(Node mem) +{ + // get or make the exists form of the membership + ExistsFormAttribute efa; + if (mem.hasAttribute(efa)) + { + // already computed + return mem.getAttribute(efa); + } + Assert(mem.getKind() == STRING_IN_REGEXP); + Node x = mem[0]; + Node r = mem[1]; + Assert(r.getKind() == REGEXP_CONCAT); + NodeManager* nm = NodeManager::currentNM(); + TypeNode xtn = x.getType(); + std::vector<Node> vars; + std::vector<Node> mems; + for (const Node& rc : r) + { + Node v = nm->mkBoundVar(xtn); + vars.push_back(v); + mems.push_back(nm->mkNode(STRING_IN_REGEXP, v, rc)); + } + Node sconcat = nm->mkNode(STRING_CONCAT, vars); + Node eq = x.eqNode(sconcat); + mems.insert(mems.begin(), eq); + Node bvl = nm->mkNode(BOUND_VAR_LIST, vars); + Node ebody = nm->mkNode(AND, mems); + Node eform = nm->mkNode(EXISTS, bvl, ebody); + mem.setAttribute(efa, eform); + Trace("regexp-opr") << "Exists form " << mem << " : " << eform << std::endl; + return eform; +} + }/* CVC4::theory::strings namespace */ }/* CVC4::theory namespace */ }/* CVC4 namespace */ diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index d0b0755eb..b9834e987 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Tianyi Liang, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -19,14 +19,14 @@ #ifndef CVC4__THEORY__STRINGS__REGEXP__OPERATION_H #define CVC4__THEORY__STRINGS__REGEXP__OPERATION_H -#include <vector> +#include <map> #include <set> -#include <algorithm> -#include <climits> -#include "util/hash.h" +#include <unordered_map> +#include <vector> + +#include "expr/node.h" +#include "theory/strings/skolem_cache.h" #include "util/string.h" -#include "theory/theory.h" -#include "theory/rewriter.h" namespace CVC4 { namespace theory { @@ -73,23 +73,17 @@ class RegExpOpr { Node d_sigma; Node d_sigma_star; - std::map<PairNodes, Node> d_simpl_cache; - std::map<PairNodes, Node> d_simpl_neg_cache; + /** A cache for simplify */ + std::map<Node, Node> d_simpCache; std::map<Node, std::pair<int, Node> > d_delta_cache; std::map<PairNodeStr, Node> d_dv_cache; std::map<PairNodeStr, std::pair<Node, int> > d_deriv_cache; - std::map<Node, std::pair<Node, int> > d_compl_cache; /** cache mapping regular expressions to whether they contain constants */ std::unordered_map<Node, RegExpConstType, NodeHashFunction> d_constCache; - std::map<Node, std::pair<std::set<unsigned>, std::set<Node> > > d_cset_cache; std::map<Node, std::pair<std::set<unsigned>, std::set<Node> > > d_fset_cache; std::map<PairNodes, Node> d_inter_cache; - std::map<Node, Node> d_rm_inter_cache; - std::map<Node, bool> d_norv_cache; std::map<Node, std::vector<PairNodes> > d_split_cache; std::map<PairNodes, bool> d_inclusionCache; - void simplifyPRegExp(Node s, Node r, std::vector<Node> &new_nodes); - void simplifyNRegExp(Node s, Node r, std::vector<Node> &new_nodes); /** * Helper function for mkString, pretty prints constant or variable regular * expression r. 
@@ -101,16 +95,19 @@ class RegExpOpr { bool containC2(unsigned cnt, Node n); Node convert1(unsigned cnt, Node n); void convert2(unsigned cnt, Node n, Node &r1, Node &r2); - bool testNoRV(Node r); Node intersectInternal(Node r1, Node r2, std::map<PairNodes, Node> cache, unsigned cnt); + /** + * Given a regular expression r, this returns an equivalent regular expression + * that contains no applications of intersection. + */ Node removeIntersection(Node r); void firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset); public: - RegExpOpr(); + RegExpOpr(SkolemCache* sc); ~RegExpOpr(); /** @@ -121,7 +118,44 @@ class RegExpOpr { bool checkConstRegExp( Node r ); /** get the constant type for regular expression r */ RegExpConstType getRegExpConstType(Node r); - void simplify(Node t, std::vector< Node > &new_nodes, bool polarity); + /** Simplify + * + * This is the main method to simplify (unfold) a regular expression + * membership. It is called where t is of the form (str.in_re s r), + * and t (or (not t), when polarity=false) holds in the current context. + * It returns the unfolded form of t. + */ + Node simplify(Node t, bool polarity); + /** + * Given regular expression of the form + * (re.++ r_0 ... r_{n-1}) + * This returns a non-null node reLen and updates index such that + * RegExpEntail::getFixedLengthForRegexp(r_index) = reLen + * where index is set to either 0 or n-1. + */ + static Node getRegExpConcatFixed(Node r, size_t& index); + //------------------------ trusted reductions + /** + * Return the unfolded form of mem of the form (str.in_re s r). + */ + static Node reduceRegExpPos(Node mem, + SkolemCache* sc, + std::vector<Node>& newSkolems); + /** + * Return the unfolded form of mem of the form (not (str.in_re s r)). + */ + static Node reduceRegExpNeg(Node mem); + /** + * Return the unfolded form of mem of the form + * (not (str.in_re s (re.++ r_0 ... 
r_{n-1}))) + * Called when RegExpEntail::getFixedLengthForRegexp(r_index) = reLen + * where index is either 0 or n-1. + * + * This uses reLen as an optimization to improve the reduction. If reLen + * is null, then this optimization is not applied. + */ + static Node reduceRegExpNegConcatFixed(Node mem, Node reLen, size_t index); + //------------------------ end trusted reductions /** * This method returns 1 if the empty string is in r, 2 if the empty string * is not in r, or 0 if it is unknown whether the empty string is in r. @@ -141,9 +175,9 @@ class RegExpOpr { Node derivativeSingle( Node r, CVC4::String c ); /** * Returns the regular expression intersection of r1 and r2. If r1 or r2 is - * not constant, then this method returns null and sets spflag to true. + * not constant, then this method returns null. */ - Node intersect(Node r1, Node r2, bool &spflag); + Node intersect(Node r1, Node r2); /** Get the pretty printed version of the regular expression r */ static std::string mkString(Node r); @@ -155,6 +189,22 @@ class RegExpOpr { * for performance reasons. */ bool regExpIncludes(Node r1, Node r2); + + private: + /** + * Given a regular expression membership of the form: + * (str.in_re x (re.++ R1 ... Rn)) + * This returns the valid existentially quantified formula: + * (exists ((x1 String) ... (xn String)) + * (=> (str.in_re x (re.++ R1 ... Rn)) + * (and (= x (str.++ x1 ... xn)) + * (str.in_re x1 R1) ... (str.in_re xn Rn)))) + * Moreover, this formula is cached per regular expression membership via + * an attribute, meaning it is always the same for a given membership mem. 
+ */ + static Node getExistsForRegExpConcatMem(Node mem); + /** pointer to the skolem cache used by this class */ + SkolemCache* d_sc; }; }/* CVC4::theory::strings namespace */ diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp index 53c6c9acc..3f419c66d 100644 --- a/src/theory/strings/regexp_solver.cpp +++ b/src/theory/strings/regexp_solver.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -33,19 +33,19 @@ namespace strings { RegExpSolver::RegExpSolver(SolverState& s, InferenceManager& im, + SkolemCache* skc, CoreSolver& cs, ExtfSolver& es, - SequencesStatistics& stats, - context::Context* c, - context::UserContext* u) + SequencesStatistics& stats) : d_state(s), d_im(im), d_csolver(cs), d_esolver(es), d_statistics(stats), - d_regexp_ucached(u), - d_regexp_ccached(c), - d_processed_memberships(c) + d_regexp_ucached(s.getUserContext()), + d_regexp_ccached(s.getSatContext()), + d_processed_memberships(s.getSatContext()), + d_regexp_opr(skc) { d_emptyString = NodeManager::currentNM()->mkConst(::CVC4::String("")); std::vector<Node> nvec; @@ -221,10 +221,13 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems) else { // we have a conflict - std::vector<Node> exp_n; - exp_n.push_back(assertion); + std::vector<Node> iexp = nfexp; + std::vector<Node> noExplain; + iexp.push_back(assertion); + noExplain.push_back(assertion); Node conc = Node::null(); - d_im.sendInference(nfexp, exp_n, conc, Inference::RE_NF_CONFLICT); + d_im.sendInference( + iexp, noExplain, conc, Inference::RE_NF_CONFLICT); addedLemma = true; 
break; } @@ -260,16 +263,16 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems) << "Unroll/simplify membership of atomic term " << rep << std::endl; // if so, do simple unrolling - std::vector<Node> nvec; Trace("strings-regexp") << "Simplify on " << atom << std::endl; - d_regexp_opr.simplify(atom, nvec, polarity); + Node conc = d_regexp_opr.simplify(atom, polarity); Trace("strings-regexp") << "...finished" << std::endl; // if simplifying successfully generated a lemma - if (!nvec.empty()) + if (!conc.isNull()) { - std::vector<Node> exp_n; - exp_n.push_back(assertion); - Node conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec); + std::vector<Node> iexp = rnfexp; + std::vector<Node> noExplain; + iexp.push_back(assertion); + noExplain.push_back(assertion); Assert(atom.getKind() == STRING_IN_REGEXP); if (polarity) { @@ -281,7 +284,7 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems) } Inference inf = polarity ? Inference::RE_UNFOLD_POS : Inference::RE_UNFOLD_NEG; - d_im.sendInference(rnfexp, exp_n, conc, inf); + d_im.sendInference(iexp, noExplain, conc, inf); addedLemma = true; if (changed) { @@ -401,7 +404,7 @@ bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems) Node conc; d_im.sendInference( - vec_nodes, conc, Inference::RE_INTER_INCLUDE, true); + vec_nodes, conc, Inference::RE_INTER_INCLUDE, false, true); return false; } } @@ -468,11 +471,9 @@ bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems) rcti = rct; continue; } - bool spflag = false; - Node resR = d_regexp_opr.intersect(mi[1], m[1], spflag); + Node resR = d_regexp_opr.intersect(mi[1], m[1]); // intersection should be computable Assert(!resR.isNull()); - Assert(!spflag); if (resR == d_emptyRegexp) { // conflict, explain @@ -484,19 +485,21 @@ bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems) vec_nodes.push_back(mi[0].eqNode(m[0])); } Node conc; - d_im.sendInference(vec_nodes, conc, Inference::RE_INTER_CONF, true); 
+ d_im.sendInference( + vec_nodes, conc, Inference::RE_INTER_CONF, false, true); // conflict, return return false; } // rewrite to ensure the equality checks below are precise - Node mres = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, mi[0], resR)); - if (mres == mi) + Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR); + Node mresr = Rewriter::rewrite(mres); + if (mresr == mi) { // if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent // to x in R1, hence x in R2 can be marked redundant. d_im.markReduced(m); } - else if (mres == m) + else if (mresr == m) { // same as above, opposite direction d_im.markReduced(mi); @@ -512,7 +515,8 @@ bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems) { vec_nodes.push_back(mi[0].eqNode(m[0])); } - d_im.sendInference(vec_nodes, mres, Inference::RE_INTER_INFER, true); + d_im.sendInference( + vec_nodes, mres, Inference::RE_INTER_INFER, false, true); // both are reduced d_im.markReduced(m); d_im.markReduced(mi); @@ -533,10 +537,12 @@ bool RegExpSolver::checkPDerivative( { case 0: { - std::vector<Node> exp_n; - exp_n.push_back(atom); - exp_n.push_back(x.eqNode(d_emptyString)); - d_im.sendInference(nf_exp, exp_n, exp, Inference::RE_DELTA); + std::vector<Node> noExplain; + noExplain.push_back(atom); + noExplain.push_back(x.eqNode(d_emptyString)); + std::vector<Node> iexp = nf_exp; + iexp.insert(iexp.end(), noExplain.begin(), noExplain.end()); + d_im.sendInference(iexp, noExplain, exp, Inference::RE_DELTA); addedLemma = true; d_regexp_ccached.insert(atom); return false; @@ -548,11 +554,12 @@ bool RegExpSolver::checkPDerivative( } case 2: { - std::vector<Node> exp_n; - exp_n.push_back(atom); - exp_n.push_back(x.eqNode(d_emptyString)); - Node conc; - d_im.sendInference(nf_exp, exp_n, conc, Inference::RE_DELTA_CONF); + std::vector<Node> noExplain; + noExplain.push_back(atom); + noExplain.push_back(x.eqNode(d_emptyString)); + std::vector<Node> iexp = nf_exp; + iexp.insert(iexp.end(), noExplain.begin(), 
noExplain.end()); + d_im.sendInference(iexp, noExplain, d_false, Inference::RE_DELTA_CONF); addedLemma = true; d_regexp_ccached.insert(atom); return false; @@ -641,9 +648,11 @@ bool RegExpSolver::deriveRegExp(Node x, conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc); } } - std::vector<Node> exp_n; - exp_n.push_back(atom); - d_im.sendInference(ant, exp_n, conc, Inference::RE_DERIVE); + std::vector<Node> iexp = ant; + std::vector<Node> noExplain; + noExplain.push_back(atom); + iexp.push_back(atom); + d_im.sendInference(iexp, noExplain, conc, Inference::RE_DERIVE); return true; } return false; diff --git a/src/theory/strings/regexp_solver.h b/src/theory/strings/regexp_solver.h index 9e9ba5845..92f2b1bac 100644 --- a/src/theory/strings/regexp_solver.h +++ b/src/theory/strings/regexp_solver.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Tianyi Liang, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -25,6 +25,7 @@ #include "expr/node.h" #include "theory/strings/extf_solver.h" #include "theory/strings/inference_manager.h" +#include "theory/strings/skolem_cache.h" #include "theory/strings/regexp_operation.h" #include "theory/strings/sequences_stats.h" #include "theory/strings/solver_state.h" @@ -46,11 +47,10 @@ class RegExpSolver public: RegExpSolver(SolverState& s, InferenceManager& im, + SkolemCache* skc, CoreSolver& cs, ExtfSolver& es, - SequencesStatistics& stats, - context::Context* c, - context::UserContext* u); + SequencesStatistics& stats); ~RegExpSolver() {} /** check regular expression memberships diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp index 6ea467ae9..a32e5bc9e 100644 --- a/src/theory/strings/rewrites.cpp +++ b/src/theory/strings/rewrites.cpp @@ -2,10 +2,10 @@ /*! \file rewrites.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli + ** Andrew Reynolds, Andres Noetzli, Yoni Zohar ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -210,6 +210,7 @@ const char* toString(Rewrite r) case Rewrite::LEN_SEQ_UNIT: return "LEN_SEQ_UNIT"; case Rewrite::CHARAT_ELIM: return "CHARAT_ELIM"; case Rewrite::SEQ_UNIT_EVAL: return "SEQ_UNIT_EVAL"; + case Rewrite::SEQ_NTH_EVAL: return "SEQ_NTH_EVAL"; default: return "?"; } } diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h index bc5de3a8a..f9824405b 100644 --- a/src/theory/strings/rewrites.h +++ b/src/theory/strings/rewrites.h @@ -2,10 +2,10 @@ /*! 
\file rewrites.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli + ** Andrew Reynolds, Andres Noetzli, Yoni Zohar ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -212,7 +212,8 @@ enum class Rewrite : uint32_t LEN_CONV_INV, LEN_SEQ_UNIT, CHARAT_ELIM, - SEQ_UNIT_EVAL + SEQ_UNIT_EVAL, + SEQ_NTH_EVAL }; /** diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index 292960e6a..2cefe6b09 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -18,6 +18,7 @@ #include "expr/attribute.h" #include "expr/node_builder.h" +#include "expr/sequence.h" #include "theory/rewriter.h" #include "theory/strings/arith_entail.h" #include "theory/strings/regexp_entail.h" @@ -463,7 +464,8 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) // (= (str.++ "A" x y) (str.++ x "AB" z)) ---> // (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z))) std::vector<Node> rpfxv1; - if (StringsEntail::stripSymbolicLength(pfxv1, rpfxv1, 1, lenPfx0)) + if (StringsEntail::stripSymbolicLength( + pfxv1, rpfxv1, 1, lenPfx0, true)) { std::vector<Node> sfxv0(v0.begin() + i, v0.end()); pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end()); @@ -490,7 +492,8 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) // (= (str.++ x "AB" z) (str.++ "A" x y)) ---> // (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y)) std::vector<Node> rpfxv0; - if (StringsEntail::stripSymbolicLength(pfxv0, rpfxv0, 1, lenPfx1)) + if (StringsEntail::stripSymbolicLength( + pfxv0, rpfxv0, 1, lenPfx1, true)) { pfxv0.insert(pfxv0.end(), v0.begin() + i, v0.end()); std::vector<Node> sfxv1(v1.begin() + j, v1.end()); @@ -1488,6 +1491,10 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) { retNode = rewriteSeqUnit(node); } + else if (nk == SEQ_NTH) + { + retNode = rewriteSeqNth(node); + } Trace("sequences-postrewrite") << "Strings::SequencesRewriter::postRewrite returning " << retNode @@ -1507,6 +1514,33 @@ RewriteResponse SequencesRewriter::preRewrite(TNode node) return RewriteResponse(REWRITE_DONE, node); } +Node SequencesRewriter::rewriteSeqNth(Node node) +{ + Assert(node.getKind() == SEQ_NTH); + Node ret; + Node s = node[0]; + Node i = node[1]; + if (s.isConst() && i.isConst()) + { + size_t len = Word::getLength(s); + size_t pos = i.getConst<Rational>().getNumerator().toUnsignedInt(); + if (pos < len) + { + std::vector<Node> elements = 
s.getConst<Sequence>().getVec(); + ret = elements[pos]; + return returnRewrite(node, ret, Rewrite::SEQ_NTH_EVAL); + } + else + { + return node; + } + } + else + { + return node; + } +} + Node SequencesRewriter::rewriteCharAt(Node node) { Assert(node.getKind() == STRING_CHARAT); @@ -2990,31 +3024,34 @@ Node SequencesRewriter::rewriteReplaceRe(Node node) Node y = node[1]; Node z = node[2]; - if (x.isConst()) + if (RegExpEntail::isConstRegExp(y)) { - // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z" - std::pair<size_t, size_t> match = firstMatch(x, y); - if (match.first != string::npos) + if (x.isConst()) { - String s = x.getConst<String>(); - Node ret = nm->mkNode(STRING_CONCAT, - nm->mkConst(s.substr(0, match.first)), - z, - nm->mkConst(s.substr(match.second))); - return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL); + // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z" + std::pair<size_t, size_t> match = firstMatch(x, y); + if (match.first != string::npos) + { + String s = x.getConst<String>(); + Node ret = nm->mkNode(STRING_CONCAT, + nm->mkConst(s.substr(0, match.first)), + z, + nm->mkConst(s.substr(match.second))); + return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL); + } + else + { + return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL); + } } - else + // str.replace_re( x, y, z ) ---> z ++ x if "" in y ---> true + String emptyStr(""); + if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, y)) { - return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL); + Node ret = nm->mkNode(STRING_CONCAT, z, x); + return returnRewrite(node, ret, Rewrite::REPLACE_RE_EMP_RE); } } - // str.replace_re( x, y, z ) ---> z ++ x if "" in y ---> true - String emptyStr(""); - if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, y)) - { - Node ret = nm->mkNode(STRING_CONCAT, z, x); - return returnRewrite(node, ret, Rewrite::REPLACE_RE_EMP_RE); - } return node; } @@ -3026,31 +3063,34 @@ Node 
SequencesRewriter::rewriteReplaceReAll(Node node) Node y = node[1]; Node z = node[2]; - if (x.isConst()) + if (RegExpEntail::isConstRegExp(y)) { - // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) ---> - // "Z" ++ y ++ "Z" ++ y - TypeNode t = x.getType(); - Node emp = Word::mkEmptyWord(t); - Node yp = Rewriter::rewrite( - nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp))); - std::vector<Node> res; - String rem = x.getConst<String>(); - std::pair<size_t, size_t> match(0, 0); - while (rem.size() >= 0) + if (x.isConst()) { - match = firstMatch(nm->mkConst(rem), yp); - if (match.first == string::npos) - { - break; + // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) ---> + // "Z" ++ y ++ "Z" ++ y + TypeNode t = x.getType(); + Node emp = Word::mkEmptyWord(t); + Node yp = Rewriter::rewrite( + nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp))); + std::vector<Node> res; + String rem = x.getConst<String>(); + std::pair<size_t, size_t> match(0, 0); + while (rem.size() >= 0) + { + match = firstMatch(nm->mkConst(rem), yp); + if (match.first == string::npos) + { + break; + } + res.push_back(nm->mkConst(rem.substr(0, match.first))); + res.push_back(z); + rem = rem.substr(match.second); } - res.push_back(nm->mkConst(rem.substr(0, match.first))); - res.push_back(z); - rem = rem.substr(match.second); + res.push_back(nm->mkConst(rem)); + Node ret = utils::mkConcat(res, t); + return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL); } - res.push_back(nm->mkConst(rem)); - Node ret = utils::mkConcat(res, t); - return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL); } return node; diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h index 47a20a7ca..105bfc105 100644 --- a/src/theory/strings/sequences_rewriter.h +++ b/src/theory/strings/sequences_rewriter.h @@ -2,10 +2,10 @@ /*! 
\file sequences_rewriter.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli + ** Andrew Reynolds, Andres Noetzli, Yoni Zohar ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -264,6 +264,13 @@ class SequencesRewriter : public TheoryRewriter */ Node rewriteSeqUnit(Node node); + /** rewrite seq.nth + * This is the entry point for post-rewriting terms n of the form + * seq.nth(s, i) + * Returns the rewritten form of node. + */ + Node rewriteSeqNth(Node node); + /** length preserving rewrite * * Given input n, this returns a string n' whose length is equivalent to n. diff --git a/src/theory/strings/sequences_stats.cpp b/src/theory/strings/sequences_stats.cpp index 502d05353..fe6bc548e 100644 --- a/src/theory/strings/sequences_stats.cpp +++ b/src/theory/strings/sequences_stats.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -25,13 +25,14 @@ SequencesStatistics::SequencesStatistics() : d_checkRuns("theory::strings::checkRuns", 0), d_strategyRuns("theory::strings::strategyRuns", 0), d_inferences("theory::strings::inferences"), + d_inferencesNoPf("theory::strings::inferencesNoPf"), d_cdSimplifications("theory::strings::cdSimplifications"), d_reductions("theory::strings::reductions"), d_regexpUnfoldingsPos("theory::strings::regexpUnfoldingsPos"), d_regexpUnfoldingsNeg("theory::strings::regexpUnfoldingsNeg"), d_rewrites("theory::strings::rewrites"), d_conflictsEqEngine("theory::strings::conflictsEqEngine", 0), - d_conflictsEagerPrefix("theory::strings::conflictsEagerPrefix", 0), + d_conflictsEager("theory::strings::conflictsEager", 0), d_conflictsInfer("theory::strings::conflictsInfer", 0), d_lemmasEagerPreproc("theory::strings::lemmasEagerPreproc", 0), d_lemmasCmiSplit("theory::strings::lemmasCmiSplit", 0), @@ -43,13 +44,14 @@ SequencesStatistics::SequencesStatistics() smtStatisticsRegistry()->registerStat(&d_checkRuns); smtStatisticsRegistry()->registerStat(&d_strategyRuns); smtStatisticsRegistry()->registerStat(&d_inferences); + smtStatisticsRegistry()->registerStat(&d_inferencesNoPf); smtStatisticsRegistry()->registerStat(&d_cdSimplifications); smtStatisticsRegistry()->registerStat(&d_reductions); smtStatisticsRegistry()->registerStat(&d_regexpUnfoldingsPos); smtStatisticsRegistry()->registerStat(&d_regexpUnfoldingsNeg); smtStatisticsRegistry()->registerStat(&d_rewrites); smtStatisticsRegistry()->registerStat(&d_conflictsEqEngine); - smtStatisticsRegistry()->registerStat(&d_conflictsEagerPrefix); + smtStatisticsRegistry()->registerStat(&d_conflictsEager); smtStatisticsRegistry()->registerStat(&d_conflictsInfer); smtStatisticsRegistry()->registerStat(&d_lemmasEagerPreproc); smtStatisticsRegistry()->registerStat(&d_lemmasCmiSplit); @@ -63,13 +65,14 @@ 
SequencesStatistics::~SequencesStatistics() smtStatisticsRegistry()->unregisterStat(&d_checkRuns); smtStatisticsRegistry()->unregisterStat(&d_strategyRuns); smtStatisticsRegistry()->unregisterStat(&d_inferences); + smtStatisticsRegistry()->unregisterStat(&d_inferencesNoPf); smtStatisticsRegistry()->unregisterStat(&d_cdSimplifications); smtStatisticsRegistry()->unregisterStat(&d_reductions); smtStatisticsRegistry()->unregisterStat(&d_regexpUnfoldingsPos); smtStatisticsRegistry()->unregisterStat(&d_regexpUnfoldingsNeg); smtStatisticsRegistry()->unregisterStat(&d_rewrites); smtStatisticsRegistry()->unregisterStat(&d_conflictsEqEngine); - smtStatisticsRegistry()->unregisterStat(&d_conflictsEagerPrefix); + smtStatisticsRegistry()->unregisterStat(&d_conflictsEager); smtStatisticsRegistry()->unregisterStat(&d_conflictsInfer); smtStatisticsRegistry()->unregisterStat(&d_lemmasEagerPreproc); smtStatisticsRegistry()->unregisterStat(&d_lemmasCmiSplit); diff --git a/src/theory/strings/sequences_stats.h b/src/theory/strings/sequences_stats.h index 85f830fa2..e35d951f7 100644 --- a/src/theory/strings/sequences_stats.h +++ b/src/theory/strings/sequences_stats.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -63,6 +63,11 @@ class SequencesStatistics /** Counts the number of applications of each type of inference */ HistogramStat<Inference> d_inferences; /** + * Counts the number of applications of each type of inference that were not + * processed as a proof step. This is a subset of d_inferences. 
+ */ + HistogramStat<Inference> d_inferencesNoPf; + /** * Counts the number of applications of each type of context-dependent * simplification. The sum of this map is equal to the number of EXTF or * EXTF_N inferences. @@ -87,8 +92,8 @@ class SequencesStatistics //--------------- conflicts, partition of calls to OutputChannel::conflict /** Number of equality engine conflicts */ IntStat d_conflictsEqEngine; - /** Number of eager prefix conflicts */ - IntStat d_conflictsEagerPrefix; + /** Number of eager conflicts */ + IntStat d_conflictsEager; /** Number of inference conflicts */ IntStat d_conflictsInfer; //--------------- end of conflicts diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp index 8fb854d91..a1e04071b 100644 --- a/src/theory/strings/skolem_cache.cpp +++ b/src/theory/strings/skolem_cache.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h index 302c69e83..0a6dd367f 100644 --- a/src/theory/strings/skolem_cache.h +++ b/src/theory/strings/skolem_cache.h @@ -2,10 +2,10 @@ /*! \file skolem_cache.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli, Mathias Preiner + ** Andrew Reynolds, Andres Noetzli, Yoni Zohar ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -138,6 +138,11 @@ class SkolemCache // where b is a regular expression, n is the number of occurrences of b // in a, and k(0)=0. SK_OCCUR_LEN, + // For function k: ((Seq U) x Int) -> U + // exists k. + // forall s, n. + // k(s, n) is some undefined value of sort U + SK_NTH, }; /** * Returns a skolem of type string that is cached for (a,b,id) and has diff --git a/src/theory/strings/solver_state.cpp b/src/theory/strings/solver_state.cpp index 06a86935f..89d77b033 100644 --- a/src/theory/strings/solver_state.cpp +++ b/src/theory/strings/solver_state.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Tianyi Liang, Mathias Preiner ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -27,17 +27,11 @@ namespace strings { SolverState::SolverState(context::Context* c, context::UserContext* u, - eq::EqualityEngine& ee, Valuation& v) - : d_context(c), - d_ucontext(u), - d_ee(ee), - d_eeDisequalities(c), - d_valuation(v), - d_conflict(c, false), - d_pendingConflict(c) + : TheoryState(c, u, v), d_eeDisequalities(c), d_pendingConflictSet(c, false) { d_zero = NodeManager::currentNM()->mkConst(Rational(0)); + d_false = NodeManager::currentNM()->mkConst(false); } SolverState::~SolverState() @@ -48,53 +42,6 @@ SolverState::~SolverState() } } -context::Context* SolverState::getSatContext() const { return d_context; } -context::UserContext* SolverState::getUserContext() const { return d_ucontext; } - -Node SolverState::getRepresentative(Node t) const -{ - if (d_ee.hasTerm(t)) - { - return d_ee.getRepresentative(t); - } - return t; -} - -bool SolverState::hasTerm(Node a) const { return d_ee.hasTerm(a); } - -bool SolverState::areEqual(Node a, Node b) const -{ - if (a == b) - { - return true; - } - else if (hasTerm(a) && hasTerm(b)) - { - return d_ee.areEqual(a, b); - } - return false; -} - -bool SolverState::areDisequal(Node a, Node b) const -{ - if (a == b) - { - return false; - } - else if (hasTerm(a) && hasTerm(b)) - { - Node ar = d_ee.getRepresentative(a); - Node br = d_ee.getRepresentative(b); - return (ar != br && ar.isConst() && br.isConst()) - || d_ee.areDisequal(ar, br, false); - } - Node ar = getRepresentative(a); - Node br = getRepresentative(b); - return ar != br && ar.isConst() && br.isConst(); -} - -eq::EqualityEngine* SolverState::getEqualityEngine() const { return &d_ee; } - const context::CDList<Node>& SolverState::getDisequalityList() const { return d_eeDisequalities; @@ -105,7 +52,7 @@ void SolverState::eqNotifyNewClass(TNode t) Kind k = t.getKind(); if (k == STRING_LENGTH || k == STRING_TO_CODE) { - Node r = d_ee.getRepresentative(t[0]); + 
Node r = d_ee->getRepresentative(t[0]); EqcInfo* ei = getOrMakeEqcInfo(r); if (k == STRING_LENGTH) { @@ -118,10 +65,12 @@ void SolverState::eqNotifyNewClass(TNode t) } else if (t.isConst()) { - EqcInfo* ei = getOrMakeEqcInfo(t); - ei->d_prefixC = t; - ei->d_suffixC = t; - return; + if (t.getType().isStringLike()) + { + EqcInfo* ei = getOrMakeEqcInfo(t); + ei->d_prefixC = t; + ei->d_suffixC = t; + } } else if (k == STRING_CONCAT) { @@ -129,11 +78,12 @@ void SolverState::eqNotifyNewClass(TNode t) } } -void SolverState::eqNotifyPreMerge(TNode t1, TNode t2) +void SolverState::eqNotifyMerge(TNode t1, TNode t2) { EqcInfo* e2 = getOrMakeEqcInfo(t2, false); if (e2) { + Assert(t1.getType().isStringLike()); EqcInfo* e1 = getOrMakeEqcInfo(t1); // add information from e2 to e1 if (!e2->d_lengthTerm.get().isNull()) @@ -146,12 +96,12 @@ void SolverState::eqNotifyPreMerge(TNode t1, TNode t2) } if (!e2->d_prefixC.get().isNull()) { - setPendingConflictWhen( + setPendingPrefixConflictWhen( e1->addEndpointConst(e2->d_prefixC, Node::null(), false)); } if (!e2->d_suffixC.get().isNull()) { - setPendingConflictWhen( + setPendingPrefixConflictWhen( e1->addEndpointConst(e2->d_suffixC, Node::null(), true)); } if (e2->d_cardinalityLemK.get() > e1->d_cardinalityLemK.get()) @@ -191,7 +141,7 @@ EqcInfo* SolverState::getOrMakeEqcInfo(Node eqc, bool doMake) return nullptr; } -TheoryModel* SolverState::getModel() const { return d_valuation.getModel(); } +TheoryModel* SolverState::getModel() { return d_valuation.getModel(); } void SolverState::addEndpointsToEqcInfo(Node t, Node concat, Node eqc) { @@ -212,7 +162,7 @@ void SolverState::addEndpointsToEqcInfo(Node t, Node concat, Node eqc) Trace("strings-eager-pconf-debug") << "New term: " << concat << " for " << t << " with prefix " << c << " (" << (r == 1) << ")" << std::endl; - setPendingConflictWhen(ei->addEndpointConst(t, c, r == 1)); + setPendingPrefixConflictWhen(ei->addEndpointConst(t, c, r == 1)); } } } @@ -278,18 +228,39 @@ bool 
SolverState::isEqualEmptyWord(Node s, Node& emps) return false; } -void SolverState::setConflict() { d_conflict = true; } -bool SolverState::isInConflict() const { return d_conflict; } +void SolverState::setPendingPrefixConflictWhen(Node conf) +{ + if (conf.isNull() || d_pendingConflictSet.get()) + { + return; + } + InferInfo iiPrefixConf; + iiPrefixConf.d_id = Inference::PREFIX_CONFLICT; + iiPrefixConf.d_conc = d_false; + utils::flattenOp(AND, conf, iiPrefixConf.d_ant); + setPendingConflict(iiPrefixConf); +} -void SolverState::setPendingConflictWhen(Node conf) +void SolverState::setPendingConflict(InferInfo& ii) { - if (!conf.isNull() && d_pendingConflict.get().isNull()) + if (!d_pendingConflictSet.get()) { - d_pendingConflict = conf; + d_pendingConflict = ii; + d_pendingConflictSet.set(true); } } -Node SolverState::getPendingConflict() const { return d_pendingConflict; } +bool SolverState::hasPendingConflict() const { return d_pendingConflictSet; } + +bool SolverState::getPendingConflict(InferInfo& ii) const +{ + if (d_pendingConflictSet) + { + ii = d_pendingConflict; + return true; + } + return false; +} std::pair<bool, Node> SolverState::entailmentCheck(options::TheoryOfMode mode, TNode lit) @@ -314,14 +285,14 @@ void SolverState::separateByLength( NodeManager* nm = NodeManager::currentNM(); for (const Node& eqc : n) { - Assert(d_ee.getRepresentative(eqc) == eqc); + Assert(d_ee->getRepresentative(eqc) == eqc); TypeNode tnEqc = eqc.getType(); EqcInfo* ei = getOrMakeEqcInfo(eqc, false); Node lt = ei ? 
ei->d_lengthTerm : Node::null(); if (!lt.isNull()) { lt = nm->mkNode(STRING_LENGTH, lt); - Node r = d_ee.getRepresentative(lt); + Node r = d_ee->getRepresentative(lt); std::pair<Node, TypeNode> lkey(r, tnEqc); if (eqc_to_leqc.find(lkey) == eqc_to_leqc.end()) { diff --git a/src/theory/strings/solver_state.h b/src/theory/strings/solver_state.h index 2eee90ca4..291a15feb 100644 --- a/src/theory/strings/solver_state.h +++ b/src/theory/strings/solver_state.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Tianyi Liang, Morgan Deters ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -19,12 +19,15 @@ #include <map> +#include "context/cdlist.h" #include "context/context.h" #include "expr/node.h" +#include "theory/strings/eqc_info.h" +#include "theory/strings/infer_info.h" #include "theory/theory_model.h" +#include "theory/theory_state.h" #include "theory/uf/equality_engine.h" #include "theory/valuation.h" -#include "theory/strings/eqc_info.h" namespace CVC4 { namespace theory { @@ -39,41 +42,17 @@ namespace strings { * (2) Whether the set of assertions is in conflict. * (3) Equivalence class information as in the class above. 
*/ -class SolverState +class SolverState : public TheoryState { typedef context::CDList<Node> NodeList; public: SolverState(context::Context* c, context::UserContext* u, - eq::EqualityEngine& ee, Valuation& v); ~SolverState(); - /** Get the SAT context */ - context::Context* getSatContext() const; - /** Get the user context */ - context::UserContext* getUserContext() const; //-------------------------------------- equality information /** - * Get the representative of t in the equality engine of this class, or t - * itself if it is not registered as a term. - */ - Node getRepresentative(Node t) const; - /** Is t registered as a term in the equality engine of this class? */ - bool hasTerm(Node a) const; - /** - * Are a and b equal according to the equality engine of this class? Also - * returns true if a and b are identical. - */ - bool areEqual(Node a, Node b) const; - /** - * Are a and b disequal according to the equality engine of this class? Also - * returns true if the representative of a and b are distinct constants. - */ - bool areDisequal(Node a, Node b) const; - /** get equality engine */ - eq::EqualityEngine* getEqualityEngine() const; - /** * Get the list of disequalities that are currently asserted to the equality * engine. */ @@ -82,21 +61,13 @@ class SolverState //-------------------------------------- notifications for equalities /** called when a new equivalence class is created */ void eqNotifyNewClass(TNode t); - /** called when two equivalence classes will merge */ - void eqNotifyPreMerge(TNode t1, TNode t2); + /** called when two equivalence classes merge */ + void eqNotifyMerge(TNode t1, TNode t2); /** called when two equivalence classes are made disequal */ void eqNotifyDisequal(TNode t1, TNode t2, TNode reason); //-------------------------------------- end notifications for equalities //------------------------------------------ conflicts - /** - * Set that the current state of the solver is in conflict. 
This should be - * called immediately after a call to conflict(...) on the output channel of - * the theory of strings. - */ - void setConflict(); - /** Are we currently in conflict? */ - bool isInConflict() const; - /** set pending conflict + /** set pending prefix conflict * * If conf is non-null, this is called when conf is a conjunction of literals * that hold in the current context that are unsatisfiable. It is set as the @@ -106,9 +77,16 @@ class SolverState * during a merge operation, when the equality engine is not in a state to * provide explanations. */ - void setPendingConflictWhen(Node conf); + void setPendingPrefixConflictWhen(Node conf); + /** + * Set pending conflict, infer info version. Called when we are in conflict + * based on the inference ii. This generalizes the above method. + */ + void setPendingConflict(InferInfo& ii); + /** return true if we have a pending conflict */ + bool hasPendingConflict() const; /** get the pending conflict, or null if none exist */ - Node getPendingConflict() const; + bool getPendingConflict(InferInfo& ii) const; //------------------------------------------ end conflicts /** get length with explanation * @@ -149,7 +127,7 @@ class SolverState */ EqcInfo* getOrMakeEqcInfo(Node eqc, bool doMake = true); /** Get pointer to the model object of the Valuation object */ - TheoryModel* getModel() const; + TheoryModel* getModel(); /** add endpoints to eqc info * @@ -182,23 +160,16 @@ class SolverState private: /** Common constants */ Node d_zero; - /** Pointer to the SAT context object used by the theory of strings. */ - context::Context* d_context; - /** Pointer to the user context object used by the theory of strings. */ - context::UserContext* d_ucontext; - /** Reference to equality engine of the theory of strings. */ - eq::EqualityEngine& d_ee; + Node d_false; /** * The (SAT-context-dependent) list of disequalities that have been asserted * to the equality engine above. 
*/ NodeList d_eeDisequalities; - /** Reference to the valuation of the theory of strings */ - Valuation& d_valuation; - /** Are we in conflict? */ - context::CDO<bool> d_conflict; /** The pending conflict if one exists */ - context::CDO<Node> d_pendingConflict; + context::CDO<bool> d_pendingConflictSet; + /** The pending conflict, valid if the above flag is true */ + InferInfo d_pendingConflict; /** Map from representatives to their equivalence class information */ std::map<Node, EqcInfo*> d_eqcInfo; }; /* class TheoryStrings */ diff --git a/src/theory/strings/strategy.cpp b/src/theory/strings/strategy.cpp index 549bba9d6..7ead6f45f 100644 --- a/src/theory/strings/strategy.cpp +++ b/src/theory/strings/strategy.cpp @@ -2,10 +2,10 @@ /*! \file strategy.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tianyi Liang, Morgan Deters + ** Andrew Reynolds ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/strategy.h b/src/theory/strings/strategy.h index 9afb6a92f..802994d06 100644 --- a/src/theory/strings/strategy.h +++ b/src/theory/strings/strategy.h @@ -4,8 +4,8 @@ ** Top contributors (to current version): ** Andrew Reynolds ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp index 928414523..874854be5 100644 --- a/src/theory/strings/strings_entail.cpp +++ b/src/theory/strings/strings_entail.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -111,95 +111,92 @@ bool StringsEntail::canConstantContainList(Node c, bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1, std::vector<Node>& nr, int dir, - Node& curr) + Node& curr, + bool strict) { Assert(dir == 1 || dir == -1); Assert(nr.empty()); NodeManager* nm = NodeManager::currentNM(); Node zero = nm->mkConst(CVC4::Rational(0)); bool ret = false; - bool success; + bool success = true; unsigned sindex = 0; - do + while (success && curr != zero && sindex < n1.size()) { Assert(!curr.isNull()); success = false; - if (curr != zero && sindex < n1.size()) + unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex); + if (n1[sindex_use].isConst()) { - unsigned sindex_use = dir == 1 ? 
sindex : ((n1.size() - 1) - sindex); - if (n1[sindex_use].isConst()) + // could strip part of a constant + Node lowerBound = ArithEntail::getConstantBound(Rewriter::rewrite(curr)); + if (!lowerBound.isNull()) { - // could strip part of a constant - Node lowerBound = - ArithEntail::getConstantBound(Rewriter::rewrite(curr)); - if (!lowerBound.isNull()) + Assert(lowerBound.isConst()); + Rational lbr = lowerBound.getConst<Rational>(); + if (lbr.sgn() > 0) { - Assert(lowerBound.isConst()); - Rational lbr = lowerBound.getConst<Rational>(); - if (lbr.sgn() > 0) + Assert(ArithEntail::check(curr, true)); + Node s = n1[sindex_use]; + size_t slen = Word::getLength(s); + Node ncl = nm->mkConst(CVC4::Rational(slen)); + Node next_s = nm->mkNode(MINUS, lowerBound, ncl); + next_s = Rewriter::rewrite(next_s); + Assert(next_s.isConst()); + // we can remove the entire constant + if (next_s.getConst<Rational>().sgn() >= 0) { - Assert(ArithEntail::check(curr, true)); - Node s = n1[sindex_use]; - size_t slen = Word::getLength(s); - Node ncl = nm->mkConst(CVC4::Rational(slen)); - Node next_s = nm->mkNode(MINUS, lowerBound, ncl); - next_s = Rewriter::rewrite(next_s); - Assert(next_s.isConst()); - // we can remove the entire constant - if (next_s.getConst<Rational>().sgn() >= 0) + curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl)); + success = true; + sindex++; + } + else + { + // we can remove part of the constant + // lower bound minus the length of a concrete string is negative, + // hence lowerBound cannot be larger than long max + Assert(lbr < Rational(String::maxSize())); + curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound)); + uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); + Assert(lbsize < slen); + if (dir == 1) { - curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl)); - success = true; - sindex++; + // strip partially from the front + nr.push_back(Word::prefix(s, lbsize)); + n1[sindex_use] = Word::suffix(s, slen - lbsize); } else { - // we can remove part of 
the constant - // lower bound minus the length of a concrete string is negative, - // hence lowerBound cannot be larger than long max - Assert(lbr < Rational(String::maxSize())); - curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound)); - uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); - Assert(lbsize < slen); - if (dir == 1) - { - // strip partially from the front - nr.push_back(Word::prefix(s, lbsize)); - n1[sindex_use] = Word::suffix(s, slen - lbsize); - } - else - { - // strip partially from the back - nr.push_back(Word::suffix(s, lbsize)); - n1[sindex_use] = Word::prefix(s, slen - lbsize); - } - ret = true; + // strip partially from the back + nr.push_back(Word::suffix(s, lbsize)); + n1[sindex_use] = Word::prefix(s, slen - lbsize); } - Assert(ArithEntail::check(curr)); - } - else - { - // we cannot remove the constant + ret = true; } + Assert(ArithEntail::check(curr)); } - } - else - { - Node next_s = NodeManager::currentNM()->mkNode( - MINUS, - curr, - NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use])); - next_s = Rewriter::rewrite(next_s); - if (ArithEntail::check(next_s)) + else { - success = true; - curr = next_s; - sindex++; + // we cannot remove the constant } } } - } while (success); - if (sindex > 0) + else + { + Node next_s = NodeManager::currentNM()->mkNode( + MINUS, + curr, + NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use])); + next_s = Rewriter::rewrite(next_s); + if (ArithEntail::check(next_s)) + { + success = true; + curr = next_s; + sindex++; + } + } + } + if (sindex > 0 && (!strict || curr == zero)) { if (dir == 1) { @@ -225,6 +222,9 @@ int StringsEntail::componentContains(std::vector<Node>& n1, { Assert(nb.empty()); Assert(ne.empty()); + Trace("strings-entail") << "Component contains: " << std::endl; + Trace("strings-entail") << "n1 = " << n1 << std::endl; + Trace("strings-entail") << "n2 = " << n2 << std::endl; // if n2 is a singleton, we can do optimized version here if (n2.size() == 1) { @@ -301,6 
+301,10 @@ int StringsEntail::componentContains(std::vector<Node>& n1, -1, computeRemainder && remainderDir != -1)) { + Trace("strings-entail-debug") + << "Last remainder begin is " << n1rb_last << std::endl; + Trace("strings-entail-debug") + << "Last remainder end is " << n1re_last << std::endl; Assert(n1rb_last.isNull()); if (computeRemainder) { @@ -325,6 +329,9 @@ int StringsEntail::componentContains(std::vector<Node>& n1, } } } + Trace("strings-entail-debug") << "ne = " << ne << std::endl; + Trace("strings-entail-debug") << "nb = " << nb << std::endl; + Trace("strings-entail-debug") << "...return " << i << std::endl; return i; } else @@ -444,12 +451,12 @@ bool StringsEntail::componentContainsBase( { return false; } - if (dir != 1) + if (dir != -1) { n1rb = nm->mkNode( STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos); } - if (dir != -1) + if (dir != 1) { n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s); } diff --git a/src/theory/strings/strings_entail.h b/src/theory/strings/strings_entail.h index 3eb77c5f5..be5743ef4 100644 --- a/src/theory/strings/strings_entail.h +++ b/src/theory/strings/strings_entail.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -67,10 +67,11 @@ class StringsEntail /** strip symbolic length * - * This function strips off components of n1 whose length is less than - * or equal to argument curr, and stores them in nr. The direction - * dir determines whether the components are removed from the start - * or end of n1. 
+ * This function strips off components of n1 whose length is less than or + * equal to argument curr, and stores them in nr. The direction dir + * determines whether the components are removed from the start or end of n1. + * If `strict` is set to true, then the function only returns true if full + * length `curr` can be stripped. * * In detail, this function updates n1 to n1' such that: * If dir=1, @@ -107,7 +108,8 @@ class StringsEntail static bool stripSymbolicLength(std::vector<Node>& n1, std::vector<Node>& nr, int dir, - Node& curr); + Node& curr, + bool strict = false); /** component contains * This function is used when rewriting str.contains( t1, t2 ), where * n1 is the vector form of t1 diff --git a/src/theory/strings/strings_fmf.cpp b/src/theory/strings/strings_fmf.cpp index 9530171f0..bf34ceb1c 100644 --- a/src/theory/strings/strings_fmf.cpp +++ b/src/theory/strings/strings_fmf.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Tianyi Liang ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/strings_fmf.h b/src/theory/strings/strings_fmf.h index f66c23d5b..262790d0f 100644 --- a/src/theory/strings/strings_fmf.h +++ b/src/theory/strings/strings_fmf.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/strings_rewriter.cpp b/src/theory/strings/strings_rewriter.cpp index 76391cc0d..932b5c8cc 100644 --- a/src/theory/strings/strings_rewriter.cpp +++ b/src/theory/strings/strings_rewriter.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/strings_rewriter.h b/src/theory/strings/strings_rewriter.h index aadb11f1c..d0eee6089 100644 --- a/src/theory/strings/strings_rewriter.h +++ b/src/theory/strings/strings_rewriter.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp index 613aa26c8..8274b7dc0 100644 --- a/src/theory/strings/term_registry.cpp +++ b/src/theory/strings/term_registry.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -37,11 +37,10 @@ typedef expr::Attribute<StringsProxyVarAttributeId, bool> StringsProxyVarAttribute; TermRegistry::TermRegistry(SolverState& s, - eq::EqualityEngine& ee, OutputChannel& out, SequencesStatistics& statistics, ProofNodeManager* pnm) - : d_ee(ee), + : d_state(s), d_out(out), d_statistics(statistics), d_hasStrCode(false), @@ -50,10 +49,12 @@ TermRegistry::TermRegistry(SolverState& s, d_preregisteredTerms(s.getUserContext()), d_registeredTerms(s.getUserContext()), d_registeredTypes(s.getUserContext()), - d_proxyVar(s.getUserContext()), - d_proxyVarToLength(s.getUserContext()), d_lengthLemmaTermsCache(s.getUserContext()), - d_epg(nullptr) + d_epg(pnm ? new EagerProofGenerator( + pnm, + s.getUserContext(), + "strings::TermRegistry::EagerProofGenerator") + : nullptr) { NodeManager* nm = NodeManager::currentNM(); d_zero = nm->mkConst(Rational(0)); @@ -128,6 +129,7 @@ void TermRegistry::preRegisterTerm(TNode n) { return; } + eq::EqualityEngine* ee = d_state.getEqualityEngine(); d_preregisteredTerms.insert(n); Trace("strings-preregister") << "TheoryString::preregister : " << n << std::endl; @@ -136,8 +138,8 @@ void TermRegistry::preRegisterTerm(TNode n) if (!options::stringExp()) { if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI - || k == STRING_STRREPL || k == STRING_STRREPLALL - || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL + || k == STRING_STRREPL || k == STRING_SUBSTR || k == STRING_STRREPLALL + || k == SEQ_NTH || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV || k == STRING_UPDATE) { @@ -155,15 +157,15 @@ void TermRegistry::preRegisterTerm(TNode n) ss << "Equality between regular expressions is not supported"; throw LogicException(ss.str()); } - d_ee.addTriggerEquality(n); + ee->addTriggerPredicate(n); return; } else if 
(k == STRING_IN_REGEXP) { d_out.requirePhase(n, true); - d_ee.addTriggerPredicate(n); - d_ee.addTerm(n[0]); - d_ee.addTerm(n[1]); + ee->addTriggerPredicate(n); + ee->addTerm(n[0]); + ee->addTerm(n[1]); return; } else if (k == STRING_TO_CODE) @@ -195,17 +197,21 @@ void TermRegistry::preRegisterTerm(TNode n) } } } - d_ee.addTerm(n); + ee->addTerm(n); } else if (tn.isBoolean()) { - // Get triggered for both equal and dis-equal - d_ee.addTriggerPredicate(n); + // All kinds that we do congruence over that may return a Boolean go here + if (k==STRING_STRCTN || k == STRING_LEQ || k == SEQ_NTH) + { + // Get triggered for both equal and dis-equal + ee->addTriggerPredicate(n); + } } else { // Function applications/predicates - d_ee.addTerm(n); + ee->addTerm(n); } // Set d_functionsTerms stores all function applications that are // relevant to theory combination. Notice that this is a subset of @@ -215,7 +221,7 @@ void TermRegistry::preRegisterTerm(TNode n) // Concatenation terms do not need to be considered here because // their arguments have string type and do not introduce any shared // terms. 
- if (n.hasOperator() && d_ee.isFunctionKind(k) && k != STRING_CONCAT) + if (n.hasOperator() && ee->isFunctionKind(k) && k != STRING_CONCAT) { d_functionsTerms.push_back(n); } @@ -264,7 +270,7 @@ void TermRegistry::registerTerm(Node n, int effort) registerType(tn); Debug("strings-register") << "TheoryStrings::registerTerm() " << n << ", effort = " << effort << std::endl; - Node regTermLem; + TrustNode regTermLem; if (tn.isStringLike()) { // register length information: @@ -275,15 +281,29 @@ void TermRegistry::registerTerm(Node n, int effort) else if (n.getKind() != STRING_STRCTN) { // we don't send out eager reduction lemma for str.contains currently - regTermLem = eagerReduce(n, &d_skCache); + Node eagerRedLemma = eagerReduce(n, &d_skCache); + if (!eagerRedLemma.isNull()) + { + // if there was an eager reduction, we make the trust node for it + if (d_epg != nullptr) + { + regTermLem = d_epg->mkTrustNode( + eagerRedLemma, PfRule::STRING_EAGER_REDUCTION, {}, {n}); + } + else + { + regTermLem = TrustNode::mkTrustLemma(eagerRedLemma, nullptr); + } + } } if (!regTermLem.isNull()) { Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem << std::endl; - Trace("strings-assert") << "(assert " << regTermLem << ")" << std::endl; + Trace("strings-assert") + << "(assert " << regTermLem.getNode() << ")" << std::endl; ++(d_statistics.d_lemmasRegisterTerm); - d_out.lemma(regTermLem); + d_out.trustedLemma(regTermLem); } } @@ -298,14 +318,14 @@ void TermRegistry::registerType(TypeNode tn) { // preregister the empty word for the type Node emp = Word::mkEmptyWord(tn); - if (!d_ee.hasTerm(emp)) + if (!d_state.hasTerm(emp)) { preRegisterTerm(emp); } } } -Node TermRegistry::getRegisterTermLemma(Node n) +TrustNode TermRegistry::getRegisterTermLemma(Node n) { Assert(n.getType().isStringLike()); NodeManager* nm = NodeManager::currentNM(); @@ -321,7 +341,7 @@ Node TermRegistry::getRegisterTermLemma(Node n) if (lsum == lsumb) { registerTermAtomic(n, LENGTH_SPLIT); - return 
Node::null(); + return TrustNode::null(); } } Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym"); @@ -367,7 +387,12 @@ Node TermRegistry::getRegisterTermLemma(Node n) Node ret = nm->mkNode(AND, eq, ceq); - return ret; + // it is a simple rewrite to justify this + if (d_epg != nullptr) + { + return d_epg->mkTrustNode(ret, PfRule::MACRO_SR_PRED_INTRO, {}, {ret}); + } + return TrustNode::mkTrustLemma(ret, nullptr); } void TermRegistry::registerTermAtomic(Node n, LengthStatus s) @@ -384,14 +409,15 @@ void TermRegistry::registerTermAtomic(Node n, LengthStatus s) return; } std::map<Node, bool> reqPhase; - Node lenLem = getRegisterTermAtomicLemma(n, s, reqPhase); + TrustNode lenLem = getRegisterTermAtomicLemma(n, s, reqPhase); if (!lenLem.isNull()) { Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem << std::endl; - Trace("strings-assert") << "(assert " << lenLem << ")" << std::endl; + Trace("strings-assert") + << "(assert " << lenLem.getNode() << ")" << std::endl; ++(d_statistics.d_lemmasRegisterTermAtomic); - d_out.lemma(lenLem); + d_out.trustedLemma(lenLem); } for (const std::pair<const Node, bool>& rp : reqPhase) { @@ -414,16 +440,15 @@ const context::CDHashSet<Node, NodeHashFunction>& TermRegistry::getInputVars() bool TermRegistry::hasStringCode() const { return d_hasStrCode; } -Node TermRegistry::getRegisterTermAtomicLemma(Node n, - LengthStatus s, - std::map<Node, bool>& reqPhase) +TrustNode TermRegistry::getRegisterTermAtomicLemma( + Node n, LengthStatus s, std::map<Node, bool>& reqPhase) { if (n.isConst()) { // No need to send length for constant terms. This case may be triggered // for cases where the skolem cache automatically replaces a skolem by // a constant. 
- return Node::null(); + return TrustNode::null(); } Assert(n.getType().isStringLike()); NodeManager* nm = NodeManager::currentNM(); @@ -437,7 +462,7 @@ Node TermRegistry::getRegisterTermAtomicLemma(Node n, Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one << std::endl; Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl; - return len_geq_one; + return TrustNode::mkTrustLemma(len_geq_one, nullptr); } if (s == LENGTH_ONE) @@ -446,7 +471,7 @@ Node TermRegistry::getRegisterTermAtomicLemma(Node n, Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one << std::endl; Trace("strings-assert") << "(assert " << len_one << ")" << std::endl; - return len_one; + return TrustNode::mkTrustLemma(len_one, nullptr); } Assert(s == LENGTH_SPLIT); @@ -477,7 +502,11 @@ Node TermRegistry::getRegisterTermAtomicLemma(Node n, Assert(!case_emptyr.getConst<bool>()); } - return lenLemma; + if (d_epg != nullptr) + { + return d_epg->mkTrustNode(lenLemma, PfRule::STRING_LENGTH_POS, {}, {n}); + } + return TrustNode::mkTrustLemma(lenLemma, nullptr); } Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const @@ -526,7 +555,7 @@ Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const Node TermRegistry::getProxyVariableFor(Node n) const { - NodeNodeMap::const_iterator it = d_proxyVar.find(n); + std::map<Node, Node>::const_iterator it = d_proxyVar.find(n); if (it != d_proxyVar.end()) { return (*it).second; @@ -534,6 +563,18 @@ Node TermRegistry::getProxyVariableFor(Node n) const return Node::null(); } +Node TermRegistry::ensureProxyVariableFor(Node n) +{ + Node proxy = getProxyVariableFor(n); + if (proxy.isNull()) + { + registerTerm(n, 0); + proxy = getProxyVariableFor(n); + } + Assert(!proxy.isNull()); + return proxy; +} + void TermRegistry::inferSubstitutionProxyVars(Node n, std::vector<Node>& vars, std::vector<Node>& subs, diff --git a/src/theory/strings/term_registry.h 
b/src/theory/strings/term_registry.h index 4d5a91d97..a713cc60f 100644 --- a/src/theory/strings/term_registry.h +++ b/src/theory/strings/term_registry.h @@ -2,10 +2,10 @@ /*! \file term_registry.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tim King, Tianyi Liang + ** Andrew Reynolds, Andres Noetzli, Tim King ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -50,7 +50,6 @@ class TermRegistry public: TermRegistry(SolverState& s, - eq::EqualityEngine& ee, OutputChannel& out, SequencesStatistics& statistics, ProofNodeManager* pnm); @@ -168,6 +167,15 @@ class TermRegistry */ Node getProxyVariableFor(Node n) const; + /** + * Get the proxy variable for a term. If the proxy variable does not exist, + * this method registers the term and then returns its proxy variable. + * + * @param n The term + * @return Proxy variable for `n` + */ + Node ensureProxyVariableFor(Node n); + /** infer substitution proxy vars * * This method attempts to (partially) convert the formula n into a @@ -209,8 +217,8 @@ class TermRegistry Node d_negOne; /** the cardinality of the alphabet */ uint32_t d_cardSize; - /** Reference to equality engine of the theory of strings. */ - eq::EqualityEngine& d_ee; + /** Reference to the solver state of the theory of strings. */ + SolverState& d_state; /** Reference to the output channel of the theory of strings. */ OutputChannel& d_out; /** Reference to the statistics for the theory of strings/sequences. */ @@ -241,12 +249,12 @@ class TermRegistry * which rewrites to 3 = 3. * In the above example, we store "ABC" -> v_{"ABC"} in this map. 
*/ - NodeNodeMap d_proxyVar; + std::map<Node, Node> d_proxyVar; /** * Map from proxy variables to their normalized length. In the above example, * we store "ABC" -> 3. */ - NodeNodeMap d_proxyVarToLength; + std::map<Node, Node> d_proxyVarToLength; /** List of terms that we have register length for */ NodeSet d_lengthLemmaTermsCache; /** Proof generator, manages proofs for lemmas generated by this class */ @@ -267,7 +275,7 @@ class TermRegistry * If n is an atomic term, the method registerTermAtomic is called for n * and s = LENGTH_SPLIT and no lemma is returned. */ - Node getRegisterTermLemma(Node n); + TrustNode getRegisterTermLemma(Node n); /** * Get the lemma required for registering the length information for * atomic term n given length status s. For details, see registerTermAtomic. @@ -276,9 +284,9 @@ class TermRegistry * argument reqPhase, which should be processed by a call to requiredPhase by * the caller of this method. */ - Node getRegisterTermAtomicLemma(Node n, - LengthStatus s, - std::map<Node, bool>& reqPhase); + TrustNode getRegisterTermAtomicLemma(Node n, + LengthStatus s, + std::map<Node, bool>& reqPhase); }; } // namespace strings diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 150ea8977..a9e2c0051 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -2,10 +2,10 @@ /*! \file theory_strings.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tianyi Liang, Andres Noetzli + ** Andrew Reynolds, Tianyi Liang, Yoni Zohar ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. 
See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -15,6 +15,7 @@ #include "theory/strings/theory_strings.h" #include "expr/kind.h" +#include "options/smt_options.h" #include "options/strings_options.h" #include "options/theory_options.h" #include "smt/logic_exception.h" @@ -43,61 +44,30 @@ TheoryStrings::TheoryStrings(context::Context* c, : Theory(THEORY_STRINGS, c, u, out, valuation, logicInfo, pnm), d_notify(*this), d_statistics(), - d_equalityEngine(d_notify, c, "theory::strings::ee", true), - d_state(c, u, d_equalityEngine, d_valuation), - d_termReg(d_state, d_equalityEngine, out, d_statistics, nullptr), - d_im(nullptr), + d_state(c, u, d_valuation), + d_termReg(d_state, out, d_statistics, pnm), + d_extTheoryCb(), + d_extTheory(d_extTheoryCb, c, u, out), + d_im(*this, d_state, d_termReg, d_extTheory, d_statistics, pnm), d_rewriter(&d_statistics.d_rewrites), - d_bsolver(nullptr), - d_csolver(nullptr), - d_esolver(nullptr), - d_rsolver(nullptr), + d_bsolver(d_state, d_im), + d_csolver(d_state, d_im, d_termReg, d_bsolver), + d_esolver(d_state, + d_im, + d_termReg, + d_rewriter, + d_bsolver, + d_csolver, + d_extTheory, + d_statistics), + d_rsolver(d_state, + d_im, + d_termReg.getSkolemCache(), + d_csolver, + d_esolver, + d_statistics), d_stringsFmf(c, u, valuation, d_termReg) { - setupExtTheory(); - ExtTheory* extt = getExtTheory(); - // initialize the inference manager, which requires the extended theory - d_im.reset( - new InferenceManager(c, u, d_state, d_termReg, *extt, out, d_statistics)); - // initialize the solvers - d_bsolver.reset(new BaseSolver(d_state, *d_im)); - d_csolver.reset(new CoreSolver(c, u, d_state, *d_im, d_termReg, *d_bsolver)); - d_esolver.reset(new ExtfSolver(c, - u, - d_state, - *d_im, - d_termReg, - d_rewriter, - *d_bsolver, - *d_csolver, - *extt, - d_statistics)); - d_rsolver.reset(new RegExpSolver( - d_state, *d_im, *d_csolver, *d_esolver, d_statistics, c, u)); - - // The kinds we are 
treating as function application in congruence - d_equalityEngine.addFunctionKind(kind::STRING_LENGTH); - d_equalityEngine.addFunctionKind(kind::STRING_CONCAT); - d_equalityEngine.addFunctionKind(kind::STRING_IN_REGEXP); - d_equalityEngine.addFunctionKind(kind::STRING_TO_CODE); - d_equalityEngine.addFunctionKind(kind::SEQ_UNIT); - - // extended functions - d_equalityEngine.addFunctionKind(kind::STRING_STRCTN); - d_equalityEngine.addFunctionKind(kind::STRING_LEQ); - d_equalityEngine.addFunctionKind(kind::STRING_SUBSTR); - d_equalityEngine.addFunctionKind(kind::STRING_UPDATE); - d_equalityEngine.addFunctionKind(kind::STRING_ITOS); - d_equalityEngine.addFunctionKind(kind::STRING_STOI); - d_equalityEngine.addFunctionKind(kind::STRING_STRIDOF); - d_equalityEngine.addFunctionKind(kind::STRING_STRREPL); - d_equalityEngine.addFunctionKind(kind::STRING_STRREPLALL); - d_equalityEngine.addFunctionKind(kind::STRING_REPLACE_RE); - d_equalityEngine.addFunctionKind(kind::STRING_REPLACE_RE_ALL); - d_equalityEngine.addFunctionKind(kind::STRING_STRREPLALL); - d_equalityEngine.addFunctionKind(kind::STRING_TOLOWER); - d_equalityEngine.addFunctionKind(kind::STRING_TOUPPER); - d_equalityEngine.addFunctionKind(kind::STRING_REV); d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) ); d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); @@ -106,6 +76,20 @@ TheoryStrings::TheoryStrings(context::Context* c, d_false = NodeManager::currentNM()->mkConst( false ); d_cardSize = utils::getAlphabetCardinality(); + + // set up the extended function callback + d_extTheoryCb.d_esolver = &d_esolver; + + ProofChecker* pc = pnm != nullptr ? 
pnm->getChecker() : nullptr; + if (pc != nullptr) + { + // add checkers + d_sProofChecker.registerTo(pc); + } + // use the state object as the official theory state + d_theoryState = &d_state; + // use the inference manager as the official inference manager + d_inferManager = &d_im; } TheoryStrings::~TheoryStrings() { @@ -113,27 +97,63 @@ TheoryStrings::~TheoryStrings() { } TheoryRewriter* TheoryStrings::getTheoryRewriter() { return &d_rewriter; } -std::string TheoryStrings::identify() const -{ - return std::string("TheoryStrings"); -} -eq::EqualityEngine* TheoryStrings::getEqualityEngine() + +bool TheoryStrings::needsEqualityEngine(EeSetupInfo& esi) { - return &d_equalityEngine; + esi.d_notify = &d_notify; + esi.d_name = "theory::strings::ee"; + return true; } + void TheoryStrings::finishInit() { - TheoryModel* tm = d_valuation.getModel(); + Assert(d_equalityEngine != nullptr); + // witness is used to eliminate str.from_code - tm->setUnevaluatedKind(WITNESS); + d_valuation.setUnevaluatedKind(WITNESS); + + bool eagerEval = options::stringEagerEval(); + // The kinds we are treating as function application in congruence + d_equalityEngine->addFunctionKind(kind::STRING_LENGTH, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_CONCAT, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_IN_REGEXP, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_TO_CODE, eagerEval); + d_equalityEngine->addFunctionKind(kind::SEQ_UNIT, eagerEval); + // `seq.nth` is not always defined, and so we do not evaluate it eagerly. 
+ d_equalityEngine->addFunctionKind(kind::SEQ_NTH, false); + // extended functions + d_equalityEngine->addFunctionKind(kind::STRING_STRCTN, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_LEQ, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_SUBSTR, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_UPDATE, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_ITOS, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_STOI, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_STRIDOF, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_STRREPL, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_STRREPLALL, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_REPLACE_RE, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_REPLACE_RE_ALL, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_STRREPLALL, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_TOLOWER, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_TOUPPER, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_REV, eagerEval); +} + +std::string TheoryStrings::identify() const +{ + return std::string("TheoryStrings"); } bool TheoryStrings::areCareDisequal( TNode x, TNode y ) { - Assert(d_equalityEngine.hasTerm(x)); - Assert(d_equalityEngine.hasTerm(y)); - if( d_equalityEngine.isTriggerTerm(x, THEORY_STRINGS) && d_equalityEngine.isTriggerTerm(y, THEORY_STRINGS) ){ - TNode x_shared = d_equalityEngine.getTriggerTermRepresentative(x, THEORY_STRINGS); - TNode y_shared = d_equalityEngine.getTriggerTermRepresentative(y, THEORY_STRINGS); + Assert(d_equalityEngine->hasTerm(x)); + Assert(d_equalityEngine->hasTerm(y)); + if (d_equalityEngine->isTriggerTerm(x, THEORY_STRINGS) + && d_equalityEngine->isTriggerTerm(y, THEORY_STRINGS)) + { + TNode x_shared = + d_equalityEngine->getTriggerTermRepresentative(x, THEORY_STRINGS); + TNode y_shared = + 
d_equalityEngine->getTriggerTermRepresentative(y, THEORY_STRINGS); EqualityStatus eqStatus = d_valuation.getEqualityStatus(x_shared, y_shared); if( eqStatus==EQUALITY_FALSE_AND_PROPAGATED || eqStatus==EQUALITY_FALSE || eqStatus==EQUALITY_FALSE_IN_MODEL ){ return true; @@ -142,51 +162,32 @@ bool TheoryStrings::areCareDisequal( TNode x, TNode y ) { return false; } -void TheoryStrings::setMasterEqualityEngine(eq::EqualityEngine* eq) { - d_equalityEngine.setMasterEqualityEngine(eq); -} - -void TheoryStrings::addSharedTerm(TNode t) { - Debug("strings") << "TheoryStrings::addSharedTerm(): " - << t << " " << t.getType().isBoolean() << endl; - d_equalityEngine.addTriggerTerm(t, THEORY_STRINGS); +void TheoryStrings::notifySharedTerm(TNode t) +{ + Debug("strings") << "TheoryStrings::notifySharedTerm(): " << t << " " + << t.getType().isBoolean() << endl; if (options::stringExp()) { - getExtTheory()->registerTermRec(t); - } - Debug("strings") << "TheoryStrings::addSharedTerm() finished" << std::endl; -} - -EqualityStatus TheoryStrings::getEqualityStatus(TNode a, TNode b) { - if( d_equalityEngine.hasTerm(a) && d_equalityEngine.hasTerm(b) ){ - if (d_equalityEngine.areEqual(a, b)) { - // The terms are implied to be equal - return EQUALITY_TRUE; - } - if (d_equalityEngine.areDisequal(a, b, false)) { - // The terms are implied to be dis-equal - return EQUALITY_FALSE; - } + d_esolver.addSharedTerm(t); } - return EQUALITY_UNKNOWN; -} - -void TheoryStrings::propagate(Effort e) { - // direct propagation now + Debug("strings") << "TheoryStrings::notifySharedTerm() finished" << std::endl; } -bool TheoryStrings::propagate(TNode literal) { - Debug("strings-propagate") << "TheoryStrings::propagate(" << literal << ")" << std::endl; +bool TheoryStrings::propagateLit(TNode literal) +{ + Debug("strings-propagate") + << "TheoryStrings::propagateLit(" << literal << ")" << std::endl; // If already in conflict, no more propagation if (d_state.isInConflict()) { - Debug("strings-propagate") << 
"TheoryStrings::propagate(" << literal << "): already in conflict" << std::endl; + Debug("strings-propagate") << "TheoryStrings::propagateLit(" << literal + << "): already in conflict" << std::endl; return false; } // Propagate out bool ok = d_out->propagate(literal); if (!ok) { - d_state.setConflict(); + d_state.notifyInConflict(); } return ok; } @@ -194,29 +195,7 @@ bool TheoryStrings::propagate(TNode literal) { TrustNode TheoryStrings::explain(TNode literal) { Debug("strings-explain") << "explain called on " << literal << std::endl; - std::vector< TNode > assumptions; - d_im->explain(literal, assumptions); - Node ret; - if( assumptions.empty() ){ - ret = d_true; - }else if( assumptions.size()==1 ){ - ret = assumptions[0]; - }else{ - ret = NodeManager::currentNM()->mkNode(kind::AND, assumptions); - } - return TrustNode::mkTrustPropExp(literal, ret, nullptr); -} - -bool TheoryStrings::getCurrentSubstitution( int effort, std::vector< Node >& vars, - std::vector< Node >& subs, std::map< Node, std::vector< Node > >& exp ) { - Trace("strings-subs") << "getCurrentSubstitution, effort = " << effort << std::endl; - for( unsigned i=0; i<vars.size(); i++ ){ - Node n = vars[i]; - Trace("strings-subs") << " get subs for " << n << "..." << std::endl; - Node s = d_esolver->getCurrentSubstitutionFor(effort, n, exp[n]); - subs.push_back(s); - } - return true; + return d_im.explainLit(literal); } void TheoryStrings::presolve() { @@ -242,23 +221,10 @@ void TheoryStrings::presolve() { // MODEL GENERATION ///////////////////////////////////////////////////////////////////////////// -bool TheoryStrings::collectModelInfo(TheoryModel* m) +bool TheoryStrings::collectModelValues(TheoryModel* m, + const std::set<Node>& termSet) { - Trace("strings-model") << "TheoryStrings : Collect model info" << std::endl; - Trace("strings-model") << "TheoryStrings : assertEqualityEngine." 
<< std::endl; - - std::set<Node> termSet; - - // Compute terms appearing in assertions and shared terms - computeRelevantTerms(termSet); - // assert the (relevant) portion of the equality engine to the model - if (!m->assertEqualityEngine(&d_equalityEngine, &termSet)) - { - Unreachable() - << "TheoryStrings::collectModelInfo: failed to assert equality engine" - << std::endl; - return false; - } + Trace("strings-model") << "TheoryStrings : Collect model values" << std::endl; std::map<TypeNode, std::unordered_set<Node, NodeHashFunction> > repSet; // Generate model @@ -302,7 +268,7 @@ bool TheoryStrings::collectModelInfoType( std::map< Node, Node > processed; //step 1 : get all values for known lengths std::vector< Node > lts_values; - std::map<unsigned, Node> values_used; + std::map<std::size_t, Node> values_used; std::vector<Node> len_splits; for( unsigned i=0; i<col.size(); i++ ) { Trace("strings-model") << "Checking length for {"; @@ -329,15 +295,16 @@ bool TheoryStrings::collectModelInfoType( else { // must throw logic exception if we cannot construct the string - if (len_value.getConst<Rational>() > Rational(String::maxSize())) + if (len_value.getConst<Rational>() > String::maxSize()) { std::stringstream ss; - ss << "Cannot generate model with string whose length exceeds UINT32_MAX"; + ss << "The model was computed to have strings of length " << len_value + << ". 
We only allow strings up to length " << String::maxSize(); throw LogicException(ss.str()); } - unsigned lvalue = + std::size_t lvalue = len_value.getConst<Rational>().getNumerator().toUnsignedInt(); - std::map<unsigned, Node>::iterator itvu = values_used.find(lvalue); + auto itvu = values_used.find(lvalue); if (itvu == values_used.end()) { values_used[lvalue] = lts[i]; @@ -364,14 +331,17 @@ bool TheoryStrings::collectModelInfoType( //check if col[i][j] has only variables if (!eqc.isConst()) { - NormalForm& nfe = d_csolver->getNormalForm(eqc); + NormalForm& nfe = d_csolver.getNormalForm(eqc); if (nfe.d_nf.size() == 1) { // is it an equivalence class with a seq.unit term? if (nfe.d_nf[0].getKind() == SEQ_UNIT) { - pure_eq_assign[eqc] = nfe.d_nf[0]; + Node c = Rewriter::rewrite(nm->mkNode( + SEQ_UNIT, d_valuation.getModelValue(nfe.d_nf[0][0]))); + pure_eq_assign[eqc] = c; Trace("strings-model") << "(unit: " << nfe.d_nf[0] << ") "; + m->getEqualityEngine()->addTerm(c); } // does it have a code and the length of these equivalence classes are // one? @@ -399,6 +369,12 @@ bool TheoryStrings::collectModelInfoType( else { processed[eqc] = eqc; + // Make sure that constants are asserted to the theory model that we + // are building. It is possible that new constants were introduced by + // the eager evaluation in the equality engine. These terms are missing + // in the term set and, as a result, are skipped when the equality + // engine is asserted to the theory model. 
+ m->getEqualityEngine()->addTerm(eqc); } } Trace("strings-model") << "have length " << lts_values[i] << std::endl; @@ -407,7 +383,7 @@ bool TheoryStrings::collectModelInfoType( if( !pure_eq.empty() ){ if( lts_values[i].isNull() ){ // start with length two (other lengths have special precendence) - unsigned lvalue = 2; + std::size_t lvalue = 2; while( values_used.find( lvalue )!=values_used.end() ){ lvalue++; } @@ -516,7 +492,7 @@ bool TheoryStrings::collectModelInfoType( { if (processed.find(rn) == processed.end()) { - NormalForm& nf = d_csolver->getNormalForm(rn); + NormalForm& nf = d_csolver.getNormalForm(rn); if (Trace.isOn("strings-model")) { Trace("strings-model") @@ -604,30 +580,68 @@ TrustNode TheoryStrings::expandDefinition(Node node) ITE, cond, t.eqNode(nm->mkNode(STRING_TO_CODE, k)), k.eqNode(emp))); return TrustNode::mkTrustRewrite(node, ret, nullptr); } - return TrustNode::null(); } -void TheoryStrings::check(Effort e) { - if (done() && e<EFFORT_FULL) { - return; +bool TheoryStrings::preNotifyFact( + TNode atom, bool pol, TNode fact, bool isPrereg, bool isInternal) +{ + // this is only required for internal facts, others are already registered + if (isInternal && atom.getKind() == EQUAL) + { + // we must ensure these terms are registered + for (const Node& t : atom) + { + // terms in the equality engine are already registered, hence skip + // currently done for only string-like terms, but this could potentially + // be avoided. 
+ if (!d_equalityEngine->hasTerm(t) && t.getType().isStringLike()) + { + d_termReg.registerTerm(t, 0); + } + } } + return false; +} - TimerStat::CodeTimer checkTimer(d_checkTime); - - // Trace("strings-process") << "Theory of strings, check : " << e << std::endl; - Trace("strings-check-debug") - << "Theory of strings, check : " << e << std::endl; - while (!done() && !d_state.isInConflict()) +void TheoryStrings::notifyFact(TNode atom, + bool polarity, + TNode fact, + bool isInternal) +{ + if (atom.getKind() == STRING_IN_REGEXP) { - // Get all the assertions - Assertion assertion = get(); - TNode fact = assertion.d_assertion; - - Trace("strings-assertion") << "get assertion: " << fact << endl; - d_im->sendAssumption(fact); + if (polarity && atom[1].getKind() == REGEXP_CONCAT) + { + Node eqc = d_equalityEngine->getRepresentative(atom[0]); + d_state.addEndpointsToEqcInfo(atom, atom[1], eqc); + } + } + // process pending conflicts due to reasoning about endpoints + if (!d_state.isInConflict() && d_state.hasPendingConflict()) + { + InferInfo iiPendingConf; + d_state.getPendingConflict(iiPendingConf); + Trace("strings-pending") + << "Process pending conflict " << iiPendingConf.d_ant << std::endl; + Trace("strings-conflict") + << "CONFLICT: Eager : " << iiPendingConf.d_ant << std::endl; + ++(d_statistics.d_conflictsEager); + // call the inference manager to send the conflict + d_im.processConflict(iiPendingConf); + return; } - d_im->doPendingFacts(); + Trace("strings-pending-debug") << " Now collect terms" << std::endl; + // Collect extended function terms in the atom. Notice that we must register + // all extended functions occurring in assertions and shared terms. We + // make a similar call to registerTermRec in TheoryStrings::addSharedTerm. 
+ d_extTheory.registerTermRec(atom); + Trace("strings-pending-debug") << " Finished collect terms" << std::endl; +} + +void TheoryStrings::postCheck(Effort e) +{ + d_im.doPendingFacts(); Assert(d_strat.isStrategyInit()); if (!d_state.isInConflict() && !d_valuation.needCheck() @@ -635,16 +649,19 @@ void TheoryStrings::check(Effort e) { { Trace("strings-check-debug") << "Theory of strings " << e << " effort check " << std::endl; - if(Trace.isOn("strings-eqc")) { - for( unsigned t=0; t<2; t++ ) { - eq::EqClassesIterator eqcs2_i = eq::EqClassesIterator( &d_equalityEngine ); + if (Trace.isOn("strings-eqc")) + { + for (unsigned t = 0; t < 2; t++) + { + eq::EqClassesIterator eqcs2_i = eq::EqClassesIterator(d_equalityEngine); Trace("strings-eqc") << (t==0 ? "STRINGS:" : "OTHER:") << std::endl; while( !eqcs2_i.isFinished() ){ Node eqc = (*eqcs2_i); bool print = (t == 0 && eqc.getType().isStringLike()) || (t == 1 && !eqc.getType().isStringLike()); if (print) { - eq::EqClassIterator eqc2_i = eq::EqClassIterator( eqc, &d_equalityEngine ); + eq::EqClassIterator eqc2_i = + eq::EqClassIterator(eqc, d_equalityEngine); Trace("strings-eqc") << "Eqc( " << eqc << " ) : { "; while( !eqc2_i.isFinished() ) { if( (*eqc2_i)!=eqc && (*eqc2_i).getKind()!=kind::EQUAL ){ @@ -672,58 +689,64 @@ void TheoryStrings::check(Effort e) { Trace("strings-eqc") << std::endl; } ++(d_statistics.d_checkRuns); - bool addedLemma = false; - bool addedFact; + bool sentLemma = false; + bool hadPending = false; Trace("strings-check") << "Full effort check..." << std::endl; do{ + d_im.reset(); ++(d_statistics.d_strategyRuns); Trace("strings-check") << " * Run strategy..." << std::endl; runStrategy(e); - // flush the facts - addedFact = d_im->hasPendingFact(); - addedLemma = d_im->hasPendingLemma(); - d_im->doPendingFacts(); - d_im->doPendingLemmas(); + // remember if we had pending facts or lemmas + hadPending = d_im.hasPending(); + // Send the facts *and* the lemmas. 
We send lemmas regardless of whether + // we send facts since some lemmas cannot be dropped. Other lemmas are + // otherwise avoided by aborting the strategy when a fact is ready. + d_im.doPending(); + // Did we successfully send a lemma? Notice that if hasPending = true + // and sentLemma = false, then the above call may have: + // (1) had no pending lemmas, but successfully processed pending facts, + // (2) unsuccessfully processed pending lemmas. + // In either case, we repeat the strategy if we are not in conflict. + sentLemma = d_im.hasSentLemma(); if (Trace.isOn("strings-check")) { Trace("strings-check") << " ...finish run strategy: "; - Trace("strings-check") << (addedFact ? "addedFact " : ""); - Trace("strings-check") << (addedLemma ? "addedLemma " : ""); + Trace("strings-check") << (hadPending ? "hadPending " : ""); + Trace("strings-check") << (sentLemma ? "sentLemma " : ""); Trace("strings-check") << (d_state.isInConflict() ? "conflict " : ""); - if (!addedFact && !addedLemma && !d_state.isInConflict()) + if (!hadPending && !sentLemma && !d_state.isInConflict()) { Trace("strings-check") << "(none)"; } Trace("strings-check") << std::endl; } - // repeat if we did not add a lemma or conflict - } while (!d_state.isInConflict() && !addedLemma && addedFact); + // repeat if we did not add a lemma or conflict, and we had pending + // facts or lemmas. 
+ } while (!d_state.isInConflict() && !sentLemma && hadPending); } Trace("strings-check") << "Theory of strings, done check : " << e << std::endl; - Assert(!d_im->hasPendingFact()); - Assert(!d_im->hasPendingLemma()); + Assert(!d_im.hasPendingFact()); + Assert(!d_im.hasPendingLemma()); } bool TheoryStrings::needsCheckLastEffort() { if( options::stringGuessModel() ){ - return d_esolver->hasExtendedFunctions(); + return d_esolver.hasExtendedFunctions(); } return false; } /** Conflict when merging two constants */ void TheoryStrings::conflict(TNode a, TNode b){ - if (!d_state.isInConflict()) + if (d_state.isInConflict()) { - Debug("strings-conflict") << "Making conflict..." << std::endl; - d_state.setConflict(); - TrustNode conflictNode = explain(a.eqNode(b)); - Trace("strings-conflict") - << "CONFLICT: Eq engine conflict : " << conflictNode.getNode() - << std::endl; - ++(d_statistics.d_conflictsEqEngine); - d_out->conflict(conflictNode.getNode()); + // already in conflict + return; } + d_im.conflictEqConstantMerge(a, b); + Trace("strings-conflict") << "CONFLICT: Eq engine conflict" << std::endl; + ++(d_statistics.d_conflictsEqEngine); } void TheoryStrings::eqNotifyNewClass(TNode t){ @@ -746,20 +769,26 @@ void TheoryStrings::addCarePairs(TNodeTrie* t1, if( t2!=NULL ){ Node f1 = t1->getData(); Node f2 = t2->getData(); - if( !d_equalityEngine.areEqual( f1, f2 ) ){ + if (!d_equalityEngine->areEqual(f1, f2)) + { Trace("strings-cg-debug") << "TheoryStrings::computeCareGraph(): checking function " << f1 << " and " << f2 << std::endl; vector< pair<TNode, TNode> > currentPairs; for (unsigned k = 0; k < f1.getNumChildren(); ++ k) { TNode x = f1[k]; TNode y = f2[k]; - Assert(d_equalityEngine.hasTerm(x)); - Assert(d_equalityEngine.hasTerm(y)); - Assert(!d_equalityEngine.areDisequal(x, y, false)); + Assert(d_equalityEngine->hasTerm(x)); + Assert(d_equalityEngine->hasTerm(y)); + Assert(!d_equalityEngine->areDisequal(x, y, false)); Assert(!areCareDisequal(x, y)); - if( 
!d_equalityEngine.areEqual( x, y ) ){ - if( d_equalityEngine.isTriggerTerm(x, THEORY_STRINGS) && d_equalityEngine.isTriggerTerm(y, THEORY_STRINGS) ){ - TNode x_shared = d_equalityEngine.getTriggerTermRepresentative(x, THEORY_STRINGS); - TNode y_shared = d_equalityEngine.getTriggerTermRepresentative(y, THEORY_STRINGS); + if (!d_equalityEngine->areEqual(x, y)) + { + if (d_equalityEngine->isTriggerTerm(x, THEORY_STRINGS) + && d_equalityEngine->isTriggerTerm(y, THEORY_STRINGS)) + { + TNode x_shared = d_equalityEngine->getTriggerTermRepresentative( + x, THEORY_STRINGS); + TNode y_shared = d_equalityEngine->getTriggerTermRepresentative( + y, THEORY_STRINGS); currentPairs.push_back(make_pair(x_shared, y_shared)); } } @@ -787,7 +816,8 @@ void TheoryStrings::addCarePairs(TNodeTrie* t1, std::map<TNode, TNodeTrie>::iterator it2 = it; ++it2; for( ; it2 != t1->d_data.end(); ++it2 ){ - if( !d_equalityEngine.areDisequal(it->first, it2->first, false) ){ + if (!d_equalityEngine->areDisequal(it->first, it2->first, false)) + { if( !areCareDisequal(it->first, it2->first) ){ addCarePairs( &it->second, &it2->second, arity, depth+1 ); } @@ -800,7 +830,7 @@ void TheoryStrings::addCarePairs(TNodeTrie* t1, { for (std::pair<const TNode, TNodeTrie>& tt2 : t2->d_data) { - if (!d_equalityEngine.areDisequal(tt1.first, tt2.first, false)) + if (!d_equalityEngine->areDisequal(tt1.first, tt2.first, false)) { if (!areCareDisequal(tt1.first, tt2.first)) { @@ -829,8 +859,9 @@ void TheoryStrings::computeCareGraph(){ std::vector< TNode > reps; bool has_trigger_arg = false; for( unsigned j=0; j<f1.getNumChildren(); j++ ){ - reps.push_back( d_equalityEngine.getRepresentative( f1[j] ) ); - if( d_equalityEngine.isTriggerTerm( f1[j], THEORY_STRINGS ) ){ + reps.push_back(d_equalityEngine->getRepresentative(f1[j])); + if (d_equalityEngine->isTriggerTerm(f1[j], THEORY_STRINGS)) + { has_trigger_arg = true; } } @@ -853,14 +884,14 @@ void TheoryStrings::computeCareGraph(){ void 
TheoryStrings::checkRegisterTermsPreNormalForm() { - const std::vector<Node>& seqc = d_bsolver->getStringEqc(); + const std::vector<Node>& seqc = d_bsolver.getStringEqc(); for (const Node& eqc : seqc) { - eq::EqClassIterator eqc_i = eq::EqClassIterator(eqc, &d_equalityEngine); + eq::EqClassIterator eqc_i = eq::EqClassIterator(eqc, d_equalityEngine); while (!eqc_i.isFinished()) { Node n = (*eqc_i); - if (!d_bsolver->isCongruent(n)) + if (!d_bsolver.isCongruent(n)) { d_termReg.registerTerm(n, 2); } @@ -882,10 +913,10 @@ void TheoryStrings::checkCodes() // str.code applied to the proxy variables for each equivalence classes that // are constants of size one std::vector<Node> const_codes; - const std::vector<Node>& seqc = d_bsolver->getStringEqc(); + const std::vector<Node>& seqc = d_bsolver.getStringEqc(); for (const Node& eqc : seqc) { - NormalForm& nfe = d_csolver->getNormalForm(eqc); + NormalForm& nfe = d_csolver.getNormalForm(eqc); if (nfe.d_nf.size() == 1 && nfe.d_nf[0].isConst()) { Node c = nfe.d_nf[0]; @@ -894,13 +925,12 @@ void TheoryStrings::checkCodes() Node cc = nm->mkNode(kind::STRING_TO_CODE, c); cc = Rewriter::rewrite(cc); Assert(cc.isConst()); - Node cp = d_termReg.getProxyVariableFor(c); - AlwaysAssert(!cp.isNull()); + Node cp = d_termReg.ensureProxyVariableFor(c); Node vc = nm->mkNode(STRING_TO_CODE, cp); if (!d_state.areEqual(cc, vc)) { std::vector<Node> emptyVec; - d_im->sendInference(emptyVec, cc.eqNode(vc), Inference::CODE_PROXY); + d_im.sendInference(emptyVec, cc.eqNode(vc), Inference::CODE_PROXY); } const_codes.push_back(vc); } @@ -914,7 +944,7 @@ void TheoryStrings::checkCodes() } } } - if (d_im->hasProcessed()) + if (d_im.hasProcessed()) { return; } @@ -937,9 +967,10 @@ void TheoryStrings::checkCodes() Node eqn = c1[0].eqNode(c2[0]); // str.code(x)==-1 V str.code(x)!=str.code(y) V x==y Node inj_lem = nm->mkNode(kind::OR, eq_no, deq, eqn); - d_im->sendPhaseRequirement(deq, false); + deq = Rewriter::rewrite(deq); + 
d_im.addPendingPhaseRequirement(deq, false); std::vector<Node> emptyVec; - d_im->sendInference(emptyVec, inj_lem, Inference::CODE_INJ); + d_im.sendInference(emptyVec, inj_lem, Inference::CODE_INJ); } } } @@ -948,10 +979,10 @@ void TheoryStrings::checkCodes() void TheoryStrings::checkRegisterTermsNormalForms() { - const std::vector<Node>& seqc = d_bsolver->getStringEqc(); + const std::vector<Node>& seqc = d_bsolver.getStringEqc(); for (const Node& eqc : seqc) { - NormalForm& nfi = d_csolver->getNormalForm(eqc); + NormalForm& nfi = d_csolver.getNormalForm(eqc); // check if there is a length term for this equivalence class EqcInfo* ei = d_state.getOrMakeEqcInfo(eqc, false); Node lt = ei ? ei->d_lengthTerm : Node::null(); @@ -982,7 +1013,7 @@ TrustNode TheoryStrings::ppRewrite(TNode atom) if( !options::stringLazyPreproc() ){ //eager preprocess here std::vector< Node > new_nodes; - StringsPreprocess* p = d_esolver->getPreprocess(); + StringsPreprocess* p = d_esolver.getPreprocess(); Node ret = p->processAssertion(atomRet, new_nodes); if (ret != atomRet) { @@ -1018,25 +1049,25 @@ void TheoryStrings::runInferStep(InferStep s, int effort) Trace("strings-process") << "..." 
<< std::endl; switch (s) { - case CHECK_INIT: d_bsolver->checkInit(); break; - case CHECK_CONST_EQC: d_bsolver->checkConstantEquivalenceClasses(); break; - case CHECK_EXTF_EVAL: d_esolver->checkExtfEval(effort); break; - case CHECK_CYCLES: d_csolver->checkCycles(); break; - case CHECK_FLAT_FORMS: d_csolver->checkFlatForms(); break; + case CHECK_INIT: d_bsolver.checkInit(); break; + case CHECK_CONST_EQC: d_bsolver.checkConstantEquivalenceClasses(); break; + case CHECK_EXTF_EVAL: d_esolver.checkExtfEval(effort); break; + case CHECK_CYCLES: d_csolver.checkCycles(); break; + case CHECK_FLAT_FORMS: d_csolver.checkFlatForms(); break; case CHECK_REGISTER_TERMS_PRE_NF: checkRegisterTermsPreNormalForm(); break; - case CHECK_NORMAL_FORMS_EQ: d_csolver->checkNormalFormsEq(); break; - case CHECK_NORMAL_FORMS_DEQ: d_csolver->checkNormalFormsDeq(); break; + case CHECK_NORMAL_FORMS_EQ: d_csolver.checkNormalFormsEq(); break; + case CHECK_NORMAL_FORMS_DEQ: d_csolver.checkNormalFormsDeq(); break; case CHECK_CODES: checkCodes(); break; - case CHECK_LENGTH_EQC: d_csolver->checkLengthsEqc(); break; + case CHECK_LENGTH_EQC: d_csolver.checkLengthsEqc(); break; case CHECK_REGISTER_TERMS_NF: checkRegisterTermsNormalForms(); break; - case CHECK_EXTF_REDUCTION: d_esolver->checkExtfReductions(effort); break; - case CHECK_MEMBERSHIP: d_rsolver->checkMemberships(); break; - case CHECK_CARDINALITY: d_bsolver->checkCardinality(); break; + case CHECK_EXTF_REDUCTION: d_esolver.checkExtfReductions(effort); break; + case CHECK_MEMBERSHIP: d_rsolver.checkMemberships(); break; + case CHECK_CARDINALITY: d_bsolver.checkCardinality(); break; default: Unreachable(); break; } Trace("strings-process") << "Done " << s - << ", addedFact = " << d_im->hasPendingFact() - << ", addedLemma = " << d_im->hasPendingLemma() + << ", addedFact = " << d_im.hasPendingFact() + << ", addedLemma = " << d_im.hasPendingLemma() << ", conflict = " << d_state.isInConflict() << std::endl; } @@ -1053,7 +1084,7 @@ void 
TheoryStrings::runStrategy(Theory::Effort e) InferStep curr = it->first; if (curr == BREAK) { - if (d_im->hasProcessed()) + if (d_im.hasProcessed()) { break; } diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index dfaa99c06..13b5a5eba 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -2,10 +2,10 @@ /*! \file theory_strings.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tianyi Liang, Tim King + ** Andrew Reynolds, Tianyi Liang, Mathias Preiner ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -25,12 +25,14 @@ #include "context/cdhashset.h" #include "context/cdlist.h" #include "expr/node_trie.h" +#include "theory/ext_theory.h" #include "theory/strings/base_solver.h" #include "theory/strings/core_solver.h" #include "theory/strings/extf_solver.h" #include "theory/strings/infer_info.h" #include "theory/strings/inference_manager.h" #include "theory/strings/normal_form.h" +#include "theory/strings/proof_checker.h" #include "theory/strings/regexp_elim.h" #include "theory/strings/regexp_operation.h" #include "theory/strings/regexp_solver.h" @@ -68,80 +70,68 @@ class TheoryStrings : public Theory { const LogicInfo& logicInfo, ProofNodeManager* pnm); ~TheoryStrings(); + //--------------------------------- initialization + /** get the official theory rewriter of this theory */ + TheoryRewriter* getTheoryRewriter() override; + /** + * Returns true if we need an equality engine. If so, we initialize the + * information regarding how it should be setup. For details, see the + * documentation in Theory::needsEqualityEngine. 
+ */ + bool needsEqualityEngine(EeSetupInfo& esi) override; /** finish initialization */ void finishInit() override; - /** Get the theory rewriter of this class */ - TheoryRewriter* getTheoryRewriter() override; - /** Set the master equality engine */ - void setMasterEqualityEngine(eq::EqualityEngine* eq) override; + //--------------------------------- end initialization /** Identify this theory */ std::string identify() const override; - /** Propagate */ - void propagate(Effort e) override; /** Explain */ TrustNode explain(TNode literal) override; - /** Get the equality engine */ - eq::EqualityEngine* getEqualityEngine() override; - /** Get current substitution */ - bool getCurrentSubstitution(int effort, - std::vector<Node>& vars, - std::vector<Node>& subs, - std::map<Node, std::vector<Node> >& exp) override; /** presolve */ void presolve() override; /** shutdown */ void shutdown() override {} /** add shared term */ - void addSharedTerm(TNode n) override; - /** get equality status */ - EqualityStatus getEqualityStatus(TNode a, TNode b) override; + void notifySharedTerm(TNode n) override; /** preregister term */ void preRegisterTerm(TNode n) override; /** Expand definition */ TrustNode expandDefinition(Node n) override; - /** Check at effort e */ - void check(Effort e) override; - /** needs check last effort */ + //--------------------------------- standard check + /** Do we need a check call at last call effort? */ bool needsCheckLastEffort() override; + bool preNotifyFact(TNode atom, + bool pol, + TNode fact, + bool isPrereg, + bool isInternal) override; + void notifyFact(TNode atom, bool pol, TNode fact, bool isInternal) override; + /** Post-check, called after the fact queue of the theory is processed. 
*/ + void postCheck(Effort level) override; + //--------------------------------- end standard check + /** propagate method */ + bool propagateLit(TNode literal); /** Conflict when merging two constants */ void conflict(TNode a, TNode b); /** called when a new equivalence class is created */ void eqNotifyNewClass(TNode t); /** preprocess rewrite */ TrustNode ppRewrite(TNode atom) override; - /** - * Get all relevant information in this theory regarding the current - * model. Return false if a contradiction is discovered. - */ - bool collectModelInfo(TheoryModel* m) override; + /** Collect model values in m based on the relevant terms given by termSet */ + bool collectModelValues(TheoryModel* m, + const std::set<Node>& termSet) override; private: /** NotifyClass for equality engine */ class NotifyClass : public eq::EqualityEngineNotify { public: NotifyClass(TheoryStrings& ts) : d_str(ts), d_state(ts.d_state) {} - bool eqNotifyTriggerEquality(TNode equality, bool value) override - { - Debug("strings") << "NotifyClass::eqNotifyTriggerEquality(" << equality - << ", " << (value ? "true" : "false") << ")" << std::endl; - if (value) - { - return d_str.propagate(equality); - } - else - { - // We use only literal triggers so taking not is safe - return d_str.propagate(equality.notNode()); - } - } bool eqNotifyTriggerPredicate(TNode predicate, bool value) override { Debug("strings") << "NotifyClass::eqNotifyTriggerPredicate(" << predicate << ", " << (value ? 
"true" : "false") << ")" << std::endl; if (value) { - return d_str.propagate(predicate); - } else { - return d_str.propagate(predicate.notNode()); + return d_str.propagateLit(predicate); } + return d_str.propagateLit(predicate.notNode()); } bool eqNotifyTriggerTermEquality(TheoryId tag, TNode t1, @@ -150,10 +140,9 @@ class TheoryStrings : public Theory { { Debug("strings") << "NotifyClass::eqNotifyTriggerTermMerge(" << tag << ", " << t1 << ", " << t2 << ")" << std::endl; if (value) { - return d_str.propagate(t1.eqNode(t2)); - } else { - return d_str.propagate(t1.eqNode(t2).notNode()); + return d_str.propagateLit(t1.eqNode(t2)); } + return d_str.propagateLit(t1.eqNode(t2).notNode()); } void eqNotifyConstantTermMerge(TNode t1, TNode t2) override { @@ -165,14 +154,11 @@ class TheoryStrings : public Theory { Debug("strings") << "NotifyClass::eqNotifyNewClass(" << t << std::endl; d_str.eqNotifyNewClass(t); } - void eqNotifyPreMerge(TNode t1, TNode t2) override + void eqNotifyMerge(TNode t1, TNode t2) override { - Debug("strings") << "NotifyClass::eqNotifyPreMerge(" << t1 << ", " << t2 << std::endl; - d_state.eqNotifyPreMerge(t1, t2); - } - void eqNotifyPostMerge(TNode t1, TNode t2) override - { - Debug("strings") << "NotifyClass::eqNotifyPostMerge(" << t1 << ", " << t2 << std::endl; + Debug("strings") << "NotifyClass::eqNotifyMerge(" << t1 << ", " << t2 + << std::endl; + d_state.eqNotifyMerge(t1, t2); } void eqNotifyDisequal(TNode t1, TNode t2, TNode reason) override { @@ -186,8 +172,6 @@ class TheoryStrings : public Theory { /** The solver state of the theory of strings */ SolverState& d_state; };/* class TheoryStrings::NotifyClass */ - /** propagate method */ - bool propagate(TNode literal); /** compute care graph */ void computeCareGraph() override; /** @@ -269,33 +253,37 @@ class TheoryStrings : public Theory { * theories is collected in this object. 
*/ SequencesStatistics d_statistics; - /** Equaltity engine */ - eq::EqualityEngine d_equalityEngine; /** The solver state object */ SolverState d_state; /** The term registry for this theory */ TermRegistry d_termReg; + /** The extended theory callback */ + StringsExtfCallback d_extTheoryCb; + /** Extended theory, responsible for context-dependent simplification. */ + ExtTheory d_extTheory; /** The (custom) output channel of the theory of strings */ - std::unique_ptr<InferenceManager> d_im; + InferenceManager d_im; /** The theory rewriter for this theory. */ StringsRewriter d_rewriter; + /** The proof rule checker */ + StringProofRuleChecker d_sProofChecker; /** * The base solver, responsible for reasoning about congruent terms and * inferring constants for equivalence classes. */ - std::unique_ptr<BaseSolver> d_bsolver; + BaseSolver d_bsolver; /** * The core solver, responsible for reasoning about string concatenation * with length constraints. */ - std::unique_ptr<CoreSolver> d_csolver; + CoreSolver d_csolver; /** * Extended function solver, responsible for reductions and simplifications * involving extended string functions. */ - std::unique_ptr<ExtfSolver> d_esolver; + ExtfSolver d_esolver; /** regular expression solver module */ - std::unique_ptr<RegExpSolver> d_rsolver; + RegExpSolver d_rsolver; /** regular expression elimination module */ RegExpElimination d_regexp_elim; /** Strings finite model finding decision strategy */ diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index a752958b2..81ec79327 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. 
+ ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -16,12 +16,12 @@ #include "theory/strings/theory_strings_preprocess.h" -#include <stdint.h> - #include "expr/kind.h" +#include "options/smt_options.h" #include "options/strings_options.h" #include "proof/proof_manager.h" #include "smt/logic_exception.h" +#include "theory/quantifiers/quantifiers_attributes.h" #include "theory/strings/arith_entail.h" #include "theory/strings/sequences_rewriter.h" #include "theory/strings/word.h" @@ -33,6 +33,13 @@ namespace CVC4 { namespace theory { namespace strings { +/** Mapping to a dummy node for marking an attribute on internal quantified + * formulas */ +struct QInternalVarAttributeId +{ +}; +typedef expr::Attribute<QInternalVarAttributeId, Node> QInternalVarAttribute; + StringsPreprocess::StringsPreprocess(SkolemCache* sc, context::UserContext* u, SequencesStatistics& stats) @@ -294,7 +301,7 @@ Node StringsPreprocess::reduce(Node t, Node ux1lem = nm->mkNode(GEQ, n, ux1); lem = nm->mkNode(OR, g.negate(), nm->mkNode(AND, eq, cb, ux1lem)); - lem = nm->mkNode(FORALL, xbv, lem); + lem = mkForallInternal(xbv, lem); conc.push_back(lem); Node nonneg = nm->mkNode(GEQ, n, zero); @@ -381,7 +388,7 @@ Node StringsPreprocess::reduce(Node t, Node ux1lem = nm->mkNode(GEQ, stoit, ux1); lem = nm->mkNode(OR, g.negate(), nm->mkNode(AND, eq, cb, ux1lem)); - lem = nm->mkNode(FORALL, xbv, lem); + lem = mkForallInternal(xbv, lem); conc2.push_back(lem); Node sneg = nm->mkNode(LT, stoit, zero); @@ -409,6 +416,55 @@ Node StringsPreprocess::reduce(Node t, retNode = stoit; } + else if (t.getKind() == kind::SEQ_NTH) + { + // processing term: str.nth( s, n) + // similar to substr. 
+ Node s = t[0]; + Node n = t[1]; + Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst"); + Node t12 = nm->mkNode(PLUS, n, one); + Node lt0 = nm->mkNode(STRING_LENGTH, s); + // start point is greater than or equal zero + Node c1 = nm->mkNode(GEQ, n, zero); + // start point is less than end of string + Node c2 = nm->mkNode(GT, lt0, n); + // check whether this application of seq.nth is defined. + Node cond = nm->mkNode(AND, c1, c2); + + // nodes for the case where `seq.nth` is defined. + Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre"); + Node sk2 = sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr"); + Node unit = nm->mkNode(SEQ_UNIT, skt); + Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, unit, sk2)); + // length of first skolem is second argument + Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n); + Node lsk2 = nm->mkNode(STRING_LENGTH, sk2); + Node b13 = nm->mkNode(EQUAL, lsk2, nm->mkNode(MINUS, lt0, t12)); + Node b1 = nm->mkNode(AND, b11, b12, b13); + + // nodes for the case where `seq.nth` is undefined. + std::vector<TypeNode> argTypes; + argTypes.push_back(s.getType()); + argTypes.push_back(nm->integerType()); + TypeNode elemType = s.getType().getSequenceElementType(); + TypeNode ufType = nm->mkFunctionType(argTypes, elemType); + Node uf = sc->mkTypedSkolemCached( + ufType, Node::null(), Node::null(), SkolemCache::SK_NTH, "Uf"); + Node b2 = nm->mkNode(EQUAL, skt, nm->mkNode(APPLY_UF, uf, s, n)); + + // the full ite, split on definedness of `seq.nth` + Node lemma = nm->mkNode(ITE, cond, b1, b2); + + // assert: + // IF n >=0 AND n < len( s ) + // THEN: s = sk1 ++ unit(skt) ++ sk2 AND + // len( sk1 ) = n AND + // ( len( sk2 ) = len( s )- (n+1) + // ELSE: skt = Uf(s, n), where Uf is a cached skolem function. 
+ asserts.push_back(lemma); + retNode = skt; + } else if (t.getKind() == kind::STRING_STRREPL) { // processing term: replace( x, y, z ) @@ -518,8 +574,8 @@ Node StringsPreprocess::reduce(Node t, flem.push_back( ufip1.eqNode(nm->mkNode(PLUS, ii, nm->mkNode(STRING_LENGTH, y)))); - Node q = nm->mkNode( - FORALL, bvli, nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem))); + Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem)); + Node q = mkForallInternal(bvli, body); lem.push_back(q); // assert: @@ -688,8 +744,8 @@ Node StringsPreprocess::reduce(Node t, .eqNode(nm->mkNode( STRING_CONCAT, pfxMatch, z, nm->mkNode(APPLY_UF, us, ip1)))); - Node forall = nm->mkNode( - FORALL, bvli, nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem))); + Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem)); + Node forall = mkForallInternal(bvli, body); lemmas.push_back(forall); // IF in_re(x, re.++(_*, y', _*)) @@ -744,8 +800,8 @@ Node StringsPreprocess::reduce(Node t, Node bound = nm->mkNode(AND, nm->mkNode(LEQ, zero, i), nm->mkNode(LT, i, lenr)); - Node rangeA = - nm->mkNode(FORALL, bvi, nm->mkNode(OR, bound.negate(), ri.eqNode(res))); + Node body = nm->mkNode(OR, bound.negate(), ri.eqNode(res)); + Node rangeA = mkForallInternal(bvi, body); // upper 65 ... 90 // lower 97 ... 122 @@ -779,8 +835,8 @@ Node StringsPreprocess::reduce(Node t, Node bound = nm->mkNode(AND, nm->mkNode(LEQ, zero, i), nm->mkNode(LT, i, lenr)); - Node rangeA = nm->mkNode( - FORALL, bvi, nm->mkNode(OR, bound.negate(), ssr.eqNode(ssx))); + Node body = nm->mkNode(OR, bound.negate(), ssr.eqNode(ssx)); + Node rangeA = mkForallInternal(bvi, body); // assert: // len(r) = len(x) ^ // forall i. 
0 <= i < len(r) => @@ -817,7 +873,7 @@ Node StringsPreprocess::reduce(Node t, { Node ltp = sc->mkTypedSkolemCached( nm->booleanType(), t, SkolemCache::SK_PURIFY, "ltp"); - Node k = nm->mkSkolem("k", nm->integerType()); + Node k = SkolemCache::mkIndexVar(t); std::vector<Node> conj; conj.push_back(nm->mkNode(GEQ, k, zero)); @@ -841,6 +897,8 @@ Node StringsPreprocess::reduce(Node t, } conj.push_back(nm->mkNode(ITE, ite_ch)); + Node conjn = nm->mkNode( + EXISTS, nm->mkNode(BOUND_VAR_LIST, k), nm->mkNode(AND, conj)); // Intuitively, the reduction says either x and y are equal, or they have // some (maximal) common prefix after which their characters at position k // are distinct, and the comparison of their code matches the return value @@ -854,13 +912,13 @@ Node StringsPreprocess::reduce(Node t, // assert: // IF x=y // THEN: ltp - // ELSE: k >= 0 AND k <= len( x ) AND k <= len( y ) AND + // ELSE: exists k. + // k >= 0 AND k <= len( x ) AND k <= len( y ) AND // substr( x, 0, k ) = substr( y, 0, k ) AND // IF ltp // THEN: str.code(substr( x, k, 1 )) < str.code(substr( y, k, 1 )) // ELSE: str.code(substr( x, k, 1 )) > str.code(substr( y, k, 1 )) - Node assert = - nm->mkNode(ITE, t[0].eqNode(t[1]), ltp, nm->mkNode(AND, conj)); + Node assert = nm->mkNode(ITE, t[0].eqNode(t[1]), ltp, conjn); asserts.push_back(assert); // Thus, str.<=( x, y ) = ltp @@ -972,12 +1030,39 @@ void StringsPreprocess::processAssertions( std::vector< Node > &vec_node ){ : NodeManager::currentNM()->mkNode(kind::AND, asserts); if( res!=vec_node[i] ){ res = Rewriter::rewrite( res ); - PROOF( ProofManager::currentPM()->addDependence( res, vec_node[i] ); ); + if (options::unsatCores()) + { + ProofManager::currentPM()->addDependence(res, vec_node[i]); + } vec_node[i] = res; } } } +Node StringsPreprocess::mkForallInternal(Node bvl, Node body) +{ + NodeManager* nm = NodeManager::currentNM(); + QInternalVarAttribute qiva; + Node qvar; + if (bvl.hasAttribute(qiva)) + { + qvar = bvl.getAttribute(qiva); + } + 
else + { + qvar = nm->mkSkolem("qinternal", nm->booleanType()); + // this dummy variable marks that the quantified formula is internal + qvar.setAttribute(InternalQuantAttribute(), true); + // remember the dummy variable + bvl.setAttribute(qiva, qvar); + } + // make the internal attribute, and put it in a singleton list + Node ip = nm->mkNode(INST_ATTRIBUTE, qvar); + Node ipl = nm->mkNode(INST_PATTERN_LIST, ip); + // make the overall formula + return nm->mkNode(FORALL, bvl, body, ipl); +} + }/* CVC4::theory::strings namespace */ }/* CVC4::theory namespace */ }/* CVC4 namespace */ diff --git a/src/theory/strings/theory_strings_preprocess.h b/src/theory/strings/theory_strings_preprocess.h index 113d909a8..124a09a4c 100644 --- a/src/theory/strings/theory_strings_preprocess.h +++ b/src/theory/strings/theory_strings_preprocess.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Tianyi Liang, Mathias Preiner ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -100,6 +100,14 @@ class StringsPreprocess { Node simplifyRec(Node t, std::vector<Node>& asserts, std::map<Node, Node>& visited); + /** + * Make internal quantified formula with bound variable list bvl and body. + * Internally, we get a node corresponding to marking a quantified formula as + * an "internal" one. This node is provided as the third argument of the + * FORALL returned by this method. This ensures that E-matching is not applied + * to the quantified formula. 
+ */ + static Node mkForallInternal(Node bvl, Node body); }; }/* CVC4::theory::strings namespace */ diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h index 12ddb8a3d..cbf8fb1e0 100644 --- a/src/theory/strings/theory_strings_type_rules.h +++ b/src/theory/strings/theory_strings_type_rules.h @@ -2,10 +2,10 @@ /*! \file theory_strings_type_rules.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tianyi Liang, Mathias Preiner + ** Andrew Reynolds, Tianyi Liang, Yoni Zohar ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -376,6 +376,30 @@ class SeqUnitTypeRule } }; +class SeqNthTypeRule +{ + public: + static TypeNode computeType(NodeManager* nodeManager, TNode n, bool check) + { + TypeNode t = n[0].getType(check); + TypeNode t1 = t.getSequenceElementType(); + if (check) + { + if (!t.isSequence()) + { + throw TypeCheckingExceptionPrivate(n, "expecting a sequence in nth"); + } + TypeNode t2 = n[1].getType(check); + if (!t2.isInteger()) + { + throw TypeCheckingExceptionPrivate( + n, "expecting an integer start term in nth"); + } + } + return t1; + } +}; + /** Properties of the sequence type */ struct SequenceProperties { diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index 3cf14fead..286c0dc04 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. 
** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** @@ -382,7 +382,7 @@ TypeNode getOwnerStringType(Node n) TypeNode tn; Kind k = n.getKind(); if (k == STRING_STRIDOF || k == STRING_LENGTH || k == STRING_STRCTN - || k == STRING_PREFIX || k == STRING_SUFFIX) + || k == SEQ_NTH || k == STRING_PREFIX || k == STRING_SUFFIX) { // owning string type is the type of first argument tn = n[0].getType(); diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h index 803a5ffea..6833d265b 100644 --- a/src/theory/strings/theory_strings_utils.h +++ b/src/theory/strings/theory_strings_utils.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/type_enumerator.cpp b/src/theory/strings/type_enumerator.cpp index ae88f63f7..2412f9217 100644 --- a/src/theory/strings/type_enumerator.cpp +++ b/src/theory/strings/type_enumerator.cpp @@ -2,10 +2,10 @@ /*! \file type_enumerator.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Mathias Preiner, Tim King + ** Andrew Reynolds, Mathias Preiner, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. 
+ ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/type_enumerator.h b/src/theory/strings/type_enumerator.h index 602d73059..91cb0502f 100644 --- a/src/theory/strings/type_enumerator.h +++ b/src/theory/strings/type_enumerator.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Tianyi Liang, Mathias Preiner ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp index 63e3f1dba..944a088ed 100644 --- a/src/theory/strings/word.cpp +++ b/src/theory/strings/word.cpp @@ -2,10 +2,10 @@ /*! \file word.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. + ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h index bace06bfb..2343dc583 100644 --- a/src/theory/strings/word.h +++ b/src/theory/strings/word.h @@ -5,7 +5,7 @@ ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. 
+ ** in the top-level source directory and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** |