diff options
Diffstat (limited to 'src/theory/strings')
56 files changed, 1916 insertions, 847 deletions
diff --git a/src/theory/strings/arith_entail.cpp b/src/theory/strings/arith_entail.cpp index 71680264d..adcbe590e 100644 --- a/src/theory/strings/arith_entail.cpp +++ b/src/theory/strings/arith_entail.cpp @@ -4,7 +4,7 @@ ** Top contributors (to current version): ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/arith_entail.h b/src/theory/strings/arith_entail.h index 266f726c7..e3dd7e2e5 100644 --- a/src/theory/strings/arith_entail.h +++ b/src/theory/strings/arith_entail.h @@ -4,7 +4,7 @@ ** Top contributors (to current version): ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/base_solver.cpp b/src/theory/strings/base_solver.cpp index 16c83de78..952f26691 100644 --- a/src/theory/strings/base_solver.cpp +++ b/src/theory/strings/base_solver.cpp @@ -2,9 +2,9 @@ /*! \file base_solver.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -15,6 +15,7 @@ #include "theory/strings/base_solver.h" +#include "expr/sequence.h" #include "options/strings_options.h" #include "theory/strings/theory_strings_utils.h" #include "theory/strings/word.h" @@ -27,11 +28,8 @@ namespace CVC4 { namespace theory { namespace strings { -BaseSolver::BaseSolver(context::Context* c, - context::UserContext* u, - SolverState& s, - InferenceManager& im) - : d_state(s), d_im(im), d_congruent(c) +BaseSolver::BaseSolver(SolverState& s, InferenceManager& im) + : d_state(s), d_im(im), d_congruent(s.getSatContext()) { d_false = NodeManager::currentNM()->mkConst(false); d_cardSize = utils::getAlphabetCardinality(); @@ -57,7 +55,7 @@ void BaseSolver::checkInit() if (!tn.isRegExp()) { Node emps; - if (tn.isString()) + if (tn.isStringLike()) { d_stringsEqc.push_back(eqc); emps = Word::mkEmptyWord(tn); @@ -67,19 +65,75 @@ void BaseSolver::checkInit() while (!eqc_i.isFinished()) { Node n = *eqc_i; - if (n.isConst()) + Kind k = n.getKind(); + // process constant-like terms + if (utils::isConstantLike(n)) { - d_eqcInfo[eqc].d_bestContent = n; - d_eqcInfo[eqc].d_base = n; - d_eqcInfo[eqc].d_exp = Node::null(); + Node prev = d_eqcInfo[eqc].d_bestContent; + if (!prev.isNull()) + { + // we have either (seq.unit x) = C, or (seq.unit x) = (seq.unit y) + // where C is a sequence constant. + Node cval = + prev.isConst() ? prev : (n.isConst() ? n : Node::null()); + std::vector<Node> exp; + exp.push_back(prev.eqNode(n)); + Node s, t; + if (cval.isNull()) + { + // injectivity of seq.unit + s = prev[0]; + t = n[0]; + } + else + { + // should not have two constants in the same equivalence class + Assert(cval.getType().isSequence()); + std::vector<Node> cchars = Word::getChars(cval); + if (cchars.size() == 1) + { + Node oval = prev.isConst() ? n : prev; + Assert(oval.getKind() == SEQ_UNIT); + s = oval[0]; + t = cchars[0] + .getConst<ExprSequence>() + .getSequence() + .getVec()[0]; + // oval is congruent (ignored) in this context + d_congruent.insert(oval); + } + else + { + // (seq.unit x) = C => false if |C| != 1. + d_im.sendInference( + exp, d_false, Inference::UNIT_CONST_CONFLICT); + return; + } + } + if (!d_state.areEqual(s, t)) + { + // (seq.unit x) = (seq.unit y) => x=y, or + // (seq.unit x) = (seq.unit c) => x=c + Assert(s.getType() == t.getType()); + d_im.sendInference(exp, s.eqNode(t), Inference::UNIT_INJ); + } + } + // update best content + if (prev.isNull() || n.isConst()) + { + d_eqcInfo[eqc].d_bestContent = n; + d_eqcInfo[eqc].d_bestScore = 0; + d_eqcInfo[eqc].d_base = n; + d_eqcInfo[eqc].d_exp = Node::null(); + } } - else if (tn.isInteger()) + if (tn.isInteger()) { // do nothing } + // process indexing else if (n.getNumChildren() > 0) { - Kind k = n.getKind(); if (k != EQUAL) { if (d_congruent.find(n) == d_congruent.end()) @@ -90,7 +144,7 @@ void BaseSolver::checkInit() { // check if we have inferred a new equality by removal of empty // components - if (n.getKind() == STRING_CONCAT && !d_state.areEqual(nc, n)) + if (k == STRING_CONCAT && !d_state.areEqual(nc, n)) { std::vector<Node> exp; size_t count[2] = {0, 0}; @@ -191,7 +245,7 @@ void BaseSolver::checkInit() } } } - else + else if (!n.isConst()) { if (d_congruent.find(n) == d_congruent.end()) { @@ -323,8 +377,10 @@ void BaseSolver::checkConstantEquivalenceClasses(TermIndex* ti, Node nrr = d_state.getRepresentative(n[count]); Assert(!d_eqcInfo[nrr].d_bestContent.isNull() && d_eqcInfo[nrr].d_bestContent.isConst()); + // must flatten to avoid nested AND in explanations + utils::flattenOp(AND, d_eqcInfo[nrr].d_exp, exp); + // now explain equality to base d_im.addToExplanation(n[count], d_eqcInfo[nrr].d_base, exp); - exp.push_back(d_eqcInfo[nrr].d_exp); } else { @@ -352,10 +408,15 @@ void BaseSolver::checkConstantEquivalenceClasses(TermIndex* ti, Node nct = utils::mkNConcat(vecnc, n.getType()); Assert(!nct.isConst()); bei.d_bestContent = nct; + bei.d_bestScore = contentSize; bei.d_base = n; - bei.d_exp = utils::mkAnd(exp); + if (!exp.empty()) + { + bei.d_exp = utils::mkAnd(exp); + } Trace("strings-debug") - << "Set eqc best content " << n << " to " << nct << std::endl; + << "Set eqc best content " << n << " to " << nct + << ", explanation = " << bei.d_exp << std::endl; } } } @@ -370,11 +431,12 @@ void BaseSolver::checkConstantEquivalenceClasses(TermIndex* ti, BaseEqcInfo& bei = d_eqcInfo[nr]; if (!bei.d_bestContent.isConst()) { - Trace("strings-debug") - << "Set eqc const " << n << " to " << c << std::endl; bei.d_bestContent = c; bei.d_base = n; bei.d_exp = utils::mkAnd(exp); + Trace("strings-debug") + << "Set eqc const " << n << " to " << c + << ", explanation = " << bei.d_exp << std::endl; } else if (c != bei.d_bestContent) { @@ -432,11 +494,39 @@ void BaseSolver::checkCardinality() // are pairwise propagated to be equal. We do not require disequalities // between the lengths of each collection, since we split on disequalities // between lengths of string terms that are disequal (DEQ-LENGTH-SP). - std::vector<std::vector<Node> > cols; - std::vector<Node> lts; + std::map<TypeNode, std::vector<std::vector<Node> > > cols; + std::map<TypeNode, std::vector<Node> > lts; d_state.separateByLength(d_stringsEqc, cols, lts); + for (std::pair<const TypeNode, std::vector<std::vector<Node> > >& c : cols) + { + checkCardinalityType(c.first, c.second, lts[c.first]); + } +} + +void BaseSolver::checkCardinalityType(TypeNode tn, + std::vector<std::vector<Node> >& cols, + std::vector<Node>& lts) +{ + Trace("strings-card") << "Check cardinality (type " << tn << ")..." + << std::endl; NodeManager* nm = NodeManager::currentNM(); - Trace("strings-card") << "Check cardinality...." << std::endl; + uint32_t typeCardSize; + if (tn.isString()) // string-only + { + typeCardSize = d_cardSize; + } + else + { + Assert(tn.isSequence()); + TypeNode etn = tn.getSequenceElementType(); + if (etn.isInterpretedFinite()) + { + // infinite cardinality, we are fine + return; + } + // TODO (cvc4-projects #23): how to handle sequence for finite types? + return; + } // for each collection for (unsigned i = 0, csize = cols.size(); i < csize; ++i) { @@ -451,15 +541,15 @@ void BaseSolver::checkCardinality() // size > c^k unsigned card_need = 1; double curr = static_cast<double>(cols[i].size()); - while (curr > d_cardSize) + while (curr > typeCardSize) { - curr = curr / static_cast<double>(d_cardSize); + curr = curr / static_cast<double>(typeCardSize); card_need++; } Trace("strings-card") << "Need length " << card_need - << " for this number of strings (where alphabet size is " << d_cardSize - << ")." << std::endl; + << " for this number of strings (where alphabet size is " + << typeCardSize << ") given type " << tn << "." << std::endl; // check if we need to split bool needsSplit = true; if (lr.isConst()) @@ -472,8 +562,10 @@ void BaseSolver::checkCardinality() else { // find the minimimum constant that we are unknown to be disequal from, or - // otherwise stop if we increment such that cardinality does not apply - unsigned r = 0; + // otherwise stop if we increment such that cardinality does not apply. + // We always start with r=1 since by the invariants of our term registry, + // a term is either equal to the empty string, or has length >= 1. + unsigned r = 1; bool success = true; while (r < card_need && success) { @@ -530,8 +622,8 @@ void BaseSolver::checkCardinality() Node k_node = nm->mkConst(Rational(int_k)); // add cardinality lemma Node dist = nm->mkNode(DISTINCT, cols[i]); - std::vector<Node> vec_node; - vec_node.push_back(dist); + std::vector<Node> expn; + expn.push_back(dist); for (std::vector<Node>::iterator itr1 = cols[i].begin(); itr1 != cols[i].end(); ++itr1) @@ -540,7 +632,7 @@ void BaseSolver::checkCardinality() if (len != lr) { Node len_eq_lr = len.eqNode(lr); - vec_node.push_back(len_eq_lr); + expn.push_back(len_eq_lr); } } Node len = nm->mkNode(STRING_LENGTH, cols[i][0]); @@ -551,7 +643,7 @@ void BaseSolver::checkCardinality() { std::vector<Node> emptyVec; d_im.sendInference( - emptyVec, vec_node, cons, Inference::CARDINALITY, true); + emptyVec, expn, cons, Inference::CARDINALITY, true); return; } } diff --git a/src/theory/strings/base_solver.h b/src/theory/strings/base_solver.h index 1960b8352..5dcb75560 100644 --- a/src/theory/strings/base_solver.h +++ b/src/theory/strings/base_solver.h @@ -2,9 +2,9 @@ /*! \file base_solver.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -41,10 +41,7 @@ class BaseSolver using NodeSet = context::CDHashSet<Node, NodeHashFunction>; public: - BaseSolver(context::Context* c, - context::UserContext* u, - SolverState& s, - InferenceManager& im); + BaseSolver(SolverState& s, InferenceManager& im); ~BaseSolver(); //-----------------------inference steps @@ -200,6 +197,19 @@ class BaseSolver std::vector<Node>& vecc, bool ensureConst = true, bool isConst = true); + /** + * Check cardinality for type tn. This adds a lemma corresponding to + * cardinality for terms of type tn, if applicable. + * + * @param tn The string-like type of terms we are considering, + * @param cols The list of collections of equivalence classes. This is a + * partition of all string equivalence classes, grouped by those with equal + * lengths. + * @param lts The length of each of the collections in cols. + */ + void checkCardinalityType(TypeNode tn, + std::vector<std::vector<Node> >& cols, + std::vector<Node>& lts); /** The solver state object */ SolverState& d_state; /** The (custom) output channel of the theory of strings */ diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index 604abb1d7..a38d16279 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -1,10 +1,10 @@ /********************* */ -/*! \file theory_strings.cpp +/*! \file core_solver.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tianyi Liang, Morgan Deters + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -707,15 +707,17 @@ void CoreSolver::getNormalForms(Node eqc, while( !eqc_i.isFinished() ){ Node n = (*eqc_i); if( !d_bsolver.isCongruent(n) ){ - if (n.isConst() || n.getKind() == STRING_CONCAT) + Kind nk = n.getKind(); + bool isCLike = utils::isConstantLike(n); + if (isCLike || nk == STRING_CONCAT) { Trace("strings-process-debug") << "Get Normal Form : Process term " << n << " in eqc " << eqc << std::endl; NormalForm nf_curr; - if (n.isConst()) + if (isCLike) { nf_curr.init(n); } - else if (n.getKind() == STRING_CONCAT) + else if (nk == STRING_CONCAT) { // set the base to n, we construct the other portions of nf_curr in // the following. @@ -791,7 +793,8 @@ void CoreSolver::getNormalForms(Node eqc, } //if not equal to self std::vector<Node>& currv = nf_curr.d_nf; - if (currv.size() > 1 || (currv.size() == 1 && currv[0].isConst())) + if (currv.size() > 1 + || (currv.size() == 1 && utils::isConstantLike(currv[0]))) { // if in a build with assertions, check that normal form is acyclic if (Configuration::isAssertionBuild()) @@ -2239,8 +2242,6 @@ bool CoreSolver::isNormalFormPair( Node n1, Node n2 ) { void CoreSolver::checkNormalFormsDeq() { eq::EqualityEngine* ee = d_state.getEqualityEngine(); - std::vector< std::vector< Node > > cols; - std::vector< Node > lts; std::map< Node, std::map< Node, bool > > processed; const context::CDList<Node>& deqs = d_state.getDisequalityList(); @@ -2270,9 +2271,18 @@ void CoreSolver::checkNormalFormsDeq() } } - if (!d_im.hasProcessed()) + if (d_im.hasProcessed()) { - d_state.separateByLength(d_strings_eqc, cols, lts); + // added splitting lemma above + return; + } + // otherwise, look at pairs of equivalence classes with equal lengths + std::map<TypeNode, std::vector<std::vector<Node> > > colsT; + std::map<TypeNode, std::vector<Node> > ltsT; + d_state.separateByLength(d_strings_eqc, colsT, ltsT); + for (std::pair<const TypeNode, std::vector<std::vector<Node> > >& ct : colsT) + { + std::vector<std::vector<Node> >& cols = ct.second; for( unsigned i=0; i<cols.size(); i++ ){ if (cols[i].size() > 1 && !d_im.hasPendingLemma()) { diff --git a/src/theory/strings/core_solver.h b/src/theory/strings/core_solver.h index 029f1c850..db1f5ecf6 100644 --- a/src/theory/strings/core_solver.h +++ b/src/theory/strings/core_solver.h @@ -1,10 +1,10 @@ /********************* */ -/*! \file theory_strings.h +/*! \file core_solver.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/eqc_info.cpp b/src/theory/strings/eqc_info.cpp index 3c0dbc2a7..31d7f8b01 100644 --- a/src/theory/strings/eqc_info.cpp +++ b/src/theory/strings/eqc_info.cpp @@ -2,9 +2,9 @@ /*! \file eqc_info.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/eqc_info.h b/src/theory/strings/eqc_info.h index 241b7d523..108264969 100644 --- a/src/theory/strings/eqc_info.h +++ b/src/theory/strings/eqc_info.h @@ -4,7 +4,7 @@ ** Top contributors (to current version): ** Andrew Reynolds ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index dd4144313..4ee2f4919 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -1,10 +1,10 @@ /********************* */ -/*! \file ext_solver.cpp +/*! \file extf_solver.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -56,6 +56,8 @@ ExtfSolver::ExtfSolver(context::Context* c, d_extt.addFunctionKind(kind::STRING_STOI); d_extt.addFunctionKind(kind::STRING_STRREPL); d_extt.addFunctionKind(kind::STRING_STRREPLALL); + d_extt.addFunctionKind(kind::STRING_REPLACE_RE); + d_extt.addFunctionKind(kind::STRING_REPLACE_RE_ALL); d_extt.addFunctionKind(kind::STRING_STRCTN); d_extt.addFunctionKind(kind::STRING_IN_REGEXP); d_extt.addFunctionKind(kind::STRING_LEQ); @@ -63,6 +65,7 @@ ExtfSolver::ExtfSolver(context::Context* c, d_extt.addFunctionKind(kind::STRING_TOLOWER); d_extt.addFunctionKind(kind::STRING_TOUPPER); d_extt.addFunctionKind(kind::STRING_REV); + d_extt.addFunctionKind(kind::SEQ_UNIT); d_true = NodeManager::currentNM()->mkConst(true); d_false = NodeManager::currentNM()->mkConst(false); @@ -136,16 +139,18 @@ bool ExtfSolver::doReduction(int effort, Node n) } } } - else + else if (k == STRING_SUBSTR) { - if (k == STRING_SUBSTR) - { - r_effort = 1; - } - else if (k != STRING_IN_REGEXP) - { - r_effort = 2; - } + r_effort = 1; + } + else if (k == SEQ_UNIT) + { + // never necessary to reduce seq.unit + return false; + } + else if (k != STRING_IN_REGEXP) + { + r_effort = 2; } if (effort != r_effort) { @@ -180,8 +185,9 @@ bool ExtfSolver::doReduction(int effort, Node n) NodeManager* nm = NodeManager::currentNM(); Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI || k == STRING_STRREPL - || k == STRING_STRREPLALL || k == STRING_LEQ || k == STRING_TOLOWER - || k == STRING_TOUPPER || k == STRING_REV); + || k == STRING_STRREPLALL || k == STRING_REPLACE_RE + || k == STRING_REPLACE_RE_ALL || k == STRING_LEQ + || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV); std::vector<Node> new_nodes; Node res = d_preproc.simplify(n, new_nodes); Assert(res != n); diff --git a/src/theory/strings/extf_solver.h b/src/theory/strings/extf_solver.h index fb9b836db..d99a881f6 100644 --- a/src/theory/strings/extf_solver.h +++ b/src/theory/strings/extf_solver.h @@ -1,10 +1,10 @@ /********************* */ -/*! \file ext_solver.h +/*! \file extf_solver.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/infer_info.cpp b/src/theory/strings/infer_info.cpp index e26a49e4e..c75e03440 100644 --- a/src/theory/strings/infer_info.cpp +++ b/src/theory/strings/infer_info.cpp @@ -2,9 +2,9 @@ /*! \file infer_info.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -26,6 +26,8 @@ const char* toString(Inference i) case Inference::I_CONST_MERGE: return "I_CONST_MERGE"; case Inference::I_CONST_CONFLICT: return "I_CONST_CONFLICT"; case Inference::I_NORM: return "I_NORM"; + case Inference::UNIT_INJ: return "UNIT_INJ"; + case Inference::UNIT_CONST_CONFLICT: return "UNIT_CONST_CONFLICT"; case Inference::CARD_SP: return "CARD_SP"; case Inference::CARDINALITY: return "CARDINALITY"; case Inference::I_CYCLE_E: return "I_CYCLE_E"; diff --git a/src/theory/strings/infer_info.h b/src/theory/strings/infer_info.h index 5aecf52cb..2a42b9fab 100644 --- a/src/theory/strings/infer_info.h +++ b/src/theory/strings/infer_info.h @@ -2,9 +2,9 @@ /*! \file infer_info.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -59,6 +59,10 @@ enum class Inference : uint32_t // equal after e.g. removing strings that are currently empty. For example: // y = "" ^ z = "" => x ++ y = z ++ x I_NORM, + // injectivity of seq.unit + UNIT_INJ, + // unit constant conflict + UNIT_CONST_CONFLICT, // A split due to cardinality CARD_SP, // The cardinality inference for strings, see Liang et al CAV 2014. diff --git a/src/theory/strings/inference_manager.cpp b/src/theory/strings/inference_manager.cpp index 9fb1011ee..91af2a434 100644 --- a/src/theory/strings/inference_manager.cpp +++ b/src/theory/strings/inference_manager.cpp @@ -2,9 +2,9 @@ /*! \file inference_manager.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/inference_manager.h b/src/theory/strings/inference_manager.h index cebd88a7f..4e50a6cb7 100644 --- a/src/theory/strings/inference_manager.h +++ b/src/theory/strings/inference_manager.h @@ -2,9 +2,9 @@ /*! \file inference_manager.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 27ffe5d26..96ba82a44 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -23,6 +23,10 @@ operator STRING_LEQ 2 "string less than or equal" operator STRING_STRIDOF 3 "string indexof" operator STRING_STRREPL 3 "string replace" operator STRING_STRREPLALL 3 "string replace all" +operator STRING_REPLACE_RE 3 "string replace regular expression match" +operator STRING_REPLACE_RE_ALL 3 "string replace all regular expression matches" +typerule STRING_REPLACE_RE "SimpleTypeRule<RString, AString, ARegExp, AString>" +typerule STRING_REPLACE_RE_ALL "SimpleTypeRule<RString, AString, ARegExp, AString>" operator STRING_PREFIX 2 "string prefixof" operator STRING_SUFFIX 2 "string suffixof" operator STRING_IS_DIGIT 1 "string isdigit, returns true if argument is a string of length one that represents a digit" @@ -58,12 +62,27 @@ constant CONST_STRING \ "util/string.h" \ "a string of characters" +# the type +operator SEQUENCE_TYPE 1 "seuence type, takes as parameter the type of the elements" +cardinality SEQUENCE_TYPE \ + "::CVC4::theory::strings::SequenceProperties::computeCardinality(%TYPE%)" \ + "theory/strings/theory_strings_type_rules.h" +well-founded SEQUENCE_TYPE \ + "::CVC4::theory::strings::SequenceProperties::isWellFounded(%TYPE%)" \ + "::CVC4::theory::strings::SequenceProperties::mkGroundTerm(%TYPE%)" \ + "theory/strings/theory_strings_type_rules.h" +enumerator SEQUENCE_TYPE \ + "::CVC4::theory::strings::SequenceEnumerator" \ + "theory/strings/type_enumerator.h" + constant CONST_SEQUENCE \ ::CVC4::ExprSequence \ ::CVC4::ExprSequenceHashFunction \ "expr/expr_sequence.h" \ "a sequence of characters" +operator SEQ_UNIT 1 "a sequence of length one" + # equal equal / less than / output operator STRING_TO_REGEXP 1 "convert string to regexp" operator REGEXP_CONCAT 2: "regexp concat" @@ -144,4 +163,9 @@ typerule STRING_FROM_CODE "SimpleTypeRule<RString, AInteger>" typerule STRING_TOUPPER "SimpleTypeRule<RString, AString>" typerule STRING_TOLOWER "SimpleTypeRule<RString, AString>" +### sequence specific operators + +typerule CONST_SEQUENCE ::CVC4::theory::strings::ConstSequenceTypeRule +typerule SEQ_UNIT ::CVC4::theory::strings::SeqUnitTypeRule + endtheory diff --git a/src/theory/strings/normal_form.cpp b/src/theory/strings/normal_form.cpp index 05be5f12a..2466b6d21 100644 --- a/src/theory/strings/normal_form.cpp +++ b/src/theory/strings/normal_form.cpp @@ -2,9 +2,9 @@ /*! \file normal_form.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/normal_form.h b/src/theory/strings/normal_form.h index 35a7fadb3..bd60b0252 100644 --- a/src/theory/strings/normal_form.h +++ b/src/theory/strings/normal_form.h @@ -2,9 +2,9 @@ /*! \file normal_form.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli, Mathias Preiner ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp index d4f301e23..37920d248 100644 --- a/src/theory/strings/regexp_elim.cpp +++ b/src/theory/strings/regexp_elim.cpp @@ -2,9 +2,9 @@ /*! \file regexp_elim.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tianyi Liang + ** Andrew Reynolds, Tianyi Liang, Mathias Preiner ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -27,10 +27,6 @@ using namespace CVC4::theory::strings; RegExpElimination::RegExpElimination() { - d_zero = NodeManager::currentNM()->mkConst(Rational(0)); - d_one = NodeManager::currentNM()->mkConst(Rational(1)); - d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1)); - d_regExpType = NodeManager::currentNM()->regExpType(); } Node RegExpElimination::eliminate(Node atom) @@ -53,6 +49,7 @@ Node RegExpElimination::eliminateConcat(Node atom) Node x = atom[0]; Node lenx = nm->mkNode(STRING_LENGTH, x); Node re = atom[1]; + Node zero = nm->mkConst(Rational(0)); std::vector<Node> children; utils::getConcat(re, children); @@ -80,7 +77,7 @@ Node RegExpElimination::eliminateConcat(Node atom) hasPivotIndex = true; pivotIndex = i; // set to zero for the sum below - fl = d_zero; + fl = zero; } else { @@ -100,7 +97,7 @@ Node RegExpElimination::eliminateConcat(Node atom) Node sum = nm->mkNode(PLUS, childLengths); std::vector<Node> conc; conc.push_back(nm->mkNode(hasPivotIndex ? GEQ : EQUAL, lenx, sum)); - Node currEnd = d_zero; + Node currEnd = zero; for (unsigned i = 0, size = childLengths.size(); i < size; i++) { if (hasPivotIndex && i == pivotIndex) @@ -190,7 +187,7 @@ Node RegExpElimination::eliminateConcat(Node atom) // set of string terms are found, in order, in string x. // prev_end stores the current (symbolic) index in x that we are // searching. - Node prev_end = d_zero; + Node prev_end = zero; // the symbolic index we start searching, for each child in sep_children. std::vector<Node> prev_ends; unsigned gap_minsize_end = gap_minsize.back(); @@ -232,7 +229,7 @@ Node RegExpElimination::eliminateConcat(Node atom) prev_end = nm->mkNode(PLUS, prev_end, k); } Node curr = nm->mkNode(STRING_STRIDOF, x, sc, prev_end); - Node idofFind = curr.eqNode(d_neg_one).negate(); + Node idofFind = curr.eqNode(nm->mkConst(Rational(-1))).negate(); conj.push_back(idofFind); prev_end = nm->mkNode(PLUS, curr, lensc); } @@ -308,7 +305,7 @@ Node RegExpElimination::eliminateConcat(Node atom) for (const Node& v : non_greedy_find_vars) { Node bound = nm->mkNode( - AND, nm->mkNode(LEQ, d_zero, v), nm->mkNode(LT, v, lenx)); + AND, nm->mkNode(LEQ, zero, v), nm->mkNode(LT, v, lenx)); children2.push_back(bound); } children2.push_back(res); @@ -341,7 +338,7 @@ Node RegExpElimination::eliminateConcat(Node atom) // if the first or last child is constant string, we can split the membership // into a conjunction of two memberships. - Node sStartIndex = d_zero; + Node sStartIndex = zero; Node sLength = lenx; std::vector<Node> sConstraints; std::vector<Node> rexpElimChildren; @@ -356,7 +353,7 @@ Node RegExpElimination::eliminateConcat(Node atom) Assert(children[index + (r == 0 ? 1 : -1)].getKind() != STRING_TO_REGEXP); Node s = c[0]; Node lens = nm->mkNode(STRING_LENGTH, s); - Node sss = r == 0 ? d_zero : nm->mkNode(MINUS, lenx, lens); + Node sss = r == 0 ? zero : nm->mkNode(MINUS, lenx, lens); Node ss = nm->mkNode(STRING_SUBSTR, x, sss, lens); sConstraints.push_back(ss.eqNode(s)); if (r == 0) @@ -383,7 +380,7 @@ Node RegExpElimination::eliminateConcat(Node atom) Assert(rexpElimChildren.size() + sConstraints.size() == nchildren); Node ss = nm->mkNode(STRING_SUBSTR, x, sStartIndex, sLength); Assert(!rexpElimChildren.empty()); - Node regElim = utils::mkConcat(rexpElimChildren, d_regExpType); + Node regElim = utils::mkConcat(rexpElimChildren, nm->regExpType()); sConstraints.push_back(nm->mkNode(STRING_IN_REGEXP, ss, regElim)); Node ret = nm->mkNode(AND, sConstraints); // e.g. @@ -402,7 +399,7 @@ Node RegExpElimination::eliminateConcat(Node atom) std::vector<Node> echildren; if (i == 0) { - k = d_zero; + k = zero; } else if (i + 1 == nchildren) { @@ -413,7 +410,7 @@ Node RegExpElimination::eliminateConcat(Node atom) k = nm->mkBoundVar(nm->integerType()); Node bound = nm->mkNode(AND, - nm->mkNode(LEQ, d_zero, k), + nm->mkNode(LEQ, zero, k), nm->mkNode(LEQ, k, nm->mkNode(MINUS, lenx, lens))); echildren.push_back(bound); } @@ -423,16 +420,16 @@ Node RegExpElimination::eliminateConcat(Node atom) { std::vector<Node> rprefix; rprefix.insert(rprefix.end(), children.begin(), children.begin() + i); - Node rpn = utils::mkConcat(rprefix, d_regExpType); + Node rpn = utils::mkConcat(rprefix, nm->regExpType()); Node substrPrefix = nm->mkNode( - STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, d_zero, k), rpn); + STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, zero, k), rpn); echildren.push_back(substrPrefix); } if (i + 1 < nchildren) { std::vector<Node> rsuffix; rsuffix.insert(rsuffix.end(), children.begin() + i + 1, children.end()); - Node rps = utils::mkConcat(rsuffix, d_regExpType); + Node rps = utils::mkConcat(rsuffix, nm->regExpType()); Node ks = nm->mkNode(PLUS, k, lens); Node substrSuffix = nm->mkNode( STRING_IN_REGEXP, @@ -470,6 +467,7 @@ Node RegExpElimination::eliminateStar(Node atom) Node x = atom[0]; Node lenx = nm->mkNode(STRING_LENGTH, x); Node re = atom[1]; + Node zero = nm->mkConst(Rational(0)); // for regular expression star, // if the period is a fixed constant, we can turn it into a bounded // quantifier @@ -488,7 +486,8 @@ Node RegExpElimination::eliminateStar(Node atom) bool lenOnePeriod = true; std::vector<Node> char_constraints; Node index = nm->mkBoundVar(nm->integerType()); - Node substr_ch = nm->mkNode(STRING_SUBSTR, x, index, d_one); + Node substr_ch = + nm->mkNode(STRING_SUBSTR, x, index, nm->mkConst(Rational(1))); substr_ch = Rewriter::rewrite(substr_ch); // handle the case where it is purely characters for (const Node& r : disj) @@ -526,7 +525,7 @@ Node RegExpElimination::eliminateStar(Node atom) { Assert(!char_constraints.empty()); Node bound = nm->mkNode( - AND, nm->mkNode(LEQ, d_zero, index), nm->mkNode(LT, index, lenx)); + AND, nm->mkNode(LEQ, zero, index), nm->mkNode(LT, index, lenx)); Node conc = char_constraints.size() == 1 ? char_constraints[0] : nm->mkNode(OR, char_constraints); Node body = nm->mkNode(OR, bound.negate(), conc); @@ -554,7 +553,7 @@ Node RegExpElimination::eliminateStar(Node atom) // lens is a positive constant, so it is safe to use total div/mod here. Node bound = nm->mkNode( AND, - nm->mkNode(LEQ, d_zero, index), + nm->mkNode(LEQ, zero, index), nm->mkNode(LT, index, nm->mkNode(INTS_DIVISION_TOTAL, lenx, lens))); Node conc = nm->mkNode(STRING_SUBSTR, x, nm->mkNode(MULT, index, lens), lens) @@ -563,9 +562,7 @@ Node RegExpElimination::eliminateStar(Node atom) Node bvl = nm->mkNode(BOUND_VAR_LIST, index); Node res = nm->mkNode(FORALL, bvl, body); res = nm->mkNode( - AND, - nm->mkNode(INTS_MODULUS_TOTAL, lenx, lens).eqNode(d_zero), - res); + AND, nm->mkNode(INTS_MODULUS_TOTAL, lenx, lens).eqNode(zero), res); // e.g. // x in ("abc")* ---> // forall k. 0 <= k < (len( x ) div 3) => substr(x,3*k,3) = "abc" ^ diff --git a/src/theory/strings/regexp_elim.h b/src/theory/strings/regexp_elim.h index 61ce8a920..e5f2fa854 100644 --- a/src/theory/strings/regexp_elim.h +++ b/src/theory/strings/regexp_elim.h @@ -2,9 +2,9 @@ /*! \file regexp_elim.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Mathias Preiner ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -40,25 +40,19 @@ class RegExpElimination * form (str.in.re x R). If this method returns a non-null node ret, then ret * is equivalent to atom. */ - Node eliminate(Node atom); + static Node eliminate(Node atom); private: - /** common terms */ - Node d_zero; - Node d_one; - Node d_neg_one; - /** The type of regular expressions */ - TypeNode d_regExpType; /** return elimination * * This method is called when atom is rewritten to atomElim, and returns * atomElim. id is an identifier indicating the reason for the elimination. */ - Node returnElim(Node atom, Node atomElim, const char* id); + static Node returnElim(Node atom, Node atomElim, const char* id); /** elimination for regular expression concatenation */ - Node eliminateConcat(Node atom); + static Node eliminateConcat(Node atom); /** elimination for regular expression star */ - Node eliminateStar(Node atom); + static Node eliminateStar(Node atom); }; /* class RegExpElimination */ } // namespace strings diff --git a/src/theory/strings/regexp_entail.cpp b/src/theory/strings/regexp_entail.cpp index 8830b7f93..7e1f42f37 100644 --- a/src/theory/strings/regexp_entail.cpp +++ b/src/theory/strings/regexp_entail.cpp @@ -2,9 +2,9 @@ /*! \file regexp_entail.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** Andrew Reynolds, Andres Noetzli, Mathias Preiner ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -48,6 +48,8 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, do_next = false; Node xc = mchildren[mchildren.size() - 1]; Node rc = children[children.size() - 1]; + Trace("regexp-ext-rewrite-debug") + << "* " << xc << " in " << rc << std::endl; Assert(rc.getKind() != REGEXP_CONCAT); Assert(xc.getKind() != STRING_CONCAT); if (rc.getKind() == STRING_TO_REGEXP) @@ -57,7 +59,15 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, children.pop_back(); mchildren.pop_back(); do_next = true; - Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl; + Trace("regexp-ext-rewrite-debug") << "- strip equal" << std::endl; + } + else if (rc[0].isConst() && Word::isEmpty(rc[0])) + { + Trace("regexp-ext-rewrite-debug") + << "- ignore empty RE" << std::endl; + // ignore and continue + children.pop_back(); + do_next = true; } else if (xc.isConst() && rc[0].isConst()) { @@ -65,8 +75,8 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, size_t index; Node s = Word::splitConstant(xc, rc[0], index, t == 0); Trace("regexp-ext-rewrite-debug") - << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> " - << s << " " << index << " " << t << std::endl; + << "- CRE: Regexp const split : " << xc << " " << rc[0] + << " -> " << s << " " << index << " " << t << std::endl; if (s.isNull()) { Trace("regexp-ext-rewrite-debug") @@ -76,7 +86,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, else { Trace("regexp-ext-rewrite-debug") - << "...strip equal const" << std::endl; + << "- strip equal const" << std::endl; children.pop_back(); mchildren.pop_back(); if (index == 0) @@ -88,6 +98,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, children.push_back(nm->mkNode(STRING_TO_REGEXP, s)); } } + Trace("regexp-ext-rewrite-debug") << "- split const" << std::endl; do_next = true; } } @@ -97,7 +108,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, CVC4::String s = xc.getConst<String>(); if (Word::isEmpty(xc)) { - Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl; + Trace("regexp-ext-rewrite-debug") << "- ignore empty" << std::endl; // ignore and continue mchildren.pop_back(); do_next = true; @@ -127,6 +138,8 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, } else { + Trace("regexp-ext-rewrite-debug") + << "...return false" << std::endl; return nm->mkConst(false); } } @@ -135,19 +148,23 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, // see if any/each child does not work bool result_valid = true; Node result; - Node emp_s = nm->mkConst(::CVC4::String("")); + Node emp_s = nm->mkConst(String("")); for (unsigned i = 0; i < rc.getNumChildren(); i++) { std::vector<Node> mchildren_s; std::vector<Node> children_s; mchildren_s.push_back(xc); utils::getConcat(rc[i], children_s); + Trace("regexp-ext-rewrite-debug") << push; Node ret = simpleRegexpConsume(mchildren_s, children_s, t); + Trace("regexp-ext-rewrite-debug") << pop; if (!ret.isNull()) { // one conjunct cannot be satisfied, return false if (rc.getKind() == REGEXP_INTER) { + Trace("regexp-ext-rewrite-debug") + << "...return " << ret << std::endl; return ret; } } @@ -182,10 +199,15 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, { // all disjuncts cannot be satisfied, return false Assert(rc.getKind() == REGEXP_UNION); + Trace("regexp-ext-rewrite-debug") + << "...return false" << std::endl; return nm->mkConst(false); } else { + Trace("regexp-ext-rewrite-debug") + << "- same result, try again, children now " << children + << std::endl; // all branches led to the same result children.pop_back(); mchildren.pop_back(); @@ -210,17 +232,19 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, std::vector<Node> children_s; utils::getConcat(rc[0], children_s); Trace("regexp-ext-rewrite-debug") - << "...recursive call on body of star" << std::endl; + << "- recursive call on body of star" << std::endl; + Trace("regexp-ext-rewrite-debug") << push; Node ret = simpleRegexpConsume(mchildren_s, children_s, t); + Trace("regexp-ext-rewrite-debug") << pop; if (!ret.isNull()) { Trace("regexp-ext-rewrite-debug") - << "CRE : regexp star infeasable " << xc << " " << rc + << "- CRE : regexp star infeasable " << xc << " " << rc << std::endl; children.pop_back(); if (!children.empty()) { - Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl; + Trace("regexp-ext-rewrite-debug") << "- continue" << std::endl; do_next = true; } } @@ -244,16 +268,22 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, std::reverse(mchildren_ss.begin(), mchildren_ss.end()); std::reverse(children_ss.begin(), children_ss.end()); } - if (simpleRegexpConsume(mchildren_ss, children_ss, t) - .isNull()) + Trace("regexp-ext-rewrite-debug") + << "- recursive call required repeat star" << std::endl; + Trace("regexp-ext-rewrite-debug") << push; + Node rets = simpleRegexpConsume(mchildren_ss, children_ss, t); + Trace("regexp-ext-rewrite-debug") << pop; + if (rets.isNull()) { can_skip = true; } } if (!can_skip) { + TypeNode stype = nm->stringType(); + Node prev = utils::mkConcat(mchildren, stype); Trace("regexp-ext-rewrite-debug") - << "...can't skip" << std::endl; + << "- can't skip" << std::endl; // take the result of fully consuming once if (t == 1) { @@ -262,12 +292,15 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, mchildren.clear(); mchildren.insert( mchildren.end(), mchildren_s.begin(), mchildren_s.end()); - do_next = true; + Node curr = utils::mkConcat(mchildren, stype); + do_next = (prev != curr); + Trace("regexp-ext-rewrite-debug") + << "- do_next = " << do_next << std::endl; } else { Trace("regexp-ext-rewrite-debug") - << "...can skip " << rc << " from " << xc << std::endl; + << "- can skip " << rc << " from " << xc << std::endl; } } } @@ -276,7 +309,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, if (!do_next) { Trace("regexp-ext-rewrite") - << "Cannot consume : " << xc << " " << rc << std::endl; + << "- cannot consume : " << xc << " " << rc << std::endl; } } } @@ -286,6 +319,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, std::reverse(mchildren.begin(), mchildren.end()); } } + Trace("regexp-ext-rewrite-debug") << "...finished, return null" << std::endl; return Node::null(); } diff --git a/src/theory/strings/regexp_entail.h b/src/theory/strings/regexp_entail.h index ef3f2d34e..9fb797c45 100644 --- a/src/theory/strings/regexp_entail.h +++ b/src/theory/strings/regexp_entail.h @@ -2,9 +2,9 @@ /*! \file regexp_entail.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index b99124ac3..a91210a7b 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -2,9 +2,9 @@ /*! \file regexp_operation.cpp ** \verbatim ** Top contributors (to current version): - ** Tianyi Liang, Andrew Reynolds, Tim King + ** Tianyi Liang, Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -40,7 +40,7 @@ RegExpOpr::RegExpOpr() std::vector<Node>{})), d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)) { - d_emptyString = Word::mkEmptyWord(CONST_STRING); + d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType()); d_emptySingleton = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString); diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index 689c35f3d..d0b0755eb 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -2,9 +2,9 @@ /*! \file regexp_operation.h ** \verbatim ** Top contributors (to current version): - ** Tianyi Liang, Andrew Reynolds + ** Andrew Reynolds, Tianyi Liang, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp index db7e2d836..53c6c9acc 100644 --- a/src/theory/strings/regexp_solver.cpp +++ b/src/theory/strings/regexp_solver.cpp @@ -2,9 +2,9 @@ /*! \file regexp_solver.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tianyi Liang, Morgan Deters + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -599,7 +599,8 @@ bool RegExpSolver::deriveRegExp(Node x, Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x << ", r= " << r << std::endl; CVC4::String s = getHeadConst(x); - if (!s.empty() && d_regexp_opr.checkConstRegExp(r)) + // only allow RE_DERIVE for concrete constant regular expressions + if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT) { Node conc = Node::null(); Node dc = r; @@ -671,6 +672,7 @@ Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp) case REGEXP_UNION: case REGEXP_INTER: case REGEXP_STAR: + case REGEXP_COMPLEMENT: { std::vector<Node> vec_nodes; for (const Node& cr : r) diff --git a/src/theory/strings/regexp_solver.h b/src/theory/strings/regexp_solver.h index b44c6c8d9..9e9ba5845 100644 --- a/src/theory/strings/regexp_solver.h +++ b/src/theory/strings/regexp_solver.h @@ -4,7 +4,7 @@ ** Top contributors (to current version): ** Andrew Reynolds, Tianyi Liang, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp index 2953a2b3c..68b510de6 100644 --- a/src/theory/strings/rewrites.cpp +++ b/src/theory/strings/rewrites.cpp @@ -2,9 +2,9 @@ /*! \file rewrites.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -111,6 +111,9 @@ const char* toString(Rewrite r) case Rewrite::RPL_RPL_EMPTY: return "RPL_RPL_EMPTY"; case Rewrite::RPL_RPL_LEN_ID: return "RPL_RPL_LEN_ID"; case Rewrite::RPL_X_Y_X_SIMP: return "RPL_X_Y_X_SIMP"; + case Rewrite::REPLACE_RE_EVAL: return "REPLACE_RE_EVAL"; + case Rewrite::REPLACE_RE_ALL_EVAL: return "REPLACE_RE_ALL_EVAL"; + case Rewrite::REPLACE_RE_EMP_RE: return "REPLACE_RE_EMP_RE"; case Rewrite::SPLIT_EQ: return "SPLIT_EQ"; case Rewrite::SPLIT_EQ_STRIP_L: return "SPLIT_EQ_STRIP_L"; case Rewrite::SPLIT_EQ_STRIP_R: return "SPLIT_EQ_STRIP_R"; @@ -199,7 +202,9 @@ const char* toString(Rewrite r) case Rewrite::LEN_CONCAT: return "LEN_CONCAT"; case Rewrite::LEN_REPL_INV: return "LEN_REPL_INV"; case Rewrite::LEN_CONV_INV: return "LEN_CONV_INV"; + case Rewrite::LEN_SEQ_UNIT: return "LEN_SEQ_UNIT"; case Rewrite::CHARAT_ELIM: return "CHARAT_ELIM"; + case Rewrite::SEQ_UNIT_EVAL: return "SEQ_UNIT_EVAL"; default: return "?"; } } diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h index 7a315ebd3..ccbdbc0cd 100644 --- a/src/theory/strings/rewrites.h +++ b/src/theory/strings/rewrites.h @@ -2,9 +2,9 @@ /*! \file rewrites.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -116,6 +116,9 @@ enum class Rewrite : uint32_t RPL_RPL_EMPTY, RPL_RPL_LEN_ID, RPL_X_Y_X_SIMP, + REPLACE_RE_EVAL, + REPLACE_RE_ALL_EVAL, + REPLACE_RE_EMP_RE, SPLIT_EQ, SPLIT_EQ_STRIP_L, SPLIT_EQ_STRIP_R, @@ -202,7 +205,9 @@ enum class Rewrite : uint32_t LEN_CONCAT, LEN_REPL_INV, LEN_CONV_INV, - CHARAT_ELIM + LEN_SEQ_UNIT, + CHARAT_ELIM, + SEQ_UNIT_EVAL }; /** diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index 2d2ec0af0..bb0fa1d97 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -4,7 +4,7 @@ ** Top contributors (to current version): ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -264,7 +264,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) // Add a constant string to the side with more `cn`s to restore // the difference in number of `cn`s std::vector<Node> vec(diff, cn); - trimmed[j].push_back(Word::mkWord(vec)); + trimmed[j].push_back(Word::mkWordFlatten(vec)); } } @@ -576,6 +576,11 @@ Node SequencesRewriter::rewriteLength(Node node) Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]); return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV); } + else if (nk0 == SEQ_UNIT) + { + Node retNode = nm->mkConst(Rational(1)); + return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT); + } return node; } @@ -602,7 +607,7 @@ Node SequencesRewriter::rewriteConcat(Node node) std::vector<Node> wvec; wvec.push_back(preNode); wvec.push_back(tmpNode[0]); - preNode = Word::mkWord(wvec); + preNode = Word::mkWordFlatten(wvec); node_vec.push_back(preNode); } else @@ -644,7 +649,7 @@ Node SequencesRewriter::rewriteConcat(Node node) std::vector<Node> vec; vec.push_back(preNode); vec.push_back(tmpNode); - preNode = Word::mkWord(vec); + preNode = Word::mkWordFlatten(vec); } } } @@ -1315,9 +1320,13 @@ Node SequencesRewriter::rewriteMembership(TNode node) } else { + Node prev = retNode; retNode = nm->mkNode( STRING_IN_REGEXP, utils::mkConcat(mchildren, stype), r); - success = true; + // Iterate again if the node changed. It may not have changed if + // nothing was consumed from mchildren (e.g. if the body of the + // re.* accepts the empty string. + success = (retNode != prev); } } } @@ -1413,6 +1422,14 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) { retNode = rewriteReplaceAll(node); } + else if (nk == kind::STRING_REPLACE_RE) + { + retNode = rewriteReplaceRe(node); + } + else if (nk == kind::STRING_REPLACE_RE_ALL) + { + retNode = rewriteReplaceReAll(node); + } else if (nk == STRING_REV) { retNode = rewriteStrReverse(node); @@ -1461,6 +1478,10 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) { retNode = rewriteRepeatRegExp(node); } + else if (nk == SEQ_UNIT) + { + retNode = rewriteSeqUnit(node); + } Trace("sequences-postrewrite") << "Strings::SequencesRewriter::postRewrite returning " << retNode @@ -1820,6 +1841,7 @@ Node SequencesRewriter::rewriteContains(Node node) nb << emp.eqNode(t); for (const Node& c : vec) { + Assert(c.getType() == t.getType()); nb << c.eqNode(t); } @@ -2910,6 +2932,121 @@ Node SequencesRewriter::rewriteReplaceInternal(Node node) return Node::null(); } +Node SequencesRewriter::rewriteReplaceRe(Node node) +{ + Assert(node.getKind() == STRING_REPLACE_RE); + NodeManager* nm = NodeManager::currentNM(); + Node x = node[0]; + Node y = node[1]; + Node z = node[2]; + + if (x.isConst()) + { + // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z" + std::pair<size_t, size_t> match = firstMatch(x, y); + if (match.first != string::npos) + { + String s = x.getConst<String>(); + Node ret = nm->mkNode(STRING_CONCAT, + nm->mkConst(s.substr(0, match.first)), + z, + nm->mkConst(s.substr(match.second))); + return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL); + } + else + { + return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL); + } + } + // str.replace_re( x, y, z ) ---> z ++ x if "" in y ---> true + String emptyStr(""); + if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, y)) + { + Node ret = nm->mkNode(STRING_CONCAT, z, x); + return returnRewrite(node, ret, Rewrite::REPLACE_RE_EMP_RE); + } + return node; +} + +Node SequencesRewriter::rewriteReplaceReAll(Node node) +{ + Assert(node.getKind() == STRING_REPLACE_RE_ALL); + NodeManager* nm = NodeManager::currentNM(); + Node x = node[0]; + Node y = node[1]; + Node z = node[2]; + + if (x.isConst()) + { + // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) ---> + // "Z" ++ y ++ "Z" ++ y + TypeNode t = x.getType(); + Node emp = Word::mkEmptyWord(t); + Node yp = Rewriter::rewrite( + nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp))); + std::vector<Node> res; + String rem = x.getConst<String>(); + std::pair<size_t, size_t> match(0, 0); + while (rem.size() >= 0) + { + match = firstMatch(nm->mkConst(rem), yp); + if (match.first == string::npos) + { + break; + } + res.push_back(nm->mkConst(rem.substr(0, match.first))); + res.push_back(z); + rem = rem.substr(match.second); + } + res.push_back(nm->mkConst(rem)); + Node ret = utils::mkConcat(res, t); + return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL); + } + + return node; +} + +std::pair<size_t, size_t> SequencesRewriter::firstMatch(Node n, Node r) +{ + Assert(n.isConst() && n.getType().isStringLike()); + Assert(r.getType().isRegExp()); + NodeManager* nm = NodeManager::currentNM(); + + std::vector<Node> emptyVec; + Node sigmaStar = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec)); + Node re = nm->mkNode(REGEXP_CONCAT, r, sigmaStar); + String s = n.getConst<String>(); + + if (s.size() == 0) + { + if (RegExpEntail::testConstStringInRegExp(s, 0, r)) + { + return std::make_pair(0, 0); + } + else + { + return std::make_pair(string::npos, string::npos); + } + } + + for (size_t i = 0, size = s.size(); i < size; i++) + { + if (RegExpEntail::testConstStringInRegExp(s, i, re)) + { + for (size_t j = i; j <= size; j++) + { + String substr = s.substr(i, j - i); + if (RegExpEntail::testConstStringInRegExp(substr, 0, r)) + { + return std::make_pair(i, j); + } + } + } + } + + return std::make_pair(string::npos, string::npos); +} + Node SequencesRewriter::rewriteStrReverse(Node node) { Assert(node.getKind() == STRING_REV); @@ -3040,7 +3177,7 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype) Assert(ratLen.getDenominator() == 1); Integer intLen = ratLen.getNumerator(); uint32_t u = intLen.getUnsignedInt(); - if (stype.isString()) + if (stype.isString()) // string-only { res = nm->mkConst(String(std::string(u, 'A'))); } @@ -3095,6 +3232,19 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype) return res; } +Node SequencesRewriter::rewriteSeqUnit(Node node) +{ + NodeManager* nm = NodeManager::currentNM(); + if (node[0].isConst()) + { + std::vector<Expr> seq; + seq.push_back(node[0].toExpr()); + TypeNode stype = node[0].getType(); + Node ret = nm->mkConst(ExprSequence(stype.toType(), seq)); + return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL); + } + return node; +} Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r) { diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h index 56b74f536..8da672cb5 100644 --- a/src/theory/strings/sequences_rewriter.h +++ b/src/theory/strings/sequences_rewriter.h @@ -2,9 +2,9 @@ /*! \file sequences_rewriter.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -204,6 +204,33 @@ class SequencesRewriter : public TheoryRewriter * str.replaceall. If it returns a non-null ret, then node rewrites to ret. */ Node rewriteReplaceInternal(Node node); + /** rewrite regular expression replace + * + * This method implements rewrite rules that apply to terms of the form + * str.replace_re(s, r, t). + * + * @param node The node to rewrite + * @return The rewritten node + */ + Node rewriteReplaceRe(Node node); + /** rewrite regular expression replace + * + * This method implements rewrite rules that apply to terms of the form + * str.replace_re_all(s, r, t). + * + * @param node The node to rewrite + * @return The rewritten node + */ + Node rewriteReplaceReAll(Node node); + /** + * Returns the first, shortest sequence in n that matches r. + * + * @param n The constant string or sequence to search in. + * @param r The regular expression to search for. + * @return A pair holding the start position and the end position of the + * match or a pair of string::npos if r does not appear in n. + */ + std::pair<size_t, size_t> firstMatch(Node n, Node r); /** rewrite string reverse * * This is the entry point for post-rewriting terms n of the form @@ -224,6 +251,12 @@ class SequencesRewriter : public TheoryRewriter * Returns the rewritten form of node. */ Node rewriteStringToCode(Node node); + /** rewrite seq.unit + * This is the entry point for post-rewriting terms n of the form + * seq.unit( t ) + * Returns the rewritten form of node. + */ + Node rewriteSeqUnit(Node node); /** length preserving rewrite * diff --git a/src/theory/strings/sequences_stats.cpp b/src/theory/strings/sequences_stats.cpp index c830a68bd..502d05353 100644 --- a/src/theory/strings/sequences_stats.cpp +++ b/src/theory/strings/sequences_stats.cpp @@ -2,7 +2,7 @@ /*! \file sequences_stats.cpp ** \verbatim ** Top contributors (to current version): - ** Andres Noetzli + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. diff --git a/src/theory/strings/sequences_stats.h b/src/theory/strings/sequences_stats.h index 8a1564587..85f830fa2 100644 --- a/src/theory/strings/sequences_stats.h +++ b/src/theory/strings/sequences_stats.h @@ -2,7 +2,7 @@ /*! \file sequences_stats.h ** \verbatim ** Top contributors (to current version): - ** Andres Noetzli + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp index 4af75f1cc..8fb854d91 100644 --- a/src/theory/strings/skolem_cache.cpp +++ b/src/theory/strings/skolem_cache.cpp @@ -4,7 +4,7 @@ ** Top contributors (to current version): ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -82,12 +82,13 @@ Node SkolemCache::mkTypedSkolemCached( } NodeManager* nm = NodeManager::currentNM(); + SkolemManager* sm = nm->getSkolemManager(); Node sk; switch (id) { // exists k. k = a case SK_PURIFY: - sk = ProofSkolemCache::mkPurifySkolem(a, c, "string purify skolem"); + sk = sm->mkPurifySkolem(a, c, "string purify skolem"); break; // these are eliminated by normalizeStringSkolem case SK_ID_V_SPT: @@ -113,7 +114,7 @@ Node SkolemCache::mkTypedSkolemCached( Notice() << "Don't know how to handle Skolem ID " << id << std::endl; Node v = nm->mkBoundVar(tn); Node cond = nm->mkConst(true); - sk = ProofSkolemCache::mkSkolem(v, cond, c, "string skolem"); + sk = sm->mkSkolem(v, cond, c, "string skolem"); } break; } diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h index 0ebbb3277..302c69e83 100644 --- a/src/theory/strings/skolem_cache.h +++ b/src/theory/strings/skolem_cache.h @@ -2,9 +2,9 @@ /*! \file skolem_cache.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli + ** Andrew Reynolds, Andres Noetzli, Mathias Preiner ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -22,7 +22,7 @@ #include <unordered_set> #include "expr/node.h" -#include "expr/proof_skolem_cache.h" +#include "expr/skolem_manager.h" namespace CVC4 { namespace theory { @@ -102,6 +102,16 @@ class SkolemCache // of b in a as the witness for contains( a, b ). SK_FIRST_CTN_PRE, SK_FIRST_CTN_POST, + // For sequence a and regular expression b, + // in_re(a, re.++(_*, b, _*)) => + // exists k_pre, k_match, k_post. + // a = k_pre ++ k_match ++ k_post ^ + // ~in_re(k_pre ++ substr(k_match, 0, str.len(k_match) - 1), + // re.++(_*, b, _*)) ^ + // in_re(k2, y) + SK_FIRST_MATCH_PRE, + SK_FIRST_MATCH, + SK_FIRST_MATCH_POST, // For integer b, // len( a ) > b => // exists k. a = k ++ a' ^ len( k ) = b @@ -120,6 +130,14 @@ class SkolemCache // k(x) is the end index of the x^th occurrence of b in a // where n is the number of occurrences of b in a, and k(0)=0. SK_OCCUR_INDEX, + // For function k: Int -> Int + // exists k. + // forall 0 <= x < n, + // k(x) is the length of the x^th occurrence of b in a (excluding + // matches of empty strings) + // where b is a regular expression, n is the number of occurrences of b + // in a, and k(0)=0. + SK_OCCUR_LEN, }; /** * Returns a skolem of type string that is cached for (a,b,id) and has diff --git a/src/theory/strings/solver_state.cpp b/src/theory/strings/solver_state.cpp index 622e919f7..06a86935f 100644 --- a/src/theory/strings/solver_state.cpp +++ b/src/theory/strings/solver_state.cpp @@ -2,9 +2,9 @@ /*! \file solver_state.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Tianyi Liang, Mathias Preiner ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -297,31 +297,39 @@ std::pair<bool, Node> SolverState::entailmentCheck(options::TheoryOfMode mode, return d_valuation.entailmentCheck(mode, lit); } -void SolverState::separateByLength(const std::vector<Node>& n, - std::vector<std::vector<Node> >& cols, - std::vector<Node>& lts) +void SolverState::separateByLength( + const std::vector<Node>& n, + std::map<TypeNode, std::vector<std::vector<Node>>>& cols, + std::map<TypeNode, std::vector<Node>>& lts) { unsigned leqc_counter = 0; - std::map<Node, unsigned> eqc_to_leqc; - std::map<unsigned, Node> leqc_to_eqc; + // map (length, type) to an equivalence class identifier + std::map<std::pair<Node, TypeNode>, unsigned> eqc_to_leqc; + // backwards map + std::map<unsigned, std::pair<Node, TypeNode>> leqc_to_eqc; + // Collection of eqc for each identifier. Notice that some identifiers may + // not have an associated length in the mappings above, if the length of + // an equivalence class is unknown. std::map<unsigned, std::vector<Node> > eqc_to_strings; NodeManager* nm = NodeManager::currentNM(); for (const Node& eqc : n) { Assert(d_ee.getRepresentative(eqc) == eqc); + TypeNode tnEqc = eqc.getType(); EqcInfo* ei = getOrMakeEqcInfo(eqc, false); Node lt = ei ? ei->d_lengthTerm : Node::null(); if (!lt.isNull()) { lt = nm->mkNode(STRING_LENGTH, lt); Node r = d_ee.getRepresentative(lt); - if (eqc_to_leqc.find(r) == eqc_to_leqc.end()) + std::pair<Node, TypeNode> lkey(r, tnEqc); + if (eqc_to_leqc.find(lkey) == eqc_to_leqc.end()) { - eqc_to_leqc[r] = leqc_counter; - leqc_to_eqc[leqc_counter] = r; + eqc_to_leqc[lkey] = leqc_counter; + leqc_to_eqc[leqc_counter] = lkey; leqc_counter++; } - eqc_to_strings[eqc_to_leqc[r]].push_back(eqc); + eqc_to_strings[eqc_to_leqc[lkey]].push_back(eqc); } else { @@ -331,9 +339,11 @@ void SolverState::separateByLength(const std::vector<Node>& n, } for (const std::pair<const unsigned, std::vector<Node> >& p : eqc_to_strings) { - cols.push_back(std::vector<Node>()); - cols.back().insert(cols.back().end(), p.second.begin(), p.second.end()); - lts.push_back(leqc_to_eqc[p.first]); + Assert(!p.second.empty()); + // get the type of the collection + TypeNode stn = p.second[0].getType(); + cols[stn].emplace_back(p.second.begin(), p.second.end()); + lts[stn].push_back(leqc_to_eqc[p.first].first); } } diff --git a/src/theory/strings/solver_state.h b/src/theory/strings/solver_state.h index d43c600f4..2eee90ca4 100644 --- a/src/theory/strings/solver_state.h +++ b/src/theory/strings/solver_state.h @@ -2,9 +2,9 @@ /*! \file solver_state.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Tianyi Liang, Morgan Deters ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -171,11 +171,14 @@ class SolverState * * Separate the string representatives in argument n into a partition cols * whose collections have equal length. The i^th vector in cols has length - * lts[i] for all elements in col. + * lts[i] for all elements in col. These vectors are furthmore separated + * by string-like type. */ - void separateByLength(const std::vector<Node>& n, - std::vector<std::vector<Node> >& cols, - std::vector<Node>& lts); + void separateByLength( + const std::vector<Node>& n, + std::map<TypeNode, std::vector<std::vector<Node>>>& cols, + std::map<TypeNode, std::vector<Node>>& lts); + private: /** Common constants */ Node d_zero; diff --git a/src/theory/strings/strategy.cpp b/src/theory/strings/strategy.cpp new file mode 100644 index 000000000..549bba9d6 --- /dev/null +++ b/src/theory/strings/strategy.cpp @@ -0,0 +1,161 @@ +/********************* */ +/*! \file strategy.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Tianyi Liang, Morgan Deters + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of the strategy of the theory of strings. + **/ + +#include "theory/strings/strategy.h" + +#include "options/strings_options.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +std::ostream& operator<<(std::ostream& out, InferStep s) +{ + switch (s) + { + case BREAK: out << "break"; break; + case CHECK_INIT: out << "check_init"; break; + case CHECK_CONST_EQC: out << "check_const_eqc"; break; + case CHECK_EXTF_EVAL: out << "check_extf_eval"; break; + case CHECK_CYCLES: out << "check_cycles"; break; + case CHECK_FLAT_FORMS: out << "check_flat_forms"; break; + case CHECK_NORMAL_FORMS_EQ: out << "check_normal_forms_eq"; break; + case CHECK_NORMAL_FORMS_DEQ: out << "check_normal_forms_deq"; break; + case CHECK_CODES: out << "check_codes"; break; + case CHECK_LENGTH_EQC: out << "check_length_eqc"; break; + case CHECK_EXTF_REDUCTION: out << "check_extf_reduction"; break; + case CHECK_MEMBERSHIP: out << "check_membership"; break; + case CHECK_CARDINALITY: out << "check_cardinality"; break; + default: out << "?"; break; + } + return out; +} + +Strategy::Strategy() : d_strategy_init(false) {} + +Strategy::~Strategy() {} + +bool Strategy::isStrategyInit() const { return d_strategy_init; } + +bool Strategy::hasStrategyEffort(Theory::Effort e) const +{ + return d_strat_steps.find(e) != d_strat_steps.end(); +} + +std::vector<std::pair<InferStep, int> >::iterator Strategy::stepBegin( + Theory::Effort e) +{ + std::map<Theory::Effort, std::pair<unsigned, unsigned> >::const_iterator it = + d_strat_steps.find(e); + Assert(it != d_strat_steps.end()); + return d_infer_steps.begin() + it->second.first; +} + +std::vector<std::pair<InferStep, int> >::iterator Strategy::stepEnd( + Theory::Effort e) +{ + std::map<Theory::Effort, std::pair<unsigned, unsigned> >::const_iterator it = + d_strat_steps.find(e); + Assert(it != d_strat_steps.end()); + return d_infer_steps.begin() + it->second.second; +} + +void Strategy::addStrategyStep(InferStep s, int effort, bool addBreak) +{ + // must run check init first + Assert((s == CHECK_INIT) == d_infer_steps.empty()); + d_infer_steps.push_back(std::pair<InferStep, int>(s, effort)); + if (addBreak) + { + d_infer_steps.push_back(std::pair<InferStep, int>(BREAK, 0)); + } +} + +void Strategy::initializeStrategy() +{ + // initialize the strategy if not already done so + if (!d_strategy_init) + { + std::map<Theory::Effort, unsigned> step_begin; + std::map<Theory::Effort, unsigned> step_end; + d_strategy_init = true; + // beginning indices + step_begin[Theory::EFFORT_FULL] = 0; + if (options::stringEager()) + { + step_begin[Theory::EFFORT_STANDARD] = 0; + } + // add the inference steps + addStrategyStep(CHECK_INIT); + addStrategyStep(CHECK_CONST_EQC); + addStrategyStep(CHECK_EXTF_EVAL, 0); + // we must check cycles before using flat forms + addStrategyStep(CHECK_CYCLES); + if (options::stringFlatForms()) + { + addStrategyStep(CHECK_FLAT_FORMS); + } + addStrategyStep(CHECK_EXTF_REDUCTION, 1); + if (options::stringEager()) + { + // do only the above inferences at standard effort, if applicable + step_end[Theory::EFFORT_STANDARD] = d_infer_steps.size() - 1; + } + if (!options::stringEagerLen()) + { + addStrategyStep(CHECK_REGISTER_TERMS_PRE_NF); + } + addStrategyStep(CHECK_NORMAL_FORMS_EQ); + addStrategyStep(CHECK_EXTF_EVAL, 1); + if (!options::stringEagerLen() && options::stringLenNorm()) + { + addStrategyStep(CHECK_LENGTH_EQC, 0, false); + addStrategyStep(CHECK_REGISTER_TERMS_NF); + } + addStrategyStep(CHECK_NORMAL_FORMS_DEQ); + addStrategyStep(CHECK_CODES); + if (options::stringEagerLen() && options::stringLenNorm()) + { + addStrategyStep(CHECK_LENGTH_EQC); + } + if (options::stringExp() && !options::stringGuessModel()) + { + addStrategyStep(CHECK_EXTF_REDUCTION, 2); + } + addStrategyStep(CHECK_MEMBERSHIP); + addStrategyStep(CHECK_CARDINALITY); + step_end[Theory::EFFORT_FULL] = d_infer_steps.size() - 1; + if (options::stringExp() && options::stringGuessModel()) + { + step_begin[Theory::EFFORT_LAST_CALL] = d_infer_steps.size(); + // these two steps are run in parallel + addStrategyStep(CHECK_EXTF_REDUCTION, 2, false); + addStrategyStep(CHECK_EXTF_EVAL, 3); + step_end[Theory::EFFORT_LAST_CALL] = d_infer_steps.size() - 1; + } + // set the beginning/ending ranges + for (const std::pair<const Theory::Effort, unsigned>& it_begin : step_begin) + { + Theory::Effort e = it_begin.first; + std::map<Theory::Effort, unsigned>::iterator it_end = step_end.find(e); + Assert(it_end != step_end.end()); + d_strat_steps[e] = + std::pair<unsigned, unsigned>(it_begin.second, it_end->second); + } + } +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/strategy.h b/src/theory/strings/strategy.h new file mode 100644 index 000000000..9afb6a92f --- /dev/null +++ b/src/theory/strings/strategy.h @@ -0,0 +1,116 @@ +/********************* */ +/*! \file strategy.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Strategy of the theory of strings + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__STRATEGY_H +#define CVC4__THEORY__STRINGS__STRATEGY_H + +#include <map> +#include <vector> + +#include "theory/theory.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** inference steps + * + * Corresponds to a step in the overall strategy of the strings solver. For + * details on the individual steps, see documentation on the inference schemas + * within Strategy. + */ +enum InferStep +{ + // indicates that the strategy should break if lemmas or facts are added + BREAK, + // check initial + CHECK_INIT, + // check constant equivalence classes + CHECK_CONST_EQC, + // check extended function evaluation + CHECK_EXTF_EVAL, + // check cycles + CHECK_CYCLES, + // check flat forms + CHECK_FLAT_FORMS, + // check register terms pre-normal forms + CHECK_REGISTER_TERMS_PRE_NF, + // check normal forms equalities + CHECK_NORMAL_FORMS_EQ, + // check normal forms disequalities + CHECK_NORMAL_FORMS_DEQ, + // check codes + CHECK_CODES, + // check lengths for equivalence classes + CHECK_LENGTH_EQC, + // check register terms for normal forms + CHECK_REGISTER_TERMS_NF, + // check extended function reductions + CHECK_EXTF_REDUCTION, + // check regular expression memberships + CHECK_MEMBERSHIP, + // check cardinality + CHECK_CARDINALITY, +}; +std::ostream& operator<<(std::ostream& out, InferStep i); + +/** + * The strategy of theory of strings. + * + * This stores a sequence of the above enum that indicates the calls to + * runInferStep to make on the theory of strings, given by parent. + */ +class Strategy +{ + public: + Strategy(); + ~Strategy(); + /** is this strategy initialized? */ + bool isStrategyInit() const; + /** do we have a strategy for effort e? */ + bool hasStrategyEffort(Theory::Effort e) const; + /** begin and end iterators for effort e */ + std::vector<std::pair<InferStep, int> >::iterator stepBegin(Theory::Effort e); + std::vector<std::pair<InferStep, int> >::iterator stepEnd(Theory::Effort e); + /** initialize the strategy + * + * This initializes the above information based on the options. This makes + * a series of calls to addStrategyStep above. + */ + void initializeStrategy(); + + private: + /** add strategy step + * + * This adds (s,effort) as a strategy step to the vectors d_infer_steps and + * d_infer_step_effort. This indicates that a call to runInferStep should + * be run as the next step in the strategy. If addBreak is true, we add + * a BREAK to the strategy following this step. + */ + void addStrategyStep(InferStep s, int effort = 0, bool addBreak = true); + /** is strategy initialized */ + bool d_strategy_init; + /** the strategy */ + std::vector<std::pair<InferStep, int> > d_infer_steps; + /** the range (begin, end) of steps to run at given efforts */ + std::map<Theory::Effort, std::pair<unsigned, unsigned> > d_strat_steps; +}; /* class Strategy */ + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__STRATEGY_H */ diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp index 311eda554..9f502e1f6 100644 --- a/src/theory/strings/strings_entail.cpp +++ b/src/theory/strings/strings_entail.cpp @@ -4,7 +4,7 @@ ** Top contributors (to current version): ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -37,8 +37,6 @@ bool StringsEntail::canConstantContainConcat(Node c, int& lastc) { Assert(c.isConst()); - CVC4::String t = c.getConst<String>(); - const std::vector<unsigned>& tvec = t.getVec(); Assert(n.getKind() == STRING_CONCAT); // must find constant components in order size_t pos = 0; @@ -50,19 +48,20 @@ bool StringsEntail::canConstantContainConcat(Node c, { firstc = firstc == -1 ? i : firstc; lastc = i; - CVC4::String s = n[i].getConst<String>(); - size_t new_pos = t.find(s, pos); + size_t new_pos = Word::find(c, n[i], pos); if (new_pos == std::string::npos) { return false; } else { - pos = new_pos + s.size(); + pos = new_pos + Word::getLength(n[i]); } } else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0])) { + Assert(c.getType().isString()); + const std::vector<unsigned>& tvec = c.getConst<String>().getVec(); // find the first occurrence of a digit starting at pos while (pos < tvec.size() && !String::isDigit(tvec[pos])) { @@ -116,7 +115,8 @@ bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1, { Assert(dir == 1 || dir == -1); Assert(nr.empty()); - Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0)); + NodeManager* nm = NodeManager::currentNM(); + Node zero = nm->mkConst(CVC4::Rational(0)); bool ret = false; bool success; unsigned sindex = 0; @@ -139,18 +139,16 @@ bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1, if (lbr.sgn() > 0) { Assert(ArithEntail::check(curr, true)); - CVC4::String s = n1[sindex_use].getConst<String>(); - Node ncl = - NodeManager::currentNM()->mkConst(CVC4::Rational(s.size())); - Node next_s = - NodeManager::currentNM()->mkNode(MINUS, lowerBound, ncl); + Node s = n1[sindex_use]; + size_t slen = Word::getLength(s); + Node ncl = nm->mkConst(CVC4::Rational(slen)); + Node next_s = nm->mkNode(MINUS, lowerBound, ncl); next_s = Rewriter::rewrite(next_s); Assert(next_s.isConst()); // we can remove the entire constant if (next_s.getConst<Rational>().sgn() >= 0) { - curr = Rewriter::rewrite( - NodeManager::currentNM()->mkNode(MINUS, curr, ncl)); + curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl)); success = true; sindex++; } @@ -160,25 +158,20 @@ bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1, // lower bound minus the length of a concrete string is negative, // hence lowerBound cannot be larger than long max Assert(lbr < Rational(String::maxSize())); - curr = Rewriter::rewrite( - NodeManager::currentNM()->mkNode(MINUS, curr, lowerBound)); + curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound)); uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); - Assert(lbsize < s.size()); + Assert(lbsize < slen); if (dir == 1) { // strip partially from the front - nr.push_back( - NodeManager::currentNM()->mkConst(s.prefix(lbsize))); - n1[sindex_use] = NodeManager::currentNM()->mkConst( - s.suffix(s.size() - lbsize)); + nr.push_back(Word::prefix(s, lbsize)); + n1[sindex_use] = Word::suffix(s, slen - lbsize); } else { // strip partially from the back - nr.push_back( - NodeManager::currentNM()->mkConst(s.suffix(lbsize))); - n1[sindex_use] = NodeManager::currentNM()->mkConst( - s.prefix(s.size() - lbsize)); + nr.push_back(Word::suffix(s, lbsize)); + n1[sindex_use] = Word::prefix(s, slen - lbsize); } ret = true; } @@ -496,8 +489,6 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1, { Assert(nb.empty()); Assert(ne.empty()); - - NodeManager* nm = NodeManager::currentNM(); bool changed = false; // for ( forwards, backwards ) direction for (unsigned r = 0; r < 2; r++) @@ -509,7 +500,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1, bool removeComponent = false; Node n1cmp = n1[index0]; - if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0) + if (n1cmp.isConst() && Word::isEmpty(n1cmp)) { return false; } @@ -522,14 +513,15 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1, << ", dir = " << dir << std::endl; if (n1cmp.isConst()) { - CVC4::String s = n1cmp.getConst<String>(); + Node s = n1cmp; + size_t slen = Word::getLength(s); // overlap is an overapproximation of the number of characters // n2[index1] can match in s - unsigned overlap = s.size(); + unsigned overlap = Word::getLength(s); if (n2[index1].isConst()) { - CVC4::String t = n2[index1].getConst<String>(); - std::size_t ret = r == 0 ? s.find(t) : s.rfind(t); + Node t = n2[index1]; + std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t); if (ret == std::string::npos) { if (n1.size() == 1) @@ -545,7 +537,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1, // This is used to partially strip off the endpoint // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) --> // str.contains( str.++( "c", x ), str.++( "cd", y ) ) - overlap = r == 0 ? s.overlap(t) : t.overlap(s); + overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s); } else { @@ -553,19 +545,20 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1, // if there is no overlap // e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" ) // --> str.contains( x, "a" ) - removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0); + removeComponent = + ((r == 0 ? Word::overlap(s, t) : Word::overlap(t, s)) == 0); } } else if (sss.empty()) // only if not substr { - Assert(ret < s.size()); + Assert(ret < slen); // can strip off up to the find position, e.g. // str.contains( str.++( "abc", x ), str.++( "b", y ) ) --> // str.contains( str.++( "bc", x ), str.++( "b", y ) ), // and // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) --> // str.contains( str.++( x, "abb" ), str.++( y, "b" ) ) - overlap = s.size() - ret; + overlap = slen - ret; } } else @@ -573,7 +566,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1, // inconclusive } // process the overlap - if (overlap < s.size()) + if (overlap < slen) { changed = true; if (overlap == 0) @@ -586,13 +579,13 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1, // component if (r == 0) { - nb.push_back(nm->mkConst(s.prefix(s.size() - overlap))); - n1[index0] = nm->mkConst(s.suffix(overlap)); + nb.push_back(Word::prefix(s, slen - overlap)); + n1[index0] = Word::suffix(s, overlap); } else { - ne.push_back(nm->mkConst(s.suffix(s.size() - overlap))); - n1[index0] = nm->mkConst(s.prefix(overlap)); + ne.push_back(Word::suffix(s, slen - overlap)); + n1[index0] = Word::prefix(s, overlap); } } } @@ -601,8 +594,8 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1, { if (n2[index1].isConst()) { + Assert(n2[index1].getType().isString()); CVC4::String t = n2[index1].getConst<String>(); - if (n1.size() == 1) { // if n1.size()==1, then if n2[index1] is not a number, we can drop diff --git a/src/theory/strings/strings_entail.h b/src/theory/strings/strings_entail.h index 379c09043..3eb77c5f5 100644 --- a/src/theory/strings/strings_entail.h +++ b/src/theory/strings/strings_entail.h @@ -4,7 +4,7 @@ ** Top contributors (to current version): ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/strings_fmf.cpp b/src/theory/strings/strings_fmf.cpp index 87ce5e7c6..9530171f0 100644 --- a/src/theory/strings/strings_fmf.cpp +++ b/src/theory/strings/strings_fmf.cpp @@ -2,9 +2,9 @@ /*! \file strings_fmf.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/strings_fmf.h b/src/theory/strings/strings_fmf.h index 9dfb1f590..f66c23d5b 100644 --- a/src/theory/strings/strings_fmf.h +++ b/src/theory/strings/strings_fmf.h @@ -2,9 +2,9 @@ /*! \file strings_fmf.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/strings_rewriter.cpp b/src/theory/strings/strings_rewriter.cpp index bd749576e..76391cc0d 100644 --- a/src/theory/strings/strings_rewriter.cpp +++ b/src/theory/strings/strings_rewriter.cpp @@ -2,9 +2,9 @@ /*! \file strings_rewriter.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/strings_rewriter.h b/src/theory/strings/strings_rewriter.h index ce4be476d..aadb11f1c 100644 --- a/src/theory/strings/strings_rewriter.h +++ b/src/theory/strings/strings_rewriter.h @@ -2,9 +2,9 @@ /*! \file strings_rewriter.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp index 6330d7c10..b4fbbed98 100644 --- a/src/theory/strings/term_registry.cpp +++ b/src/theory/strings/term_registry.cpp @@ -2,9 +2,9 @@ /*! \file term_registry.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tianyi Liang, Morgan Deters + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -15,6 +15,7 @@ #include "theory/strings/term_registry.h" #include "expr/attribute.h" +#include "options/smt_options.h" #include "options/strings_options.h" #include "smt/logic_exception.h" #include "theory/rewriter.h" @@ -35,23 +36,25 @@ struct StringsProxyVarAttributeId typedef expr::Attribute<StringsProxyVarAttributeId, bool> StringsProxyVarAttribute; -TermRegistry::TermRegistry(context::Context* c, - context::UserContext* u, +TermRegistry::TermRegistry(SolverState& s, eq::EqualityEngine& ee, OutputChannel& out, - SequencesStatistics& statistics) - : d_ee(ee), + SequencesStatistics& statistics, + ProofNodeManager* pnm) + : d_state(s), + d_ee(ee), d_out(out), d_statistics(statistics), d_hasStrCode(false), - d_functionsTerms(c), - d_inputVars(u), - d_preregisteredTerms(u), - d_registeredTerms(u), - d_registeredTypes(u), - d_proxyVar(u), - d_proxyVarToLength(u), - d_lengthLemmaTermsCache(u) + d_functionsTerms(s.getSatContext()), + d_inputVars(s.getUserContext()), + d_preregisteredTerms(s.getUserContext()), + d_registeredTerms(s.getUserContext()), + d_registeredTypes(s.getUserContext()), + d_proxyVar(s.getUserContext()), + d_proxyVarToLength(s.getUserContext()), + d_lengthLemmaTermsCache(s.getUserContext()), + d_epg(nullptr) { NodeManager* nm = NodeManager::currentNM(); d_zero = nm->mkConst(Rational(0)); @@ -62,6 +65,64 @@ TermRegistry::TermRegistry(context::Context* c, TermRegistry::~TermRegistry() {} +Node TermRegistry::eagerReduce(Node t, SkolemCache* sc) +{ + NodeManager* nm = NodeManager::currentNM(); + Node lemma; + Kind tk = t.getKind(); + if (tk == STRING_TO_CODE) + { + // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 ) + Node code_len = utils::mkNLength(t[0]).eqNode(nm->mkConst(Rational(1))); + Node code_eq_neg1 = t.eqNode(nm->mkConst(Rational(-1))); + Node code_range = nm->mkNode( + AND, + nm->mkNode(GEQ, t, nm->mkConst(Rational(0))), + nm->mkNode( + LT, t, nm->mkConst(Rational(utils::getAlphabetCardinality())))); + lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1); + } + else if (tk == STRING_STRIDOF) + { + // (and (>= (str.indexof x y n) (- 1)) (<= (str.indexof x y n) (str.len + // x))) + Node l = utils::mkNLength(t[0]); + lemma = nm->mkNode(AND, + nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))), + nm->mkNode(LEQ, t, l)); + } + else if (tk == STRING_STOI) + { + // (>= (str.to_int x) (- 1)) + lemma = nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))); + } + else if (tk == STRING_STRCTN) + { + // ite( (str.contains s r), (= s (str.++ sk1 r sk2)), (not (= s r))) + Node sk1 = + sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_PRE, "sc1"); + Node sk2 = + sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_POST, "sc2"); + lemma = t[0].eqNode(utils::mkNConcat(sk1, t[1], sk2)); + lemma = nm->mkNode(ITE, t, lemma, t[0].eqNode(t[1]).notNode()); + } + return lemma; +} + +Node TermRegistry::lengthPositive(Node t) +{ + NodeManager* nm = NodeManager::currentNM(); + Node zero = nm->mkConst(Rational(0)); + Node emp = Word::mkEmptyWord(t.getType()); + Node tlen = nm->mkNode(STRING_LENGTH, t); + Node tlenEqZero = tlen.eqNode(zero); + Node tEqEmp = t.eqNode(emp); + Node caseEmpty = nm->mkNode(AND, tlenEqZero, tEqEmp); + Node caseNEmpty = nm->mkNode(GT, tlen, zero); + // (or (and (= (str.len t) 0) (= t "")) (> (str.len t) 0)) + return nm->mkNode(OR, caseEmpty, caseNEmpty); +} + void TermRegistry::preRegisterTerm(TNode n) { if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end()) @@ -76,9 +137,10 @@ void TermRegistry::preRegisterTerm(TNode n) if (!options::stringExp()) { if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI - || k == STRING_STRREPL || k == STRING_STRREPLALL || k == STRING_STRCTN - || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER - || k == STRING_REV) + || k == STRING_STRREPL || k == STRING_STRREPLALL + || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL + || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER + || k == STRING_TOUPPER || k == STRING_REV) { std::stringstream ss; ss << "Term of kind " << k @@ -201,7 +263,6 @@ void TermRegistry::registerTerm(Node n, int effort) d_registeredTerms.insert(n); // ensure the type is registered registerType(tn); - NodeManager* nm = NodeManager::currentNM(); Debug("strings-register") << "TheoryStrings::registerTerm() " << n << ", effort = " << effort << std::endl; Node regTermLem; @@ -212,28 +273,10 @@ void TermRegistry::registerTerm(Node n, int effort) // for concat/const/replace, introduce proxy var and state length relation regTermLem = getRegisterTermLemma(n); } - else if (n.getKind() == STRING_TO_CODE) - { - // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 ) - Node code_len = utils::mkNLength(n[0]).eqNode(d_one); - Node code_eq_neg1 = n.eqNode(d_negOne); - Node code_range = nm->mkNode( - AND, - nm->mkNode(GEQ, n, d_zero), - nm->mkNode( - LT, n, nm->mkConst(Rational(utils::getAlphabetCardinality())))); - regTermLem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1); - } - else if (n.getKind() == STRING_STRIDOF) + else if (n.getKind() != STRING_STRCTN) { - Node len = utils::mkNLength(n[0]); - regTermLem = nm->mkNode(AND, - nm->mkNode(GEQ, n, nm->mkConst(Rational(-1))), - nm->mkNode(LEQ, n, len)); - } - else if (n.getKind() == STRING_STOI) - { - regTermLem = nm->mkNode(GEQ, n, nm->mkConst(Rational(-1))); + // we don't send out eager reduction lemma for str.contains currently + regTermLem = eagerReduce(n, &d_skCache); } if (!regTermLem.isNull()) { @@ -323,7 +366,9 @@ Node TermRegistry::getRegisterTermLemma(Node n) d_proxyVarToLength[sk] = lsum; Node ceq = Rewriter::rewrite(skl.eqNode(lsum)); - return nm->mkNode(AND, eq, ceq); + Node ret = nm->mkNode(AND, eq, ceq); + + return ret; } void TermRegistry::registerTermAtomic(Node n, LengthStatus s) @@ -406,17 +451,15 @@ Node TermRegistry::getRegisterTermAtomicLemma(Node n, } Assert(s == LENGTH_SPLIT); - std::vector<Node> lems; + // get the positive length lemma + Node lenLemma = lengthPositive(n); // split whether the string is empty Node n_len_eq_z = n_len.eqNode(d_zero); Node n_len_eq_z_2 = n.eqNode(emp); Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2); - case_empty = Rewriter::rewrite(case_empty); - Node case_nempty = nm->mkNode(GT, n_len, d_zero); - if (!case_empty.isConst()) + Node case_emptyr = Rewriter::rewrite(case_empty); + if (!case_emptyr.isConst()) { - Node lem = nm->mkNode(OR, case_empty, case_nempty); - lems.push_back(lem); // prefer trying the empty case first // notice that requirePhase must only be called on rewritten literals that // occur in the CNF stream. @@ -427,24 +470,15 @@ Node TermRegistry::getRegisterTermAtomicLemma(Node n, Assert(!n_len_eq_z_2.isConst()); reqPhase[n_len_eq_z_2] = true; } - else if (!case_empty.getConst<bool>()) - { - // the rewriter knows that n is non-empty - lems.push_back(case_nempty); - } else { // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that // n ---> "". Since this method is only called on non-constants n, it must // be that n = "" ^ len( n ) = 0 does not rewrite to true. - Assert(false); + Assert(!case_emptyr.getConst<bool>()); } - if (lems.empty()) - { - return Node::null(); - } - return lems.size() == 1 ? lems[0] : nm->mkNode(AND, lems); + return lenLemma; } Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const diff --git a/src/theory/strings/term_registry.h b/src/theory/strings/term_registry.h index b68e44b81..d0589be90 100644 --- a/src/theory/strings/term_registry.h +++ b/src/theory/strings/term_registry.h @@ -1,10 +1,10 @@ /********************* */ -/*! \file theory_strings.h +/*! \file term_registry.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tianyi Liang, Tim King + ** Andrew Reynolds, Tim King, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -19,10 +19,13 @@ #include "context/cdhashset.h" #include "context/cdlist.h" +#include "expr/proof_node_manager.h" +#include "theory/eager_proof_generator.h" #include "theory/output_channel.h" #include "theory/strings/infer_info.h" #include "theory/strings/sequences_stats.h" #include "theory/strings/skolem_cache.h" +#include "theory/strings/solver_state.h" #include "theory/uf/equality_engine.h" namespace CVC4 { @@ -46,12 +49,34 @@ class TermRegistry typedef context::CDHashMap<Node, Node, NodeHashFunction> NodeNodeMap; public: - TermRegistry(context::Context* c, - context::UserContext* u, + TermRegistry(SolverState& s, eq::EqualityEngine& ee, OutputChannel& out, - SequencesStatistics& statistics); + SequencesStatistics& statistics, + ProofNodeManager* pnm); ~TermRegistry(); + /** The eager reduce routine + * + * Constructs a lemma for t that is incomplete, but communicates pertinent + * information about t. This is analogous to StringsPreprocess::reduce. + * + * In practice, we send this lemma eagerly, as soon as t is registered. + * + * @param t The node to reduce, + * @param sc The Skolem cache to use for new variables, + * @return The eager reduction for t. + */ + static Node eagerReduce(Node t, SkolemCache* sc); + /** + * Returns a lemma indicating that the length of a term t whose type is + * string-like has positive length. The exact form of this lemma depends + * on what works best in practice, currently: + * (or (and (= (str.len t) 0) (= t "")) (> (str.len t) 0)) + * + * @param t The node to reduce, + * @return The positive length lemma for t. + */ + static Node lengthPositive(Node t); /** * Preregister term, called when TheoryStrings::preRegisterTerm(n) is called. * This does the following: @@ -184,6 +209,8 @@ class TermRegistry Node d_negOne; /** the cardinality of the alphabet */ uint32_t d_cardSize; + /** Reference to the solver state of the theory of strings. */ + SolverState& d_state; /** Reference to equality engine of the theory of strings. */ eq::EqualityEngine& d_ee; /** Reference to the output channel of the theory of strings. */ @@ -224,6 +251,8 @@ class TermRegistry NodeNodeMap d_proxyVarToLength; /** List of terms that we have register length for */ NodeSet d_lengthLemmaTermsCache; + /** Proof generator, manages proofs for lemmas generated by this class */ + std::unique_ptr<EagerProofGenerator> d_epg; /** Register type * * Ensures the theory solver is setup to handle string-like type tn. In diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 5107fa3f9..d1b18df13 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -2,28 +2,22 @@ /*! \file theory_strings.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tianyi Liang, Morgan Deters + ** Andrew Reynolds, Tianyi Liang, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** ** \brief Implementation of the theory of strings. - ** - ** Implementation of the theory of strings. **/ #include "theory/strings/theory_strings.h" -#include <cmath> - #include "expr/kind.h" #include "options/strings_options.h" #include "options/theory_options.h" -#include "smt/command.h" #include "smt/logic_exception.h" -#include "smt/smt_statistics_registry.h" #include "theory/ext_theory.h" #include "theory/rewriter.h" #include "theory/strings/theory_strings_utils.h" @@ -40,28 +34,6 @@ namespace CVC4 { namespace theory { namespace strings { -std::ostream& operator<<(std::ostream& out, InferStep s) -{ - switch (s) - { - case BREAK: out << "break"; break; - case CHECK_INIT: out << "check_init"; break; - case CHECK_CONST_EQC: out << "check_const_eqc"; break; - case CHECK_EXTF_EVAL: out << "check_extf_eval"; break; - case CHECK_CYCLES: out << "check_cycles"; break; - case CHECK_FLAT_FORMS: out << "check_flat_forms"; break; - case CHECK_NORMAL_FORMS_EQ: out << "check_normal_forms_eq"; break; - case CHECK_NORMAL_FORMS_DEQ: out << "check_normal_forms_deq"; break; - case CHECK_CODES: out << "check_codes"; break; - case CHECK_LENGTH_EQC: out << "check_length_eqc"; break; - case CHECK_EXTF_REDUCTION: out << "check_extf_reduction"; break; - case CHECK_MEMBERSHIP: out << "check_membership"; break; - case CHECK_CARDINALITY: out << "check_cardinality"; break; - default: out << "?"; break; - } - return out; -} - TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, OutputChannel& out, @@ -72,15 +44,14 @@ TheoryStrings::TheoryStrings(context::Context* c, d_statistics(), d_equalityEngine(d_notify, c, "theory::strings::ee", true), d_state(c, u, d_equalityEngine, d_valuation), - d_termReg(c, u, d_equalityEngine, out, d_statistics), + d_termReg(d_state, d_equalityEngine, out, d_statistics, nullptr), d_im(nullptr), d_rewriter(&d_statistics.d_rewrites), d_bsolver(nullptr), d_csolver(nullptr), d_esolver(nullptr), d_rsolver(nullptr), - d_stringsFmf(c, u, valuation, d_termReg), - d_strategy_init(false) + d_stringsFmf(c, u, valuation, d_termReg) { setupExtTheory(); ExtTheory* extt = getExtTheory(); @@ -88,7 +59,7 @@ TheoryStrings::TheoryStrings(context::Context* c, d_im.reset( new InferenceManager(c, u, d_state, d_termReg, *extt, out, d_statistics)); // initialize the solvers - d_bsolver.reset(new BaseSolver(c, u, d_state, *d_im)); + d_bsolver.reset(new BaseSolver(d_state, *d_im)); d_csolver.reset(new CoreSolver(c, u, d_state, *d_im, d_termReg, *d_bsolver)); d_esolver.reset(new ExtfSolver(c, u, @@ -108,6 +79,7 @@ TheoryStrings::TheoryStrings(context::Context* c, d_equalityEngine.addFunctionKind(kind::STRING_CONCAT); d_equalityEngine.addFunctionKind(kind::STRING_IN_REGEXP); d_equalityEngine.addFunctionKind(kind::STRING_TO_CODE); + d_equalityEngine.addFunctionKind(kind::SEQ_UNIT); // extended functions d_equalityEngine.addFunctionKind(kind::STRING_STRCTN); @@ -118,6 +90,9 @@ TheoryStrings::TheoryStrings(context::Context* c, d_equalityEngine.addFunctionKind(kind::STRING_STRIDOF); d_equalityEngine.addFunctionKind(kind::STRING_STRREPL); d_equalityEngine.addFunctionKind(kind::STRING_STRREPLALL); + d_equalityEngine.addFunctionKind(kind::STRING_REPLACE_RE); + d_equalityEngine.addFunctionKind(kind::STRING_REPLACE_RE_ALL); + d_equalityEngine.addFunctionKind(kind::STRING_STRREPLALL); d_equalityEngine.addFunctionKind(kind::STRING_TOLOWER); d_equalityEngine.addFunctionKind(kind::STRING_TOUPPER); d_equalityEngine.addFunctionKind(kind::STRING_REV); @@ -135,6 +110,15 @@ TheoryStrings::~TheoryStrings() { } +TheoryRewriter* TheoryStrings::getTheoryRewriter() { return &d_rewriter; } +std::string TheoryStrings::identify() const +{ + return std::string("TheoryStrings"); +} +eq::EqualityEngine* TheoryStrings::getEqualityEngine() +{ + return &d_equalityEngine; +} void TheoryStrings::finishInit() { TheoryModel* tm = d_valuation.getModel(); @@ -231,14 +215,9 @@ bool TheoryStrings::getCurrentSubstitution( int effort, std::vector< Node >& var return true; } -///////////////////////////////////////////////////////////////////////////// -// NOTIFICATIONS -///////////////////////////////////////////////////////////////////////////// - - void TheoryStrings::presolve() { Debug("strings-presolve") << "TheoryStrings::Presolving : get fmf options " << (options::stringFMF() ? "true" : "false") << std::endl; - initializeStrategy(); + d_strat.initializeStrategy(); // if strings fmf is enabled, register the strategy if (options::stringFMF()) @@ -293,7 +272,14 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m) std::unordered_set<Node, NodeHashFunction> >& rst : repSet) { - if (!collectModelInfoType(rst.first, rst.second, m)) + // get partition of strings of equal lengths, per type + std::map<TypeNode, std::vector<std::vector<Node> > > colT; + std::map<TypeNode, std::vector<Node> > ltsT; + std::vector<Node> repVec(rst.second.begin(), rst.second.end()); + d_state.separateByLength(repVec, colT, ltsT); + // now collect model info for the type + TypeNode st = rst.first; + if (!collectModelInfoType(st, rst.second, colT[st], ltsT[st], m)) { return false; } @@ -304,14 +290,12 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m) bool TheoryStrings::collectModelInfoType( TypeNode tn, const std::unordered_set<Node, NodeHashFunction>& repSet, + std::vector<std::vector<Node> >& col, + std::vector<Node>& lts, TheoryModel* m) { NodeManager* nm = NodeManager::currentNM(); - std::vector<Node> nodes(repSet.begin(), repSet.end()); std::map< Node, Node > processed; - std::vector< std::vector< Node > > col; - std::vector< Node > lts; - d_state.separateByLength(nodes, col, lts); //step 1 : get all values for known lengths std::vector< Node > lts_values; std::map<unsigned, Node> values_used; @@ -379,9 +363,15 @@ bool TheoryStrings::collectModelInfoType( NormalForm& nfe = d_csolver->getNormalForm(eqc); if (nfe.d_nf.size() == 1) { + // is it an equivalence class with a seq.unit term? + if (nfe.d_nf[0].getKind() == SEQ_UNIT) + { + pure_eq_assign[eqc] = nfe.d_nf[0]; + Trace("strings-model") << "(unit: " << nfe.d_nf[0] << ") "; + } // does it have a code and the length of these equivalence classes are // one? - if (d_termReg.hasStringCode() && lts_values[i] == d_one) + else if (d_termReg.hasStringCode() && lts_values[i] == d_one) { EqcInfo* eip = d_state.getOrMakeEqcInfo(eqc, false); if (eip && !eip->d_codeTerm.get().isNull()) @@ -435,14 +425,14 @@ bool TheoryStrings::collectModelInfoType( std::unique_ptr<SEnumLen> sel; Trace("strings-model") << "Cardinality of alphabet is " << utils::getAlphabetCardinality() << std::endl; - if (tn.isString()) + if (tn.isString()) // string-only { sel.reset(new StringEnumLen( currLen, currLen, utils::getAlphabetCardinality())); } else { - Unimplemented() << "Collect model info not implemented for type " << tn; + sel.reset(new SeqEnumLen(tn, nullptr, currLen, currLen)); } for (const Node& eqc : pure_eq) { @@ -518,13 +508,15 @@ bool TheoryStrings::collectModelInfoType( } Trace("strings-model") << "String Model : Pure Assigned." << std::endl; //step 4 : assign constants to all other equivalence classes - for( unsigned i=0; i<nodes.size(); i++ ){ - if( processed.find( nodes[i] )==processed.end() ){ - NormalForm& nf = d_csolver->getNormalForm(nodes[i]); + for (const Node& rn : repSet) + { + if (processed.find(rn) == processed.end()) + { + NormalForm& nf = d_csolver->getNormalForm(rn); if (Trace.isOn("strings-model")) { Trace("strings-model") - << "Construct model for " << nodes[i] << " based on normal form "; + << "Construct model for " << rn << " based on normal form "; for (unsigned j = 0, size = nf.d_nf.size(); j < size; j++) { Node n = nf.d_nf[j]; @@ -549,19 +541,24 @@ bool TheoryStrings::collectModelInfoType( nc.push_back(r.isConst() ? r : processed[r]); } Node cc = utils::mkNConcat(nc, tn); - Assert(cc.isConst()); - Trace("strings-model") << "*** Determined constant " << cc << " for " << nodes[i] << std::endl; - processed[nodes[i]] = cc; - if (!m->assertEquality(nodes[i], cc, true)) + Trace("strings-model") + << "*** Determined constant " << cc << " for " << rn << std::endl; + processed[rn] = cc; + if (!m->assertEquality(rn, cc, true)) { // this should never happen due to the model soundness argument // for strings - Unreachable() << "TheoryStrings::collectModelInfoType: " "Inconsistent equality (unprocessed eqc)" << std::endl; return false; } + else if (!cc.isConst()) + { + // the value may be specified by seq.unit components, ensure this + // is marked as the skeleton for constructing values in this class. + m->assertSkeleton(cc); + } } } //Trace("strings-model") << "String Model : Assigned." << std::endl; @@ -627,11 +624,9 @@ void TheoryStrings::check(Effort e) { } d_im->doPendingFacts(); - Assert(d_strategy_init); - std::map<Effort, std::pair<unsigned, unsigned> >::iterator itsr = - d_strat_steps.find(e); + Assert(d_strat.isStrategyInit()); if (!d_state.isInConflict() && !d_valuation.needCheck() - && itsr != d_strat_steps.end()) + && d_strat.hasStrategyEffort(e)) { Trace("strings-check-debug") << "Theory of strings " << e << " effort check " << std::endl; @@ -672,15 +667,13 @@ void TheoryStrings::check(Effort e) { Trace("strings-eqc") << std::endl; } ++(d_statistics.d_checkRuns); - unsigned sbegin = itsr->second.first; - unsigned send = itsr->second.second; bool addedLemma = false; bool addedFact; Trace("strings-check") << "Full effort check..." << std::endl; do{ ++(d_statistics.d_strategyRuns); Trace("strings-check") << " * Run strategy..." << std::endl; - runStrategy(sbegin, send); + runStrategy(e); // flush the facts addedFact = d_im->hasPendingFact(); addedLemma = d_im->hasPendingLemma(); @@ -1033,107 +1026,16 @@ void TheoryStrings::runInferStep(InferStep s, int effort) << std::endl; } -bool TheoryStrings::hasStrategyEffort(Effort e) const -{ - return d_strat_steps.find(e) != d_strat_steps.end(); -} - -void TheoryStrings::addStrategyStep(InferStep s, int effort, bool addBreak) +void TheoryStrings::runStrategy(Theory::Effort e) { - // must run check init first - Assert((s == CHECK_INIT) == d_infer_steps.empty()); - // must use check cycles when using flat forms - Assert(s != CHECK_FLAT_FORMS - || std::find(d_infer_steps.begin(), d_infer_steps.end(), CHECK_CYCLES) - != d_infer_steps.end()); - d_infer_steps.push_back(s); - d_infer_step_effort.push_back(effort); - if (addBreak) - { - d_infer_steps.push_back(BREAK); - d_infer_step_effort.push_back(0); - } -} + std::vector<std::pair<InferStep, int> >::iterator it = d_strat.stepBegin(e); + std::vector<std::pair<InferStep, int> >::iterator stepEnd = + d_strat.stepEnd(e); -void TheoryStrings::initializeStrategy() -{ - // initialize the strategy if not already done so - if (!d_strategy_init) - { - std::map<Effort, unsigned> step_begin; - std::map<Effort, unsigned> step_end; - d_strategy_init = true; - // beginning indices - step_begin[EFFORT_FULL] = 0; - if (options::stringEager()) - { - step_begin[EFFORT_STANDARD] = 0; - } - // add the inference steps - addStrategyStep(CHECK_INIT); - addStrategyStep(CHECK_CONST_EQC); - addStrategyStep(CHECK_EXTF_EVAL, 0); - addStrategyStep(CHECK_CYCLES); - if (options::stringFlatForms()) - { - addStrategyStep(CHECK_FLAT_FORMS); - } - addStrategyStep(CHECK_EXTF_REDUCTION, 1); - if (options::stringEager()) - { - // do only the above inferences at standard effort, if applicable - step_end[EFFORT_STANDARD] = d_infer_steps.size() - 1; - } - if (!options::stringEagerLen()) - { - addStrategyStep(CHECK_REGISTER_TERMS_PRE_NF); - } - addStrategyStep(CHECK_NORMAL_FORMS_EQ); - addStrategyStep(CHECK_EXTF_EVAL, 1); - if (!options::stringEagerLen() && options::stringLenNorm()) - { - addStrategyStep(CHECK_LENGTH_EQC, 0, false); - addStrategyStep(CHECK_REGISTER_TERMS_NF); - } - addStrategyStep(CHECK_NORMAL_FORMS_DEQ); - addStrategyStep(CHECK_CODES); - if (options::stringEagerLen() && options::stringLenNorm()) - { - addStrategyStep(CHECK_LENGTH_EQC); - } - if (options::stringExp() && !options::stringGuessModel()) - { - addStrategyStep(CHECK_EXTF_REDUCTION, 2); - } - addStrategyStep(CHECK_MEMBERSHIP); - addStrategyStep(CHECK_CARDINALITY); - step_end[EFFORT_FULL] = d_infer_steps.size() - 1; - if (options::stringExp() && options::stringGuessModel()) - { - step_begin[EFFORT_LAST_CALL] = d_infer_steps.size(); - // these two steps are run in parallel - addStrategyStep(CHECK_EXTF_REDUCTION, 2, false); - addStrategyStep(CHECK_EXTF_EVAL, 3); - step_end[EFFORT_LAST_CALL] = d_infer_steps.size() - 1; - } - // set the beginning/ending ranges - for (const std::pair<const Effort, unsigned>& it_begin : step_begin) - { - Effort e = it_begin.first; - std::map<Effort, unsigned>::iterator it_end = step_end.find(e); - Assert(it_end != step_end.end()); - d_strat_steps[e] = - std::pair<unsigned, unsigned>(it_begin.second, it_end->second); - } - } -} - -void TheoryStrings::runStrategy(unsigned sbegin, unsigned send) -{ Trace("strings-process") << "----check, next round---" << std::endl; - for (unsigned i = sbegin; i <= send; i++) + while (it != stepEnd) { - InferStep curr = d_infer_steps[i]; + InferStep curr = it->first; if (curr == BREAK) { if (d_im->hasProcessed()) @@ -1143,12 +1045,13 @@ void TheoryStrings::runStrategy(unsigned sbegin, unsigned send) } else { - runInferStep(curr, d_infer_step_effort[i]); + runInferStep(curr, it->second); if (d_state.isInConflict()) { break; } } + ++it; } Trace("strings-process") << "----finished round---" << std::endl; } diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 7c99b6968..368c13a2d 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -4,7 +4,7 @@ ** Top contributors (to current version): ** Andrew Reynolds, Tianyi Liang, Tim King ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -35,8 +35,8 @@ #include "theory/strings/regexp_operation.h" #include "theory/strings/regexp_solver.h" #include "theory/strings/sequences_stats.h" -#include "theory/strings/skolem_cache.h" #include "theory/strings/solver_state.h" +#include "theory/strings/strategy.h" #include "theory/strings/strings_fmf.h" #include "theory/strings/strings_rewriter.h" #include "theory/strings/term_registry.h" @@ -48,91 +48,72 @@ namespace theory { namespace strings { /** - * Decision procedure for strings. - * + * A theory solver for strings. At a high level, the solver implements + * techniques described in: + * - Liang et al, CAV 2014, + * - Reynolds et al, CAV 2017, + * - Reynolds et al, IJCAR 2020. + * Its rewriter is described in: + * - Reynolds et al, CAV 2019. */ - -/** inference steps - * - * Corresponds to a step in the overall strategy of the strings solver. For - * details on the individual steps, see documentation on the inference schemas - * within TheoryStrings. - */ -enum InferStep -{ - // indicates that the strategy should break if lemmas or facts are added - BREAK, - // check initial - CHECK_INIT, - // check constant equivalence classes - CHECK_CONST_EQC, - // check extended function evaluation - CHECK_EXTF_EVAL, - // check cycles - CHECK_CYCLES, - // check flat forms - CHECK_FLAT_FORMS, - // check register terms pre-normal forms - CHECK_REGISTER_TERMS_PRE_NF, - // check normal forms equalities - CHECK_NORMAL_FORMS_EQ, - // check normal forms disequalities - CHECK_NORMAL_FORMS_DEQ, - // check codes - CHECK_CODES, - // check lengths for equivalence classes - CHECK_LENGTH_EQC, - // check register terms for normal forms - CHECK_REGISTER_TERMS_NF, - // check extended function reductions - CHECK_EXTF_REDUCTION, - // check regular expression memberships - CHECK_MEMBERSHIP, - // check cardinality - CHECK_CARDINALITY, -}; -std::ostream& operator<<(std::ostream& out, Inference i); - class TheoryStrings : public Theory { friend class InferenceManager; - typedef context::CDList<Node> NodeList; - typedef context::CDHashMap<Node, bool, NodeHashFunction> NodeBoolMap; - typedef context::CDHashMap<Node, int, NodeHashFunction> NodeIntMap; - typedef context::CDHashMap<Node, Node, NodeHashFunction> NodeNodeMap; typedef context::CDHashSet<Node, NodeHashFunction> NodeSet; typedef context::CDHashSet<TypeNode, TypeNodeHashFunction> TypeNodeSet; - public: TheoryStrings(context::Context* c, context::UserContext* u, OutputChannel& out, Valuation valuation, const LogicInfo& logicInfo); ~TheoryStrings(); - /** finish initialization */ void finishInit() override; - - TheoryRewriter* getTheoryRewriter() override { return &d_rewriter; } - + /** Get the theory rewriter of this class */ + TheoryRewriter* getTheoryRewriter() override; + /** Set the master equality engine */ void setMasterEqualityEngine(eq::EqualityEngine* eq) override; - - std::string identify() const override { return std::string("TheoryStrings"); } - - public: + /** Identify this theory */ + std::string identify() const override; + /** Propagate */ void propagate(Effort e) override; - bool propagate(TNode literal); + /** Explain */ Node explain(TNode literal) override; - eq::EqualityEngine* getEqualityEngine() override { return &d_equalityEngine; } + /** Get the equality engine */ + eq::EqualityEngine* getEqualityEngine() override; + /** Get current substitution */ bool getCurrentSubstitution(int effort, std::vector<Node>& vars, std::vector<Node>& subs, std::map<Node, std::vector<Node> >& exp) override; + /** presolve */ + void presolve() override; + /** shutdown */ + void shutdown() override {} + /** add shared term */ + void addSharedTerm(TNode n) override; + /** get equality status */ + EqualityStatus getEqualityStatus(TNode a, TNode b) override; + /** preregister term */ + void preRegisterTerm(TNode n) override; + /** Expand definition */ + Node expandDefinition(Node n) override; + /** Check at effort e */ + void check(Effort e) override; + /** needs check last effort */ + bool needsCheckLastEffort() override; + /** Conflict when merging two constants */ + void conflict(TNode a, TNode b); + /** called when a new equivalence class is created */ + void eqNotifyNewClass(TNode t); + /** preprocess rewrite */ + Node ppRewrite(TNode atom) override; /** * Get all relevant information in this theory regarding the current * model. Return false if a contradiction is discovered. */ bool collectModelInfo(TheoryModel* m) override; - // NotifyClass for equality engine + private: + /** NotifyClass for equality engine */ class NotifyClass : public eq::EqualityEngineNotify { public: NotifyClass(TheoryStrings& ts) : d_str(ts), d_state(ts.d_state) {} @@ -202,74 +183,8 @@ class TheoryStrings : public Theory { /** The solver state of the theory of strings */ SolverState& d_state; };/* class TheoryStrings::NotifyClass */ - - private: - // Constants - Node d_emptyString; - Node d_true; - Node d_false; - Node d_zero; - Node d_one; - Node d_neg_one; - /** the cardinality of the alphabet */ - uint32_t d_cardSize; - /** The notify class */ - NotifyClass d_notify; - - /** - * Statistics for the theory of strings/sequences. All statistics for these - * theories is collected in this object. - */ - SequencesStatistics d_statistics; - - /** Equaltity engine */ - eq::EqualityEngine d_equalityEngine; - /** The solver state object */ - SolverState d_state; - /** The term registry for this theory */ - TermRegistry d_termReg; - /** The (custom) output channel of the theory of strings */ - std::unique_ptr<InferenceManager> d_im; - - private: - std::map< Node, Node > d_eqc_to_len_term; - - - ///////////////////////////////////////////////////////////////////////////// - // NOTIFICATIONS - ///////////////////////////////////////////////////////////////////////////// - public: - void presolve() override; - void shutdown() override {} - - ///////////////////////////////////////////////////////////////////////////// - // MAIN SOLVER - ///////////////////////////////////////////////////////////////////////////// - private: - void addSharedTerm(TNode n) override; - EqualityStatus getEqualityStatus(TNode a, TNode b) override; - - private: - void addCarePairs(TNodeTrie* t1, - TNodeTrie* t2, - unsigned arity, - unsigned depth); - - public: - /** preregister term */ - void preRegisterTerm(TNode n) override; - /** Expand definition */ - Node expandDefinition(Node n) override; - /** Check at effort e */ - void check(Effort e) override; - /** needs check last effort */ - bool needsCheckLastEffort() override; - /** Conflict when merging two constants */ - void conflict(TNode a, TNode b); - /** called when a new equivalence class is created */ - void eqNotifyNewClass(TNode t); - - protected: + /** propagate method */ + bool propagate(TNode literal); /** compute care graph */ void computeCareGraph() override; /** @@ -277,17 +192,25 @@ class TheoryStrings : public Theory { * the care graph in the above function. */ bool areCareDisequal(TNode x, TNode y); - + /** Add care pairs */ + void addCarePairs(TNodeTrie* t1, + TNodeTrie* t2, + unsigned arity, + unsigned depth); /** Collect model info for type tn * * Assigns model values (in m) to all relevant terms of the string-like type - * tn in the current context, which are stored in repSet. + * tn in the current context, which are stored in repSet. Furthermore, + * col is a partition of repSet where equivalence classes are grouped into + * sets having equal length, where these lengths are stored in lts. * * Returns false if a conflict is discovered while doing this assignment. */ bool collectModelInfoType( TypeNode tn, const std::unordered_set<Node, NodeHashFunction>& repSet, + std::vector<std::vector<Node> >& col, + std::vector<Node>& lts, TheoryModel* m); /** assert pending fact @@ -299,38 +222,6 @@ class TheoryStrings : public Theory { * of atom, including calls to registerTerm. */ void assertPendingFact(Node atom, bool polarity, Node exp); - - // Symbolic Regular Expression - private: - /** The theory rewriter for this theory. */ - StringsRewriter d_rewriter; - /** - * The base solver, responsible for reasoning about congruent terms and - * inferring constants for equivalence classes. - */ - std::unique_ptr<BaseSolver> d_bsolver; - /** - * The core solver, responsible for reasoning about string concatenation - * with length constraints. - */ - std::unique_ptr<CoreSolver> d_csolver; - /** - * Extended function solver, responsible for reductions and simplifications - * involving extended string functions. - */ - std::unique_ptr<ExtfSolver> d_esolver; - /** regular expression solver module */ - std::unique_ptr<RegExpSolver> d_rsolver; - /** regular expression elimination module */ - RegExpElimination d_regexp_elim; - /** Strings finite model finding decision strategy */ - StringsFmf d_stringsFmf; - - public: - // ppRewrite - Node ppRewrite(TNode atom) override; - - private: //-----------------------inference steps /** check register terms pre-normal forms * @@ -356,42 +247,58 @@ class TheoryStrings : public Theory { */ void checkRegisterTermsNormalForms(); //-----------------------end inference steps - - //-----------------------representation of the strategy - /** is strategy initialized */ - bool d_strategy_init; /** run the given inference step */ void runInferStep(InferStep s, int effort); - /** the strategy */ - std::vector<InferStep> d_infer_steps; - /** the effort levels */ - std::vector<int> d_infer_step_effort; - /** the range (begin, end) of steps to run at given efforts */ - std::map<Effort, std::pair<unsigned, unsigned> > d_strat_steps; - /** do we have a strategy for effort e? */ - bool hasStrategyEffort(Effort e) const; - /** initialize the strategy - * - * This adds (s,effort) as a strategy step to the vectors d_infer_steps and - * d_infer_step_effort. This indicates that a call to runInferStep should - * be run as the next step in the strategy. If addBreak is true, we add - * a BREAK to the strategy following this step. + /** run strategy for effort e */ + void runStrategy(Theory::Effort e); + /** Commonly used constants */ + Node d_true; + Node d_false; + Node d_zero; + Node d_one; + Node d_neg_one; + /** the cardinality of the alphabet */ + uint32_t d_cardSize; + /** The notify class */ + NotifyClass d_notify; + /** + * Statistics for the theory of strings/sequences. All statistics for these + * theories is collected in this object. */ - void addStrategyStep(InferStep s, int effort = 0, bool addBreak = true); - /** initialize the strategy - * - * This initializes the above information based on the options. This makes - * a series of calls to addStrategyStep above. + SequencesStatistics d_statistics; + /** Equaltity engine */ + eq::EqualityEngine d_equalityEngine; + /** The solver state object */ + SolverState d_state; + /** The term registry for this theory */ + TermRegistry d_termReg; + /** The (custom) output channel of the theory of strings */ + std::unique_ptr<InferenceManager> d_im; + /** The theory rewriter for this theory. */ + StringsRewriter d_rewriter; + /** + * The base solver, responsible for reasoning about congruent terms and + * inferring constants for equivalence classes. */ - void initializeStrategy(); - /** run strategy - * - * This executes the inference steps starting at index sbegin and ending at - * index send. We exit if any step in this sequence adds a lemma or infers a - * fact. + std::unique_ptr<BaseSolver> d_bsolver; + /** + * The core solver, responsible for reasoning about string concatenation + * with length constraints. */ - void runStrategy(unsigned sbegin, unsigned send); - //-----------------------end representation of the strategy + std::unique_ptr<CoreSolver> d_csolver; + /** + * Extended function solver, responsible for reductions and simplifications + * involving extended string functions. + */ + std::unique_ptr<ExtfSolver> d_esolver; + /** regular expression solver module */ + std::unique_ptr<RegExpSolver> d_rsolver; + /** regular expression elimination module */ + RegExpElimination d_regexp_elim; + /** Strings finite model finding decision strategy */ + StringsFmf d_stringsFmf; + /** The representation of the strategy */ + Strategy d_strat; };/* class TheoryStrings */ }/* CVC4::theory::strings namespace */ diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 939146a3d..af946567b 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -2,9 +2,9 @@ /*! \file theory_strings_preprocess.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tianyi Liang + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -38,48 +38,45 @@ StringsPreprocess::StringsPreprocess(SkolemCache* sc, SequencesStatistics& stats) : d_sc(sc), d_statistics(stats) { - //Constants - d_zero = NodeManager::currentNM()->mkConst(Rational(0)); - d_one = NodeManager::currentNM()->mkConst(Rational(1)); - d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1)); } StringsPreprocess::~StringsPreprocess(){ } -Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { - unsigned prev_new_nodes = new_nodes.size(); - Trace("strings-preprocess-debug") << "StringsPreprocess::simplify: " << t << std::endl; +Node StringsPreprocess::reduce(Node t, + std::vector<Node>& asserts, + SkolemCache* sc) +{ + Trace("strings-preprocess-debug") + << "StringsPreprocess::reduce: " << t << std::endl; Node retNode = t; - NodeManager *nm = NodeManager::currentNM(); + NodeManager* nm = NodeManager::currentNM(); + Node zero = nm->mkConst(Rational(0)); + Node one = nm->mkConst(Rational(1)); + Node negOne = nm->mkConst(Rational(-1)); if( t.getKind() == kind::STRING_SUBSTR ) { // processing term: substr( s, n, m ) Node s = t[0]; Node n = t[1]; Node m = t[2]; - Node skt = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst"); + Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst"); Node t12 = nm->mkNode(PLUS, n, m); t12 = Rewriter::rewrite(t12); Node lt0 = nm->mkNode(STRING_LENGTH, s); //start point is greater than or equal zero - Node c1 = nm->mkNode(GEQ, n, d_zero); + Node c1 = nm->mkNode(GEQ, n, zero); //start point is less than end of string Node c2 = nm->mkNode(GT, lt0, n); //length is positive - Node c3 = nm->mkNode(GT, m, d_zero); + Node c3 = nm->mkNode(GT, m, zero); Node cond = nm->mkNode(AND, c1, c2, c3); Node emp = Word::mkEmptyWord(t.getType()); - Node sk1 = n == d_zero ? emp - : d_sc->mkSkolemCached( - s, n, SkolemCache::SK_PREFIX, "sspre"); - Node sk2 = ArithEntail::check(t12, lt0) - ? emp - : d_sc->mkSkolemCached( - s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr"); + Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre"); + Node sk2 = sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr"); Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, skt, sk2)); //length of first skolem is second argument Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n); @@ -89,7 +86,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node b13 = nm->mkNode( OR, nm->mkNode(EQUAL, lsk2, nm->mkNode(MINUS, lt0, nm->mkNode(PLUS, n, m))), - nm->mkNode(EQUAL, lsk2, d_zero)); + nm->mkNode(EQUAL, lsk2, zero)); // Length of the result is at most m Node b14 = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, skt), m); @@ -112,7 +109,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { // satisfied. If n + m is less than the length of s, then len(sk2) = 0 // cannot be satisfied because we have the constraint that len(skt) <= m, // so sk2 must be greater than 0. - new_nodes.push_back( lemma ); + asserts.push_back(lemma); // Thus, substr( s, n, m ) = skt retNode = skt; @@ -123,15 +120,16 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node x = t[0]; Node y = t[1]; Node n = t[2]; - Node skk = nm->mkSkolem("iok", nm->integerType(), "created for indexof"); + Node skk = sc->mkTypedSkolemCached( + nm->integerType(), t, SkolemCache::SK_PURIFY, "iok"); Node negone = nm->mkConst(Rational(-1)); Node krange = nm->mkNode(GEQ, skk, negone); // assert: indexof( x, y, n ) >= -1 - new_nodes.push_back( krange ); + asserts.push_back(krange); krange = nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, x), skk); // assert: len( x ) >= indexof( x, y, z ) - new_nodes.push_back( krange ); + asserts.push_back(krange); // substr( x, n, len( x ) - n ) Node st = nm->mkNode(STRING_SUBSTR, @@ -139,16 +137,16 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, x), n)); Node io2 = - d_sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_PRE, "iopre"); + sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_PRE, "iopre"); Node io4 = - d_sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_POST, "iopost"); + sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_POST, "iopost"); // ~contains( substr( x, n, len( x ) - n ), y ) Node c11 = nm->mkNode(STRING_STRCTN, st, y).negate(); // n > len( x ) Node c12 = nm->mkNode(GT, n, nm->mkNode(STRING_LENGTH, x)); // 0 > n - Node c13 = nm->mkNode(GT, d_zero, n); + Node c13 = nm->mkNode(GT, zero, n); Node cond1 = nm->mkNode(OR, c11, c12, c13); // skk = -1 Node cc1 = skk.eqNode(negone); @@ -171,8 +169,8 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { nm->mkNode( STRING_SUBSTR, y, - d_zero, - nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, y), d_one))), + zero, + nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, y), one))), y) .negate(); // skk = n + len( io2 ) @@ -189,7 +187,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { // skk = n + len( io2 ) // for fresh io2, io4. Node rr = nm->mkNode(ITE, cond1, cc1, nm->mkNode(ITE, cond2, cc2, cc3)); - new_nodes.push_back( rr ); + asserts.push_back(rr); // Thus, indexof( x, y, n ) = skk. retNode = skk; @@ -198,7 +196,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { { // processing term: int.to.str( n ) Node n = t[0]; - Node itost = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "itost"); + Node itost = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "itost"); Node leni = nm->mkNode(STRING_LENGTH, itost); std::vector<Node> conc; @@ -206,21 +204,20 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { argTypes.push_back(nm->integerType()); Node u = nm->mkSkolem("U", nm->mkFunctionType(argTypes, nm->integerType())); - Node lem = nm->mkNode(GEQ, leni, d_one); + Node lem = nm->mkNode(GEQ, leni, one); conc.push_back(lem); lem = n.eqNode(nm->mkNode(APPLY_UF, u, leni)); conc.push_back(lem); - lem = d_zero.eqNode(nm->mkNode(APPLY_UF, u, d_zero)); + lem = zero.eqNode(nm->mkNode(APPLY_UF, u, zero)); conc.push_back(lem); - Node x = nm->mkBoundVar(nm->integerType()); - Node xPlusOne = nm->mkNode(PLUS, x, d_one); + Node x = SkolemCache::mkIndexVar(t); + Node xPlusOne = nm->mkNode(PLUS, x, one); Node xbv = nm->mkNode(BOUND_VAR_LIST, x); - Node g = - nm->mkNode(AND, nm->mkNode(GEQ, x, d_zero), nm->mkNode(LT, x, leni)); - Node sx = nm->mkNode(STRING_SUBSTR, itost, x, d_one); + Node g = nm->mkNode(AND, nm->mkNode(GEQ, x, zero), nm->mkNode(LT, x, leni)); + Node sx = nm->mkNode(STRING_SUBSTR, itost, x, one); Node ux = nm->mkNode(APPLY_UF, u, x); Node ux1 = nm->mkNode(APPLY_UF, u, xPlusOne); Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0"))); @@ -229,10 +226,10 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node ten = nm->mkConst(Rational(10)); Node eq = ux1.eqNode(nm->mkNode(PLUS, c, nm->mkNode(MULT, ten, ux))); Node leadingZeroPos = - nm->mkNode(AND, x.eqNode(d_zero), nm->mkNode(GT, leni, d_one)); + nm->mkNode(AND, x.eqNode(zero), nm->mkNode(GT, leni, one)); Node cb = nm->mkNode( AND, - nm->mkNode(GEQ, c, nm->mkNode(ITE, leadingZeroPos, d_one, d_zero)), + nm->mkNode(GEQ, c, nm->mkNode(ITE, leadingZeroPos, one, zero)), nm->mkNode(LT, c, ten)); Node ux1lem = nm->mkNode(GEQ, n, ux1); @@ -241,11 +238,11 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { lem = nm->mkNode(FORALL, xbv, lem); conc.push_back(lem); - Node nonneg = nm->mkNode(GEQ, n, d_zero); + Node nonneg = nm->mkNode(GEQ, n, zero); Node emp = Word::mkEmptyWord(t.getType()); lem = nm->mkNode(ITE, nonneg, nm->mkNode(AND, conc), itost.eqNode(emp)); - new_nodes.push_back(lem); + asserts.push_back(lem); // assert: // IF n>=0 // THEN: @@ -274,26 +271,27 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { retNode = itost; } else if( t.getKind() == kind::STRING_STOI ) { Node s = t[0]; - Node stoit = nm->mkSkolem("stoit", nm->integerType(), "created for stoi"); + Node stoit = sc->mkTypedSkolemCached( + nm->integerType(), t, SkolemCache::SK_PURIFY, "stoit"); Node lens = nm->mkNode(STRING_LENGTH, s); std::vector<Node> conc1; - Node lem = stoit.eqNode(d_neg_one); + Node lem = stoit.eqNode(negOne); conc1.push_back(lem); Node emp = Word::mkEmptyWord(s.getType()); Node sEmpty = s.eqNode(emp); Node k = nm->mkSkolem("k", nm->integerType()); - Node kc1 = nm->mkNode(GEQ, k, d_zero); + Node kc1 = nm->mkNode(GEQ, k, zero); Node kc2 = nm->mkNode(LT, k, lens); Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0"))); Node codeSk = nm->mkNode( MINUS, - nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, s, k, d_one)), + nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, s, k, one)), c0); Node ten = nm->mkConst(Rational(10)); Node kc3 = nm->mkNode( - OR, nm->mkNode(LT, codeSk, d_zero), nm->mkNode(GEQ, codeSk, ten)); + OR, nm->mkNode(LT, codeSk, zero), nm->mkNode(GEQ, codeSk, ten)); conc1.push_back(nm->mkNode(OR, sEmpty, nm->mkNode(AND, kc1, kc2, kc3))); std::vector<Node> conc2; @@ -304,24 +302,22 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { lem = stoit.eqNode(nm->mkNode(APPLY_UF, u, lens)); conc2.push_back(lem); - lem = d_zero.eqNode(nm->mkNode(APPLY_UF, u, d_zero)); + lem = zero.eqNode(nm->mkNode(APPLY_UF, u, zero)); conc2.push_back(lem); - lem = nm->mkNode(GT, lens, d_zero); + lem = nm->mkNode(GT, lens, zero); conc2.push_back(lem); - Node x = nm->mkBoundVar(nm->integerType()); + Node x = SkolemCache::mkIndexVar(t); Node xbv = nm->mkNode(BOUND_VAR_LIST, x); - Node g = - nm->mkNode(AND, nm->mkNode(GEQ, x, d_zero), nm->mkNode(LT, x, lens)); - Node sx = nm->mkNode(STRING_SUBSTR, s, x, d_one); + Node g = nm->mkNode(AND, nm->mkNode(GEQ, x, zero), nm->mkNode(LT, x, lens)); + Node sx = nm->mkNode(STRING_SUBSTR, s, x, one); Node ux = nm->mkNode(APPLY_UF, u, x); - Node ux1 = nm->mkNode(APPLY_UF, u, nm->mkNode(PLUS, x, d_one)); + Node ux1 = nm->mkNode(APPLY_UF, u, nm->mkNode(PLUS, x, one)); Node c = nm->mkNode(MINUS, nm->mkNode(STRING_TO_CODE, sx), c0); Node eq = ux1.eqNode(nm->mkNode(PLUS, c, nm->mkNode(MULT, ten, ux))); - Node cb = - nm->mkNode(AND, nm->mkNode(GEQ, c, d_zero), nm->mkNode(LT, c, ten)); + Node cb = nm->mkNode(AND, nm->mkNode(GEQ, c, zero), nm->mkNode(LT, c, ten)); Node ux1lem = nm->mkNode(GEQ, stoit, ux1); @@ -329,9 +325,9 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { lem = nm->mkNode(FORALL, xbv, lem); conc2.push_back(lem); - Node sneg = nm->mkNode(LT, stoit, d_zero); + Node sneg = nm->mkNode(LT, stoit, zero); lem = nm->mkNode(ITE, sneg, nm->mkNode(AND, conc1), nm->mkNode(AND, conc2)); - new_nodes.push_back(lem); + asserts.push_back(lem); // assert: // IF stoit < 0 @@ -362,10 +358,10 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node z = t[2]; TypeNode tn = t[0].getType(); Node rp1 = - d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_PRE, "rfcpre"); + sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_PRE, "rfcpre"); Node rp2 = - d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_POST, "rfcpost"); - Node rpw = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpw"); + sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_POST, "rfcpost"); + Node rpw = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpw"); // y = "" Node emp = Word::mkEmptyWord(tn); @@ -387,10 +383,10 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { rp1, nm->mkNode(kind::STRING_SUBSTR, y, - d_zero, + zero, nm->mkNode(kind::MINUS, nm->mkNode(kind::STRING_LENGTH, y), - d_one))), + one))), y) .negate(); @@ -410,7 +406,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { cond2, nm->mkNode(kind::AND, c21, c22, c23), rpw.eqNode(x))); - new_nodes.push_back( rr ); + asserts.push_back(rr); // Thus, replace( x, y, z ) = rpw. retNode = rpw; @@ -421,16 +417,16 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node x = t[0]; Node y = t[1]; Node z = t[2]; - Node rpaw = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpaw"); + Node rpaw = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpaw"); - Node numOcc = d_sc->mkTypedSkolemCached( + Node numOcc = sc->mkTypedSkolemCached( nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc"); std::vector<TypeNode> argTypes; argTypes.push_back(nm->integerType()); Node us = nm->mkSkolem("Us", nm->mkFunctionType(argTypes, nm->stringType())); TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType()); - Node uf = d_sc->mkTypedSkolemCached( + Node uf = sc->mkTypedSkolemCached( ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf"); Node ufno = nm->mkNode(APPLY_UF, uf, numOcc); @@ -438,27 +434,27 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node rem = nm->mkNode(STRING_SUBSTR, x, ufno, nm->mkNode(STRING_LENGTH, x)); std::vector<Node> lem; - lem.push_back(nm->mkNode(GEQ, numOcc, d_zero)); - lem.push_back(rpaw.eqNode(nm->mkNode(APPLY_UF, us, d_zero))); + lem.push_back(nm->mkNode(GEQ, numOcc, zero)); + lem.push_back(rpaw.eqNode(nm->mkNode(APPLY_UF, us, zero))); lem.push_back(usno.eqNode(rem)); - lem.push_back(nm->mkNode(APPLY_UF, uf, d_zero).eqNode(d_zero)); - lem.push_back(nm->mkNode(STRING_STRIDOF, x, y, ufno).eqNode(d_neg_one)); + lem.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero)); + lem.push_back(nm->mkNode(STRING_STRIDOF, x, y, ufno).eqNode(negOne)); - Node i = nm->mkBoundVar(nm->integerType()); + Node i = SkolemCache::mkIndexVar(t); Node bvli = nm->mkNode(BOUND_VAR_LIST, i); Node bound = - nm->mkNode(AND, nm->mkNode(GEQ, i, d_zero), nm->mkNode(LT, i, numOcc)); + nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc)); Node ufi = nm->mkNode(APPLY_UF, uf, i); - Node ufip1 = nm->mkNode(APPLY_UF, uf, nm->mkNode(PLUS, i, d_one)); + Node ufip1 = nm->mkNode(APPLY_UF, uf, nm->mkNode(PLUS, i, one)); Node ii = nm->mkNode(STRING_STRIDOF, x, y, ufi); Node cc = nm->mkNode( STRING_CONCAT, nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)), z, - nm->mkNode(APPLY_UF, us, nm->mkNode(PLUS, i, d_one))); + nm->mkNode(APPLY_UF, us, nm->mkNode(PLUS, i, one))); std::vector<Node> flem; - flem.push_back(ii.eqNode(d_neg_one).negate()); + flem.push_back(ii.eqNode(negOne).negate()); flem.push_back(nm->mkNode(APPLY_UF, us, i).eqNode(cc)); flem.push_back( ufip1.eqNode(nm->mkNode(PLUS, ii, nm->mkNode(STRING_LENGTH, y)))); @@ -487,27 +483,194 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node emp = Word::mkEmptyWord(t.getType()); Node assert = nm->mkNode(ITE, y.eqNode(emp), rpaw.eqNode(x), nm->mkNode(AND, lem)); - new_nodes.push_back(assert); + asserts.push_back(assert); // Thus, replaceall( x, y, z ) = rpaw retNode = rpaw; } + else if (t.getKind() == STRING_REPLACE_RE) + { + Node x = t[0]; + Node y = t[1]; + Node z = t[2]; + Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k"); + + std::vector<Node> emptyVec; + Node sigmaStar = + nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec)); + Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, y, sigmaStar); + // in_re(x, re.++(_*, y, _*)) + Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re); + + // in_re("", y) + Node matchesEmpty = + nm->mkNode(STRING_IN_REGEXP, nm->mkConst(String("")), y); + // k = z ++ x + Node res1 = k.eqNode(nm->mkNode(STRING_CONCAT, z, x)); + + Node k1 = + sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_PRE, "rre_pre"); + Node k2 = + sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH, "rre_match"); + Node k3 = + sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_POST, "rre_post"); + // x = k1 ++ k2 ++ k3 + Node splitX = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3)); + // ~in_re(k1 ++ str.substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*)) + Node k2len = nm->mkNode(STRING_LENGTH, k2); + Node firstMatch = + nm->mkNode( + STRING_IN_REGEXP, + nm->mkNode( + STRING_CONCAT, + k1, + nm->mkNode( + STRING_SUBSTR, k2, zero, nm->mkNode(MINUS, k2len, one))), + re) + .negate(); + // in_re(k2, y) + Node k2Match = nm->mkNode(STRING_IN_REGEXP, k2, y); + // k = k1 ++ z ++ k3 + Node res2 = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3)); + + // IF in_re(x, re.++(_*, y, _*)) + // THEN: + // IF in_re("", y) + // THEN: k = z ++ x + // ELSE: + // x = k1 ++ k2 ++ k3 ^ + // ~in_re(k1 ++ substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*)) ^ + // in_re(k2, y) ^ k = k1 ++ z ++ k3 + // ELSE: k = x + asserts.push_back(nm->mkNode( + ITE, + hasMatch, + nm->mkNode(ITE, + matchesEmpty, + res1, + nm->mkNode(AND, splitX, firstMatch, k2Match, res2)), + k.eqNode(x))); + retNode = k; + } + else if (t.getKind() == STRING_REPLACE_RE_ALL) + { + Node x = t[0]; + Node y = t[1]; + Node z = t[2]; + Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k"); + + Node numOcc = sc->mkTypedSkolemCached( + nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc"); + std::vector<TypeNode> argTypes; + argTypes.push_back(nm->integerType()); + Node us = nm->mkSkolem("Us", nm->mkFunctionType(argTypes, t.getType())); + TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType()); + Node uf = sc->mkTypedSkolemCached( + ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf"); + Node ul = + sc->mkTypedSkolemCached(ufType, x, y, SkolemCache::SK_OCCUR_LEN, "Ul"); + + Node emp = Word::mkEmptyWord(t.getType()); + + std::vector<Node> emptyVec; + Node sigmaStar = + nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec)); + Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)); + Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, yp, sigmaStar); + // in_re(x, _* ++ y' ++ _*) + Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re); + + Node ufno = nm->mkNode(APPLY_UF, uf, numOcc); + Node usno = nm->mkNode(APPLY_UF, us, numOcc); + Node rem = nm->mkNode(STRING_SUBSTR, x, ufno, nm->mkNode(STRING_LENGTH, x)); + + std::vector<Node> lemmas; + // numOcc > 0 + lemmas.push_back(nm->mkNode(GT, numOcc, zero)); + // k = Us(0) + lemmas.push_back(k.eqNode(nm->mkNode(APPLY_UF, us, zero))); + // Us(numOcc) = substr(x, Uf(numOcc)) + lemmas.push_back(usno.eqNode(rem)); + // Uf(0) = 0 + lemmas.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero)); + // not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*))) + lemmas.push_back(nm->mkNode(STRING_IN_REGEXP, rem, re).negate()); + + Node i = SkolemCache::mkIndexVar(t); + Node bvli = nm->mkNode(BOUND_VAR_LIST, i); + Node bound = + nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc)); + Node ip1 = nm->mkNode(PLUS, i, one); + Node ufi = nm->mkNode(APPLY_UF, uf, i); + Node uli = nm->mkNode(APPLY_UF, ul, i); + Node ufip1 = nm->mkNode(APPLY_UF, uf, ip1); + Node ii = nm->mkNode(MINUS, ufip1, uli); + Node match = nm->mkNode(STRING_SUBSTR, x, ii, uli); + Node pfxMatch = + nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)); + Node nonMatch = + nm->mkNode(STRING_SUBSTR, + x, + ufi, + nm->mkNode(MINUS, nm->mkNode(MINUS, ufip1, one), ufi)); + + std::vector<Node> flem; + // Ul(i) > 0 + flem.push_back(nm->mkNode(GT, uli, zero)); + // Uf(i + 1) >= Uf(i) + Ul(i) + flem.push_back(nm->mkNode(GEQ, ufip1, nm->mkNode(PLUS, ufi, uli))); + // in_re(substr(x, ii, Ul(i)), y') + flem.push_back(nm->mkNode(STRING_IN_REGEXP, match, yp)); + // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*)) + flem.push_back(nm->mkNode(STRING_IN_REGEXP, nonMatch, re).negate()); + // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1) + flem.push_back( + nm->mkNode(APPLY_UF, us, i) + .eqNode(nm->mkNode( + STRING_CONCAT, pfxMatch, z, nm->mkNode(APPLY_UF, us, ip1)))); + + Node forall = nm->mkNode( + FORALL, bvli, nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem))); + lemmas.push_back(forall); + + // IF in_re(x, re.++(_*, y', _*)) + // THEN: + // numOcc > 0 ^ + // k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^ + // Uf(0) = 0 ^ not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*))) + // forall i. 0 <= i < nummOcc => + // Ul(i) > 0 ^ + // Uf(i + 1) >= Uf(i) + Ul(i) ^ + // in_re(substr(x, ii, Ul(i)), y') ^ + // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*)) ^ + // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1) + // where ii = Uf(i + 1) - Ul(i) + // ELSE: k = x + // where y' = re.diff(y, "") + // + // Conceptually, y' is the regex y but excluding the empty string (because + // we do not want to match empty strings), numOcc is the number of shortest + // matches of y' in x, Uf(i) is the end position of the i-th match, Ul(i) + // is the length of the i^th match, and Us(i) is the result of processing + // the remainder after processing the i^th occurrence of y in x. + asserts.push_back( + nm->mkNode(ITE, hasMatch, nm->mkNode(AND, lemmas), k.eqNode(x))); + retNode = k; + } else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER) { Node x = t[0]; - Node r = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); + Node r = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); Node lenx = nm->mkNode(STRING_LENGTH, x); Node lenr = nm->mkNode(STRING_LENGTH, r); Node eqLenA = lenx.eqNode(lenr); - Node i = nm->mkBoundVar(nm->integerType()); + Node i = SkolemCache::mkIndexVar(t); Node bvi = nm->mkNode(BOUND_VAR_LIST, i); - Node ci = - nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, x, i, d_one)); - Node ri = - nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, r, i, d_one)); + Node ci = nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, x, i, one)); + Node ri = nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, r, i, one)); Node lb = nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? 97 : 65)); Node ub = nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? 122 : 90)); @@ -521,7 +684,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { ci); Node bound = - nm->mkNode(AND, nm->mkNode(LEQ, d_zero, i), nm->mkNode(LT, i, lenr)); + nm->mkNode(AND, nm->mkNode(LEQ, zero, i), nm->mkNode(LT, i, lenr)); Node rangeA = nm->mkNode(FORALL, bvi, nm->mkNode(OR, bound.negate(), ri.eqNode(res))); @@ -533,7 +696,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { // str.code( str.substr(r,i,1) ) = ite( 97 <= ci <= 122, ci-32, ci) // where ci = str.code( str.substr(x,i,1) ) Node assert = nm->mkNode(AND, eqLenA, rangeA); - new_nodes.push_back(assert); + asserts.push_back(assert); // Thus, toLower( x ) = r retNode = r; @@ -541,22 +704,22 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { else if (t.getKind() == STRING_REV) { Node x = t[0]; - Node r = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); + Node r = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); Node lenx = nm->mkNode(STRING_LENGTH, x); Node lenr = nm->mkNode(STRING_LENGTH, r); Node eqLenA = lenx.eqNode(lenr); - Node i = nm->mkBoundVar(nm->integerType()); + Node i = SkolemCache::mkIndexVar(t); Node bvi = nm->mkNode(BOUND_VAR_LIST, i); Node revi = nm->mkNode( - MINUS, nm->mkNode(STRING_LENGTH, x), nm->mkNode(PLUS, i, d_one)); - Node ssr = nm->mkNode(STRING_SUBSTR, r, i, d_one); - Node ssx = nm->mkNode(STRING_SUBSTR, x, revi, d_one); + MINUS, nm->mkNode(STRING_LENGTH, x), nm->mkNode(PLUS, i, one)); + Node ssr = nm->mkNode(STRING_SUBSTR, r, i, one); + Node ssx = nm->mkNode(STRING_SUBSTR, x, revi, one); Node bound = - nm->mkNode(AND, nm->mkNode(LEQ, d_zero, i), nm->mkNode(LT, i, lenr)); + nm->mkNode(AND, nm->mkNode(LEQ, zero, i), nm->mkNode(LT, i, lenr)); Node rangeA = nm->mkNode( FORALL, bvi, nm->mkNode(OR, bound.negate(), ssr.eqNode(ssx))); // assert: @@ -564,7 +727,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { // forall i. 0 <= i < len(r) => // substr(r,i,1) = substr(x,len(x)-(i+1),1) Node assert = nm->mkNode(AND, eqLenA, rangeA); - new_nodes.push_back(assert); + asserts.push_back(assert); // Thus, (str.rev x) = r retNode = r; @@ -576,31 +739,38 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { //negative contains reduces to existential Node lenx = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x); Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s); - Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); + Node b1 = SkolemCache::mkIndexVar(t); Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); - Node body = NodeManager::currentNM()->mkNode( kind::AND, - NodeManager::currentNM()->mkNode( kind::LEQ, d_zero, b1 ), - NodeManager::currentNM()->mkNode( kind::LEQ, b1, NodeManager::currentNM()->mkNode( kind::MINUS, lenx, lens ) ), - NodeManager::currentNM()->mkNode( kind::EQUAL, NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, x, b1, lens), s ) - ); + Node body = NodeManager::currentNM()->mkNode( + kind::AND, + NodeManager::currentNM()->mkNode(kind::LEQ, zero, b1), + NodeManager::currentNM()->mkNode( + kind::LEQ, + b1, + NodeManager::currentNM()->mkNode(kind::MINUS, lenx, lens)), + NodeManager::currentNM()->mkNode( + kind::EQUAL, + NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, x, b1, lens), + s)); retNode = NodeManager::currentNM()->mkNode( kind::EXISTS, b1v, body ); } else if (t.getKind() == kind::STRING_LEQ) { - Node ltp = nm->mkSkolem("ltp", nm->booleanType()); + Node ltp = sc->mkTypedSkolemCached( + nm->booleanType(), t, SkolemCache::SK_PURIFY, "ltp"); Node k = nm->mkSkolem("k", nm->integerType()); std::vector<Node> conj; - conj.push_back(nm->mkNode(GEQ, k, d_zero)); + conj.push_back(nm->mkNode(GEQ, k, zero)); Node substr[2]; Node code[2]; for (unsigned r = 0; r < 2; r++) { Node ta = t[r]; Node tb = t[1 - r]; - substr[r] = nm->mkNode(STRING_SUBSTR, ta, d_zero, k); + substr[r] = nm->mkNode(STRING_SUBSTR, ta, zero, k); code[r] = - nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, ta, k, d_one)); + nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, ta, k, one)); conj.push_back(nm->mkNode(LEQ, k, nm->mkNode(STRING_LENGTH, ta))); } conj.push_back(substr[0].eqNode(substr[1])); @@ -632,18 +802,29 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { // ELSE: str.code(substr( x, k, 1 )) > str.code(substr( y, k, 1 )) Node assert = nm->mkNode(ITE, t[0].eqNode(t[1]), ltp, nm->mkNode(AND, conj)); - new_nodes.push_back(assert); + asserts.push_back(assert); // Thus, str.<=( x, y ) = ltp retNode = ltp; } + return retNode; +} +Node StringsPreprocess::simplify(Node t, std::vector<Node>& asserts) +{ + size_t prev_asserts = asserts.size(); + // call the static reduce routine + Node retNode = reduce(t, asserts, d_sc); if( t!=retNode ){ Trace("strings-preprocess") << "StringsPreprocess::simplify: " << t << " -> " << retNode << std::endl; - if(!new_nodes.empty()) { - Trace("strings-preprocess") << " ... new nodes (" << (new_nodes.size()-prev_new_nodes) << "):" << std::endl; - for(unsigned int i=prev_new_nodes; i<new_nodes.size(); ++i) { - Trace("strings-preprocess") << " " << new_nodes[i] << std::endl; + if (!asserts.empty()) + { + Trace("strings-preprocess") + << " ... new nodes (" << (asserts.size() - prev_asserts) + << "):" << std::endl; + for (size_t i = prev_asserts; i < asserts.size(); ++i) + { + Trace("strings-preprocess") << " " << asserts[i] << std::endl; } } d_statistics.d_reductions << t.getKind(); @@ -656,14 +837,17 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { return retNode; } -Node StringsPreprocess::simplifyRec( Node t, std::vector< Node > & new_nodes, std::map< Node, Node >& visited ){ +Node StringsPreprocess::simplifyRec(Node t, + std::vector<Node>& asserts, + std::map<Node, Node>& visited) +{ std::map< Node, Node >::iterator it = visited.find(t); if( it!=visited.end() ){ return it->second; }else{ Node retNode = t; if( t.getNumChildren()==0 ){ - retNode = simplify( t, new_nodes ); + retNode = simplify(t, asserts); }else if( t.getKind()!=kind::FORALL ){ bool changed = false; std::vector< Node > cc; @@ -671,7 +855,7 @@ Node StringsPreprocess::simplifyRec( Node t, std::vector< Node > & new_nodes, st cc.push_back( t.getOperator() ); } for(unsigned i=0; i<t.getNumChildren(); i++) { - Node s = simplifyRec( t[i], new_nodes, visited ); + Node s = simplifyRec(t[i], asserts, visited); cc.push_back( s ); if( s!=t[i] ) { changed = true; @@ -681,24 +865,27 @@ Node StringsPreprocess::simplifyRec( Node t, std::vector< Node > & new_nodes, st if( changed ){ tmp = NodeManager::currentNM()->mkNode( t.getKind(), cc ); } - retNode = simplify( tmp, new_nodes ); + retNode = simplify(tmp, asserts); } visited[t] = retNode; return retNode; } } -Node StringsPreprocess::processAssertion( Node n, std::vector< Node > &new_nodes ) { +Node StringsPreprocess::processAssertion(Node n, std::vector<Node>& asserts) +{ std::map< Node, Node > visited; - std::vector< Node > new_nodes_curr; - Node ret = simplifyRec( n, new_nodes_curr, visited ); - while( !new_nodes_curr.empty() ){ - Node curr = new_nodes_curr.back(); - new_nodes_curr.pop_back(); - std::vector< Node > new_nodes_tmp; - curr = simplifyRec( curr, new_nodes_tmp, visited ); - new_nodes_curr.insert( new_nodes_curr.end(), new_nodes_tmp.begin(), new_nodes_tmp.end() ); - new_nodes.push_back( curr ); + std::vector<Node> asserts_curr; + Node ret = simplifyRec(n, asserts_curr, visited); + while (!asserts_curr.empty()) + { + Node curr = asserts_curr.back(); + asserts_curr.pop_back(); + std::vector<Node> asserts_tmp; + curr = simplifyRec(curr, asserts_tmp, visited); + asserts_curr.insert( + asserts_curr.end(), asserts_tmp.begin(), asserts_tmp.end()); + asserts.push_back(curr); } return ret; } @@ -708,18 +895,22 @@ void StringsPreprocess::processAssertions( std::vector< Node > &vec_node ){ for( unsigned i=0; i<vec_node.size(); i++ ){ Trace("strings-preprocess-debug") << "Preprocessing assertion " << vec_node[i] << std::endl; //preprocess until fixed point - std::vector< Node > new_nodes; - std::vector< Node > new_nodes_curr; - new_nodes_curr.push_back( vec_node[i] ); - while( !new_nodes_curr.empty() ){ - Node curr = new_nodes_curr.back(); - new_nodes_curr.pop_back(); - std::vector< Node > new_nodes_tmp; - curr = simplifyRec( curr, new_nodes_tmp, visited ); - new_nodes_curr.insert( new_nodes_curr.end(), new_nodes_tmp.begin(), new_nodes_tmp.end() ); - new_nodes.push_back( curr ); + std::vector<Node> asserts; + std::vector<Node> asserts_curr; + asserts_curr.push_back(vec_node[i]); + while (!asserts_curr.empty()) + { + Node curr = asserts_curr.back(); + asserts_curr.pop_back(); + std::vector<Node> asserts_tmp; + curr = simplifyRec(curr, asserts_tmp, visited); + asserts_curr.insert( + asserts_curr.end(), asserts_tmp.begin(), asserts_tmp.end()); + asserts.push_back(curr); } - Node res = new_nodes.size()==1 ? new_nodes[0] : NodeManager::currentNM()->mkNode( kind::AND, new_nodes ); + Node res = asserts.size() == 1 + ? asserts[0] + : NodeManager::currentNM()->mkNode(kind::AND, asserts); if( res!=vec_node[i] ){ res = Rewriter::rewrite( res ); PROOF( ProofManager::currentPM()->addDependence( res, vec_node[i] ); ); diff --git a/src/theory/strings/theory_strings_preprocess.h b/src/theory/strings/theory_strings_preprocess.h index fb6404aa6..113d909a8 100644 --- a/src/theory/strings/theory_strings_preprocess.h +++ b/src/theory/strings/theory_strings_preprocess.h @@ -2,9 +2,9 @@ /*! \file theory_strings_preprocess.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds, Tianyi Liang + ** Andrew Reynolds, Tianyi Liang, Mathias Preiner ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -44,21 +44,41 @@ class StringsPreprocess { context::UserContext* u, SequencesStatistics& stats); ~StringsPreprocess(); + /** The reduce routine + * + * This is the main routine for constructing the reduction lemma for + * an extended function t. It returns the simplified form of t, as well + * as assertions for t, interpeted conjunctively. The reduction lemma + * for t is: + * asserts[0] ^ ... ^ asserts[n] ^ t = t' + * where t' is the term returned by this method. + * The argument sc defines the methods for generating new Skolem variables. + * The return value is t itself if it is not reduced by this class. + * + * The reduction lemma for t is a way of specifying the complete semantics + * of t. In other words, any model satisfying the reduction lemma of t + * correctly interprets t. + * + * @param t The node to reduce, + * @param asserts The vector for storing the assertions that correspond to + * the reduction of t, + * @param sc The skolem cache for generating new variables, + * @return The reduced form of t. + */ + static Node reduce(Node t, std::vector<Node>& asserts, SkolemCache* sc); /** - * Returns a node t' such that - * (exists k) new_nodes => t = t' - * is valid, where k are the free skolems introduced when constructing - * new_nodes. + * Calls the above method for the skolem cache owned by this class, and + * records statistics. */ - Node simplify(Node t, std::vector<Node>& new_nodes); + Node simplify(Node t, std::vector<Node>& asserts); /** * Applies simplifyRec on t until a fixed point is reached, and returns * the resulting term t', which is such that - * (exists k) new_nodes => t = t' + * (exists k) asserts => t = t' * is valid, where k are the free skolems introduced when constructing - * new_nodes. + * asserts. */ - Node processAssertion(Node t, std::vector<Node>& new_nodes); + Node processAssertion(Node t, std::vector<Node>& asserts); /** * Replaces all formulas t in vec_node with an equivalent formula t' that * contains no free instances of extended functions (that is, extended @@ -68,21 +88,17 @@ class StringsPreprocess { void processAssertions(std::vector<Node>& vec_node); private: - /** commonly used constants */ - Node d_zero; - Node d_one; - Node d_neg_one; /** pointer to the skolem cache used by this class */ SkolemCache* d_sc; /** Reference to the statistics for the theory of strings/sequences. */ SequencesStatistics& d_statistics; /** * Applies simplify to all top-level extended function subterms of t. New - * assertions created in this reduction are added to new_nodes. The argument + * assertions created in this reduction are added to asserts. The argument * visited stores a cache of previous results. */ Node simplifyRec(Node t, - std::vector<Node>& new_nodes, + std::vector<Node>& asserts, std::map<Node, Node>& visited); }; diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h index 93a32f26e..9f66c5f82 100644 --- a/src/theory/strings/theory_strings_type_rules.h +++ b/src/theory/strings/theory_strings_type_rules.h @@ -2,9 +2,9 @@ /*! \file theory_strings_type_rules.h ** \verbatim ** Top contributors (to current version): - ** Tianyi Liang, Morgan Deters, Tim King + ** Andrew Reynolds, Tianyi Liang, Mathias Preiner ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -20,6 +20,9 @@ #ifndef CVC4__THEORY__STRINGS__THEORY_STRINGS_TYPE_RULES_H #define CVC4__THEORY__STRINGS__THEORY_STRINGS_TYPE_RULES_H +#include "expr/expr_sequence.h" +#include "expr/sequence.h" + namespace CVC4 { namespace theory { namespace strings { @@ -318,6 +321,53 @@ public: } }; +class ConstSequenceTypeRule +{ + public: + static TypeNode computeType(NodeManager* nodeManager, + TNode n, + bool check) + { + Assert(n.getKind() == kind::CONST_SEQUENCE); + return n.getConst<ExprSequence>().getSequence().getType(); + } +}; + +class SeqUnitTypeRule +{ + public: + static TypeNode computeType(NodeManager* nodeManager, + TNode n, + bool check) + { + return nodeManager->mkSequenceType(n[0].getType(check)); + } +}; + +/** Properties of the sequence type */ +struct SequenceProperties +{ + static Cardinality computeCardinality(TypeNode type) + { + Assert(type.getKind() == kind::SEQUENCE_TYPE); + return Cardinality::INTEGERS; + } + /** A sequence is well-founded if its element type is */ + static bool isWellFounded(TypeNode type) + { + return type[0].isWellFounded(); + } + /** Make ground term for sequence type (return the empty sequence) */ + static Node mkGroundTerm(TypeNode type) + { + Assert(type.isSequence()); + // empty sequence + std::vector<Expr> seq; + return NodeManager::currentNM()->mkConst( + ExprSequence(SequenceType(type.toType()), seq)); + } +}; /* struct SequenceProperties */ + }/* CVC4::theory::strings namespace */ }/* CVC4::theory namespace */ }/* CVC4 namespace */ diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index e80607acf..3cf14fead 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -2,9 +2,9 @@ /*! \file theory_strings_utils.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -274,6 +274,8 @@ std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x) allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end())); } +bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; } + bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start) { size_t i = start; diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h index fd6e5122b..803a5ffea 100644 --- a/src/theory/strings/theory_strings_utils.h +++ b/src/theory/strings/theory_strings_utils.h @@ -2,9 +2,9 @@ /*! \file theory_strings_utils.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -148,6 +148,15 @@ Node mkSubstrChain(Node base, std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x); /** + * Return if a string-like term n is "constant-like", that is, either a + * constant string/sequence, or an application of seq.unit. + * + * @param n The string-like term + * @return true if n is constant-like. + */ +bool isConstantLike(Node n); + +/** * Given a vector of regular expression nodes and a start index that points to * a wildcard, returns true if the wildcard is unbounded (i.e. it is followed * by an arbitrary number of `re.allchar`s and then an `re.*(re.allchar)`. If diff --git a/src/theory/strings/type_enumerator.cpp b/src/theory/strings/type_enumerator.cpp index d24206860..6d3949514 100644 --- a/src/theory/strings/type_enumerator.cpp +++ b/src/theory/strings/type_enumerator.cpp @@ -2,9 +2,9 @@ /*! \file type_enumerator.cpp ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Mathias Preiner, Tim King ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -158,6 +158,67 @@ void StringEnumLen::mkCurr() d_curr = makeStandardModelConstant(d_witer->getData(), d_cardinality); } +SeqEnumLen::SeqEnumLen(TypeNode tn, + TypeEnumeratorProperties* tep, + uint32_t startLength) + : SEnumLen(tn, startLength) +{ + d_elementEnumerator.reset( + new TypeEnumerator(d_type.getSequenceElementType(), tep)); + mkCurr(); +} + +SeqEnumLen::SeqEnumLen(TypeNode tn, + TypeEnumeratorProperties* tep, + uint32_t startLength, + uint32_t endLength) + : SEnumLen(tn, startLength, endLength) +{ + d_elementEnumerator.reset( + new TypeEnumerator(d_type.getSequenceElementType(), tep)); + mkCurr(); +} + +SeqEnumLen::SeqEnumLen(const SeqEnumLen& wenum) + : SEnumLen(wenum), + d_elementEnumerator(new TypeEnumerator(*wenum.d_elementEnumerator)), + d_elementDomain(wenum.d_elementDomain) +{ +} + +bool SeqEnumLen::increment() +{ + if (!d_elementEnumerator->isFinished()) + { + // yet to establish domain + Assert(d_elementEnumerator != nullptr); + d_elementDomain.push_back((**d_elementEnumerator).toExpr()); + ++(*d_elementEnumerator); + } + // the current cardinality is the domain size of the element + if (!d_witer->increment(d_elementDomain.size())) + { + Assert(d_elementEnumerator->isFinished()); + d_curr = Node::null(); + return false; + } + mkCurr(); + return true; +} + +void SeqEnumLen::mkCurr() +{ + std::vector<Expr> seq; + const std::vector<unsigned>& data = d_witer->getData(); + for (unsigned i : data) + { + seq.push_back(d_elementDomain[i]); + } + // make sequence from seq + d_curr = + NodeManager::currentNM()->mkConst(ExprSequence(d_type.toType(), seq)); +} + StringEnumerator::StringEnumerator(TypeNode type, TypeEnumeratorProperties* tep) : TypeEnumeratorBase<StringEnumerator>(type), d_wenum(0, utils::getAlphabetCardinality()) @@ -182,6 +243,28 @@ StringEnumerator& StringEnumerator::operator++() bool StringEnumerator::isFinished() { return d_wenum.isFinished(); } +SequenceEnumerator::SequenceEnumerator(TypeNode type, + TypeEnumeratorProperties* tep) + : TypeEnumeratorBase<SequenceEnumerator>(type), d_wenum(type, tep, 0) +{ +} + +SequenceEnumerator::SequenceEnumerator(const SequenceEnumerator& enumerator) + : TypeEnumeratorBase<SequenceEnumerator>(enumerator.getType()), + d_wenum(enumerator.d_wenum) +{ +} + +Node SequenceEnumerator::operator*() { return d_wenum.getCurrent(); } + +SequenceEnumerator& SequenceEnumerator::operator++() +{ + d_wenum.increment(); + return *this; +} + +bool SequenceEnumerator::isFinished() { return d_wenum.isFinished(); } + } // namespace strings } // namespace theory } // namespace CVC4 diff --git a/src/theory/strings/type_enumerator.h b/src/theory/strings/type_enumerator.h index b379ce5c3..602d73059 100644 --- a/src/theory/strings/type_enumerator.h +++ b/src/theory/strings/type_enumerator.h @@ -2,9 +2,9 @@ /*! \file type_enumerator.h ** \verbatim ** Top contributors (to current version): - ** Tianyi Liang, Tim King, Andrew Reynolds + ** Andrew Reynolds, Tianyi Liang, Mathias Preiner ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -136,6 +136,34 @@ class StringEnumLen : public SEnumLen void mkCurr(); }; +/** + * Enumerates sequence values for a given length. + */ +class SeqEnumLen : public SEnumLen +{ + public: + /** For sequences */ + SeqEnumLen(TypeNode tn, TypeEnumeratorProperties* tep, uint32_t startLength); + SeqEnumLen(TypeNode tn, + TypeEnumeratorProperties* tep, + uint32_t startLength, + uint32_t endLength); + /** copy constructor */ + SeqEnumLen(const SeqEnumLen& wenum); + /** destructor */ + ~SeqEnumLen() {} + /** increment */ + bool increment() override; + + private: + /** an enumerator for the elements' type */ + std::unique_ptr<TypeEnumerator> d_elementEnumerator; + /** The domain */ + std::vector<Expr> d_elementDomain; + /** Make the current term from d_data */ + void mkCurr(); +}; + class StringEnumerator : public TypeEnumeratorBase<StringEnumerator> { public: @@ -154,6 +182,21 @@ class StringEnumerator : public TypeEnumeratorBase<StringEnumerator> StringEnumLen d_wenum; }; /* class StringEnumerator */ +class SequenceEnumerator : public TypeEnumeratorBase<SequenceEnumerator> +{ + public: + SequenceEnumerator(TypeNode type, TypeEnumeratorProperties* tep = nullptr); + SequenceEnumerator(const SequenceEnumerator& enumerator); + ~SequenceEnumerator() {} + Node operator*() override; + SequenceEnumerator& operator++() override; + bool isFinished() override; + + private: + /** underlying sequence enumerator */ + SeqEnumLen d_wenum; +}; /* class SequenceEnumerator */ + }/* CVC4::theory::strings namespace */ }/* CVC4::theory namespace */ }/* CVC4 namespace */ diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp index e9ab2652e..35b315e35 100644 --- a/src/theory/strings/word.cpp +++ b/src/theory/strings/word.cpp @@ -4,7 +4,7 @@ ** Top contributors (to current version): ** Andrew Reynolds ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -14,6 +14,7 @@ #include "theory/strings/word.h" +#include "expr/sequence.h" #include "util/string.h" using namespace CVC4::kind; @@ -26,25 +27,20 @@ Node Word::mkEmptyWord(TypeNode tn) { if (tn.isString()) { - return mkEmptyWord(CONST_STRING); + std::vector<unsigned> vec; + return NodeManager::currentNM()->mkConst(String(vec)); } - Unimplemented(); - return Node::null(); -} - -Node Word::mkEmptyWord(Kind k) -{ - NodeManager* nm = NodeManager::currentNM(); - if (k == CONST_STRING) + else if (tn.isSequence()) { - std::vector<unsigned> vec; - return nm->mkConst(String(vec)); + std::vector<Expr> seq; + return NodeManager::currentNM()->mkConst( + ExprSequence(tn.getSequenceElementType().toType(), seq)); } Unimplemented(); return Node::null(); } -Node Word::mkWord(const std::vector<Node>& xs) +Node Word::mkWordFlatten(const std::vector<Node>& xs) { Assert(!xs.empty()); NodeManager* nm = NodeManager::currentNM(); @@ -61,6 +57,23 @@ Node Word::mkWord(const std::vector<Node>& xs) } return nm->mkConst(String(vec)); } + else if (k == CONST_SEQUENCE) + { + std::vector<Expr> seq; + TypeNode tn = xs[0].getType(); + for (TNode x : xs) + { + Assert(x.getType() == tn); + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + const std::vector<Node>& vecc = sx.getVec(); + for (const Node& c : vecc) + { + seq.push_back(c.toExpr()); + } + } + return NodeManager::currentNM()->mkConst( + ExprSequence(tn.getSequenceElementType().toType(), seq)); + } Unimplemented(); return Node::null(); } @@ -72,17 +85,21 @@ size_t Word::getLength(TNode x) { return x.getConst<String>().size(); } - Unimplemented(); + else if (k == CONST_SEQUENCE) + { + return x.getConst<ExprSequence>().getSequence().size(); + } + Unimplemented() << "Word::getLength on " << x; return 0; } std::vector<Node> Word::getChars(TNode x) { Kind k = x.getKind(); + std::vector<Node> ret; + NodeManager* nm = NodeManager::currentNM(); if (k == CONST_STRING) { - std::vector<Node> ret; - NodeManager* nm = NodeManager::currentNM(); std::vector<unsigned> ccVec; const std::vector<unsigned>& cvec = x.getConst<String>().getVec(); for (unsigned chVal : cvec) @@ -94,8 +111,18 @@ std::vector<Node> Word::getChars(TNode x) } return ret; } + else if (k == CONST_SEQUENCE) + { + Type t = x.getConst<ExprSequence>().getType(); + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + const std::vector<Node>& vec = sx.getVec(); + for (const Node& v : vec) + { + ret.push_back(nm->mkConst(ExprSequence(t, {v.toExpr()}))); + } + return ret; + } Unimplemented(); - std::vector<Node> ret; return ret; } @@ -111,6 +138,13 @@ bool Word::strncmp(TNode x, TNode y, std::size_t n) String sy = y.getConst<String>(); return sx.strncmp(sy, n); } + else if (k == CONST_SEQUENCE) + { + Assert(y.getKind() == CONST_SEQUENCE); + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + const Sequence& sy = y.getConst<ExprSequence>().getSequence(); + return sx.strncmp(sy, n); + } Unimplemented(); return false; } @@ -125,6 +159,13 @@ bool Word::rstrncmp(TNode x, TNode y, std::size_t n) String sy = y.getConst<String>(); return sx.rstrncmp(sy, n); } + else if (k == CONST_SEQUENCE) + { + Assert(y.getKind() == CONST_SEQUENCE); + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + const Sequence& sy = y.getConst<ExprSequence>().getSequence(); + return sx.rstrncmp(sy, n); + } Unimplemented(); return false; } @@ -139,6 +180,13 @@ std::size_t Word::find(TNode x, TNode y, std::size_t start) String sy = y.getConst<String>(); return sx.find(sy, start); } + else if (k == CONST_SEQUENCE) + { + Assert(y.getKind() == CONST_SEQUENCE); + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + const Sequence& sy = y.getConst<ExprSequence>().getSequence(); + return sx.find(sy, start); + } Unimplemented(); return 0; } @@ -153,6 +201,13 @@ std::size_t Word::rfind(TNode x, TNode y, std::size_t start) String sy = y.getConst<String>(); return sx.rfind(sy, start); } + else if (k == CONST_SEQUENCE) + { + Assert(y.getKind() == CONST_SEQUENCE); + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + const Sequence& sy = y.getConst<ExprSequence>().getSequence(); + return sx.rfind(sy, start); + } Unimplemented(); return 0; } @@ -167,6 +222,13 @@ bool Word::hasPrefix(TNode x, TNode y) String sy = y.getConst<String>(); return sx.hasPrefix(sy); } + else if (k == CONST_SEQUENCE) + { + Assert(y.getKind() == CONST_SEQUENCE); + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + const Sequence& sy = y.getConst<ExprSequence>().getSequence(); + return sx.hasPrefix(sy); + } Unimplemented(); return false; } @@ -181,6 +243,13 @@ bool Word::hasSuffix(TNode x, TNode y) String sy = y.getConst<String>(); return sx.hasSuffix(sy); } + else if (k == CONST_SEQUENCE) + { + Assert(y.getKind() == CONST_SEQUENCE); + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + const Sequence& sy = y.getConst<ExprSequence>().getSequence(); + return sx.hasSuffix(sy); + } Unimplemented(); return false; } @@ -198,6 +267,16 @@ Node Word::replace(TNode x, TNode y, TNode t) String st = t.getConst<String>(); return nm->mkConst(String(sx.replace(sy, st))); } + else if (k == CONST_SEQUENCE) + { + Assert(y.getKind() == CONST_SEQUENCE); + Assert(t.getKind() == CONST_SEQUENCE); + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + const Sequence& sy = y.getConst<ExprSequence>().getSequence(); + const Sequence& st = t.getConst<ExprSequence>().getSequence(); + Sequence res = sx.replace(sy, st); + return nm->mkConst(res.toExprSequence()); + } Unimplemented(); return Node::null(); } @@ -210,6 +289,12 @@ Node Word::substr(TNode x, std::size_t i) String sx = x.getConst<String>(); return nm->mkConst(String(sx.substr(i))); } + else if (k == CONST_SEQUENCE) + { + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + Sequence res = sx.substr(i); + return nm->mkConst(res.toExprSequence()); + } Unimplemented(); return Node::null(); } @@ -222,6 +307,12 @@ Node Word::substr(TNode x, std::size_t i, std::size_t j) String sx = x.getConst<String>(); return nm->mkConst(String(sx.substr(i, j))); } + else if (k == CONST_SEQUENCE) + { + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + Sequence res = sx.substr(i, j); + return nm->mkConst(res.toExprSequence()); + } Unimplemented(); return Node::null(); } @@ -237,6 +328,12 @@ Node Word::suffix(TNode x, std::size_t i) String sx = x.getConst<String>(); return nm->mkConst(String(sx.suffix(i))); } + else if (k == CONST_SEQUENCE) + { + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + Sequence res = sx.suffix(i); + return nm->mkConst(res.toExprSequence()); + } Unimplemented(); return Node::null(); } @@ -251,6 +348,13 @@ bool Word::noOverlapWith(TNode x, TNode y) String sy = y.getConst<String>(); return sx.noOverlapWith(sy); } + else if (k == CONST_SEQUENCE) + { + Assert(y.getKind() == CONST_SEQUENCE); + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + const Sequence& sy = y.getConst<ExprSequence>().getSequence(); + return sx.noOverlapWith(sy); + } Unimplemented(); return false; } @@ -265,6 +369,13 @@ std::size_t Word::overlap(TNode x, TNode y) String sy = y.getConst<String>(); return sx.overlap(sy); } + else if (k == CONST_SEQUENCE) + { + Assert(y.getKind() == CONST_SEQUENCE); + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + const Sequence& sy = y.getConst<ExprSequence>().getSequence(); + return sx.overlap(sy); + } Unimplemented(); return 0; } @@ -279,10 +390,32 @@ std::size_t Word::roverlap(TNode x, TNode y) String sy = y.getConst<String>(); return sx.roverlap(sy); } + else if (k == CONST_SEQUENCE) + { + Assert(y.getKind() == CONST_SEQUENCE); + const Sequence& sx = x.getConst<ExprSequence>().getSequence(); + const Sequence& sy = y.getConst<ExprSequence>().getSequence(); + return sx.roverlap(sy); + } Unimplemented(); return 0; } +bool Word::isRepeated(TNode x) +{ + Kind k = x.getKind(); + if (k == CONST_STRING) + { + return x.getConst<String>().isRepeated(); + } + else if (k == CONST_SEQUENCE) + { + return x.getConst<ExprSequence>().getSequence().isRepeated(); + } + Unimplemented(); + return false; +} + Node Word::splitConstant(TNode x, TNode y, size_t& index, bool isRev) { Assert(x.isConst() && y.isConst()); diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h index b84ea6874..3b15b763a 100644 --- a/src/theory/strings/word.h +++ b/src/theory/strings/word.h @@ -2,9 +2,9 @@ /*! \file word.h ** \verbatim ** Top contributors (to current version): - ** Andrew Reynolds + ** Andrew Reynolds, Andres Noetzli ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim @@ -33,11 +33,8 @@ class Word /** make empty constant of type tn */ static Node mkEmptyWord(TypeNode tn); - /** make empty constant of kind k */ - static Node mkEmptyWord(Kind k); - /** make word from constants in (non-empty) vector vec */ - static Node mkWord(const std::vector<Node>& xs); + static Node mkWordFlatten(const std::vector<Node>& xs); /** Return the length of word x */ static size_t getLength(TNode x); @@ -139,6 +136,8 @@ class Word * Notice that x.overlap(y) = y.roverlap(x) */ static std::size_t roverlap(TNode x, TNode y); + /** Return true if word x is a repetition of the same character */ + static bool isRepeated(TNode x); /** Split constant * * This returns the suffix remainder (resp. prefix remainder when isRev is |