diff options
Diffstat (limited to 'src/theory')
-rw-r--r-- | src/theory/strings/core_solver.cpp | 58 | ||||
-rw-r--r-- | src/theory/strings/core_solver.h | 8 | ||||
-rw-r--r-- | src/theory/strings/extf_solver.cpp | 15 | ||||
-rw-r--r-- | src/theory/strings/extf_solver.h | 6 | ||||
-rw-r--r-- | src/theory/strings/inference_manager.cpp | 322 | ||||
-rw-r--r-- | src/theory/strings/inference_manager.h | 135 | ||||
-rw-r--r-- | src/theory/strings/strings_fmf.cpp | 27 | ||||
-rw-r--r-- | src/theory/strings/strings_fmf.h | 22 | ||||
-rw-r--r-- | src/theory/strings/term_registry.cpp | 578 | ||||
-rw-r--r-- | src/theory/strings/term_registry.h | 261 | ||||
-rw-r--r-- | src/theory/strings/theory_strings.cpp | 211 | ||||
-rw-r--r-- | src/theory/strings/theory_strings.h | 58 |
12 files changed, 929 insertions, 772 deletions
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index abcd8189a..e574e4fa9 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -30,10 +30,13 @@ namespace CVC4 { namespace theory { namespace strings { -CoreSolver::CoreSolver(context::Context* c, context::UserContext* u, SolverState& s, InferenceManager& im, SkolemCache& skc, BaseSolver& bs) : -d_state(s), d_im(im), d_skCache(skc), -d_bsolver(bs), -d_nf_pairs(c) +CoreSolver::CoreSolver(context::Context* c, + context::UserContext* u, + SolverState& s, + InferenceManager& im, + TermRegistry& tr, + BaseSolver& bs) + : d_state(s), d_im(im), d_termReg(tr), d_bsolver(bs), d_nf_pairs(c) { d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) ); d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); @@ -1252,7 +1255,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // If the equality rewrites to a constant, we must use the // purify variable for this string to communicate that // we have inferred whether it is empty. - Node p = d_skCache.mkSkolemCached(nc, SkolemCache::SK_PURIFY, "lsym"); + SkolemCache* skc = d_termReg.getSkolemCache(); + Node p = skc->mkSkolemCached(nc, SkolemCache::SK_PURIFY, "lsym"); Node pEq = p.eqNode(emp); // should not be constant Assert(!Rewriter::rewrite(pEq).isConst()); @@ -1326,7 +1330,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, Node prea = p == straLen ? stra : (isRev ? Word::suffix(stra, p) : Word::prefix(stra, p)); - Node sk = d_skCache.mkSkolemCached( + SkolemCache* skc = d_termReg.getSkolemCache(); + Node sk = skc->mkSkolemCached( nc, prea, isRev ? SkolemCache::SK_ID_C_SPT_REV : SkolemCache::SK_ID_C_SPT, @@ -1352,7 +1357,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, Node firstChar = straLen == 1 ? stra : (isRev ? Word::suffix(stra, 1) : Word::prefix(stra, 1)); - Node sk = d_skCache.mkSkolemCached( + SkolemCache* skc = d_termReg.getSkolemCache(); + Node sk = skc->mkSkolemCached( nc, isRev ? SkolemCache::SK_ID_VC_SPT_REV : SkolemCache::SK_ID_VC_SPT, "c_spt"); @@ -1425,7 +1431,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, info.d_antn.push_back(tnz); } } - Node sk = d_skCache.mkSkolemCached( + SkolemCache* skc = d_termReg.getSkolemCache(); + Node sk = skc->mkSkolemCached( x, y, isRev ? SkolemCache::SK_ID_V_SPT_REV : SkolemCache::SK_ID_V_SPT, @@ -1664,10 +1671,11 @@ CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi, Trace("strings-loop") << "Strings::Loop: Normal Loop Breaking." << std::endl; // right - Node sk_w = d_skCache.mkSkolem("w_loop"); - Node sk_y = d_skCache.mkSkolem("y_loop"); - d_im.registerTermAtomic(sk_y, LENGTH_GEQ_ONE); - Node sk_z = d_skCache.mkSkolem("z_loop"); + SkolemCache* skc = d_termReg.getSkolemCache(); + Node sk_w = skc->mkSkolem("w_loop"); + Node sk_y = skc->mkSkolem("y_loop"); + d_termReg.registerTermAtomic(sk_y, LENGTH_GEQ_ONE); + Node sk_z = skc->mkSkolem("z_loop"); // t1 * ... * tn = y * z Node conc1 = t_yz.eqNode(utils::mkNConcat(sk_y, sk_z)); // s1 * ... * sk = z * y * r @@ -1838,10 +1846,11 @@ void CoreSolver::processDeq(Node ni, Node nj) // // E.g. x ++ x' ++ ... != "abc" ++ y' ++ ... ^ len(x) != "" ---> // x = k1 ++ k2 ^ len(k1) = 1 ^ (k1 != "a" v x = "a" ++ k2) - Node sk = d_skCache.mkSkolemCached( - nck, SkolemCache::SK_ID_DC_SPT, "dc_spt"); - d_im.registerTermAtomic(sk, LENGTH_ONE); - Node skr = d_skCache.mkSkolemCached( + SkolemCache* skc = d_termReg.getSkolemCache(); + Node sk = + skc->mkSkolemCached(nck, SkolemCache::SK_ID_DC_SPT, "dc_spt"); + d_termReg.registerTermAtomic(sk, LENGTH_ONE); + Node skr = skc->mkSkolemCached( nck, SkolemCache::SK_ID_DC_SPT_REM, "dc_spt_rem"); Node eq1 = nck.eqNode(nm->mkNode(kind::STRING_CONCAT, sk, skr)); eq1 = Rewriter::rewrite(eq1); @@ -1884,13 +1893,14 @@ void CoreSolver::processDeq(Node ni, Node nj) antecNewLits.push_back(xLenTerm.eqNode(yLenTerm).negate()); std::vector<Node> conc; - Node sk1 = d_skCache.mkSkolemCached( - x, y, SkolemCache::SK_ID_DEQ_X, "x_dsplit"); - Node sk2 = d_skCache.mkSkolemCached( - x, y, SkolemCache::SK_ID_DEQ_Y, "y_dsplit"); - Node sk3 = d_skCache.mkSkolemCached( - x, y, SkolemCache::SK_ID_DEQ_Z, "z_dsplit"); - d_im.registerTermAtomic(sk3, LENGTH_GEQ_ONE); + SkolemCache* skc = d_termReg.getSkolemCache(); + Node sk1 = + skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_X, "x_dsplit"); + Node sk2 = + skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_Y, "y_dsplit"); + Node sk3 = + skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_Z, "z_dsplit"); + d_termReg.registerTermAtomic(sk3, LENGTH_GEQ_ONE); Node sk1Len = utils::mkNLength(sk1); conc.push_back(sk1Len.eqNode(xLenTerm)); Node sk2Len = utils::mkNLength(sk2); @@ -2294,7 +2304,7 @@ void CoreSolver::doInferInfo(const InferInfo& ii) { for (const Node& n : sks.second) { - d_im.registerTermAtomic(n, sks.first); + d_termReg.registerTermAtomic(n, sks.first); } } } diff --git a/src/theory/strings/core_solver.h b/src/theory/strings/core_solver.h index b7b487ff7..cef5bd3be 100644 --- a/src/theory/strings/core_solver.h +++ b/src/theory/strings/core_solver.h @@ -24,8 +24,8 @@ #include "theory/strings/infer_info.h" #include "theory/strings/inference_manager.h" #include "theory/strings/normal_form.h" -#include "theory/strings/skolem_cache.h" #include "theory/strings/solver_state.h" +#include "theory/strings/term_registry.h" namespace CVC4 { namespace theory { @@ -46,7 +46,7 @@ class CoreSolver context::UserContext* u, SolverState& s, InferenceManager& im, - SkolemCache& skc, + TermRegistry& tr, BaseSolver& bs); ~CoreSolver(); @@ -370,8 +370,8 @@ class CoreSolver SolverState& d_state; /** The (custom) output channel of the theory of strings */ InferenceManager& d_im; - /** cache of all skolems */ - SkolemCache& d_skCache; + /** Reference to the term registry of theory of strings */ + TermRegistry& d_termReg; /** reference to the base solver, used for certain queries */ BaseSolver& d_bsolver; /** Commonly used constants */ diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index 0f4af2458..1bcd67cb4 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -31,7 +31,7 @@ ExtfSolver::ExtfSolver(context::Context* c, context::UserContext* u, SolverState& s, InferenceManager& im, - SkolemCache& skc, + TermRegistry& tr, StringsRewriter& rewriter, BaseSolver& bs, CoreSolver& cs, @@ -39,13 +39,13 @@ ExtfSolver::ExtfSolver(context::Context* c, SequencesStatistics& statistics) : d_state(s), d_im(im), - d_skCache(skc), + d_termReg(tr), d_rewriter(rewriter), d_bsolver(bs), d_csolver(cs), d_extt(et), d_statistics(statistics), - d_preproc(&skc, u, statistics), + d_preproc(d_termReg.getSkolemCache(), u, statistics), d_hasExtf(c, false), d_extfInferCache(c), d_reduced(u) @@ -160,10 +160,9 @@ bool ExtfSolver::doReduction(int effort, Node n) Node x = n[0]; Node s = n[1]; // positive contains reduces to a equality - Node sk1 = - d_skCache.mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_PRE, "sc1"); - Node sk2 = - d_skCache.mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_POST, "sc2"); + SkolemCache* skc = d_termReg.getSkolemCache(); + Node sk1 = skc->mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_PRE, "sc1"); + Node sk2 = skc->mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_POST, "sc2"); Node eq = Rewriter::rewrite(x.eqNode(utils::mkNConcat(sk1, s, sk2))); std::vector<Node> exp_vec; exp_vec.push_back(n); @@ -305,7 +304,7 @@ void ExtfSolver::checkExtfEval(int effort) // only use symbolic definitions if option is set if (options::stringInferSym()) { - nrs = d_im.getSymbolicDefinition(sn, exps); + nrs = d_termReg.getSymbolicDefinition(sn, exps); } if (!nrs.isNull()) { diff --git a/src/theory/strings/extf_solver.h b/src/theory/strings/extf_solver.h index f0f0d790b..c5dd24f70 100644 --- a/src/theory/strings/extf_solver.h +++ b/src/theory/strings/extf_solver.h @@ -87,7 +87,7 @@ class ExtfSolver context::UserContext* u, SolverState& s, InferenceManager& im, - SkolemCache& skc, + TermRegistry& tr, StringsRewriter& rewriter, BaseSolver& bs, CoreSolver& cs, @@ -181,8 +181,8 @@ class ExtfSolver SolverState& d_state; /** The (custom) output channel of the theory of strings */ InferenceManager& d_im; - /** cache of all skolems */ - SkolemCache& d_skCache; + /** Reference to the term registry of theory of strings */ + TermRegistry& d_termReg; /** The theory rewriter for this theory. */ StringsRewriter& d_rewriter; /** reference to the base solver, used for certain queries */ diff --git a/src/theory/strings/inference_manager.cpp b/src/theory/strings/inference_manager.cpp index d19ce538d..b6bbcb7df 100644 --- a/src/theory/strings/inference_manager.cpp +++ b/src/theory/strings/inference_manager.cpp @@ -34,18 +34,15 @@ InferenceManager::InferenceManager(TheoryStrings& p, context::Context* c, context::UserContext* u, SolverState& s, - SkolemCache& skc, + TermRegistry& tr, OutputChannel& out, SequencesStatistics& statistics) : d_parent(p), d_state(s), - d_skCache(skc), + d_termReg(tr), d_out(out), d_statistics(statistics), - d_keep(c), - d_proxyVar(u), - d_proxyVarToLength(u), - d_lengthLemmaTermsCache(u) + d_keep(c) { NodeManager* nm = NodeManager::currentNM(); d_zero = nm->mkConst(Rational(0)); @@ -233,7 +230,7 @@ void InferenceManager::sendInfer(Node eq_exp, Node eq, Inference infer) std::vector<Node> vars; std::vector<Node> subs; std::vector<Node> unproc; - inferSubstitutionProxyVars(eq_exp, vars, subs, unproc); + d_termReg.inferSubstitutionProxyVars(eq_exp, vars, subs, unproc); if (unproc.empty()) { Node eqs = @@ -297,235 +294,6 @@ void InferenceManager::sendPhaseRequirement(Node lit, bool pol) d_pendingReqPhase[lit] = pol; } -Node InferenceManager::getProxyVariableFor(Node n) const -{ - NodeNodeMap::const_iterator it = d_proxyVar.find(n); - if (it != d_proxyVar.end()) - { - return (*it).second; - } - return Node::null(); -} - -Node InferenceManager::getSymbolicDefinition(Node n, - std::vector<Node>& exp) const -{ - if (n.getNumChildren() == 0) - { - Node pn = getProxyVariableFor(n); - if (pn.isNull()) - { - return Node::null(); - } - Node eq = n.eqNode(pn); - eq = Rewriter::rewrite(eq); - if (std::find(exp.begin(), exp.end(), eq) == exp.end()) - { - exp.push_back(eq); - } - return pn; - } - std::vector<Node> children; - if (n.getMetaKind() == metakind::PARAMETERIZED) - { - children.push_back(n.getOperator()); - } - for (const Node& nc : n) - { - if (n.getType().isRegExp()) - { - children.push_back(nc); - } - else - { - Node ns = getSymbolicDefinition(nc, exp); - if (ns.isNull()) - { - return Node::null(); - } - else - { - children.push_back(ns); - } - } - } - return NodeManager::currentNM()->mkNode(n.getKind(), children); -} - -Node InferenceManager::registerTerm(Node n) -{ - Assert(n.getType().isStringLike()); - NodeManager* nm = NodeManager::currentNM(); - // register length information: - // for variables, split on empty vs positive length - // for concat/const/replace, introduce proxy var and state length relation - Node lsum; - if (n.getKind() != STRING_CONCAT && !n.isConst()) - { - Node lsumb = nm->mkNode(STRING_LENGTH, n); - lsum = Rewriter::rewrite(lsumb); - // can register length term if it does not rewrite - if (lsum == lsumb) - { - registerTermAtomic(n, LENGTH_SPLIT); - return Node::null(); - } - } - Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym"); - StringsProxyVarAttribute spva; - sk.setAttribute(spva, true); - Node eq = Rewriter::rewrite(sk.eqNode(n)); - d_proxyVar[n] = sk; - // If we are introducing a proxy for a constant or concat term, we do not - // need to send lemmas about its length, since its length is already - // implied. - if (n.isConst() || n.getKind() == STRING_CONCAT) - { - // do not send length lemma for sk. - registerTermAtomic(sk, LENGTH_IGNORE); - } - Node skl = nm->mkNode(STRING_LENGTH, sk); - if (n.getKind() == STRING_CONCAT) - { - std::vector<Node> nodeVec; - for (const Node& nc : n) - { - if (nc.getAttribute(StringsProxyVarAttribute())) - { - Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end()); - nodeVec.push_back(d_proxyVarToLength[nc]); - } - else - { - Node lni = nm->mkNode(STRING_LENGTH, nc); - nodeVec.push_back(lni); - } - } - lsum = nm->mkNode(PLUS, nodeVec); - lsum = Rewriter::rewrite(lsum); - } - else if (n.isConst()) - { - lsum = nm->mkConst(Rational(Word::getLength(n))); - } - Assert(!lsum.isNull()); - d_proxyVarToLength[sk] = lsum; - Node ceq = Rewriter::rewrite(skl.eqNode(lsum)); - - return nm->mkNode(AND, eq, ceq); -} - -void InferenceManager::registerTermAtomic(Node n, LengthStatus s) -{ - if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end()) - { - return; - } - d_lengthLemmaTermsCache.insert(n); - - if (s == LENGTH_IGNORE) - { - // ignore it - return; - } - std::map<Node, bool> reqPhase; - Node lenLem = getRegisterTermAtomicLemma(n, s, reqPhase); - if (!lenLem.isNull()) - { - Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem - << std::endl; - Trace("strings-assert") << "(assert " << lenLem << ")" << std::endl; - ++(d_statistics.d_lemmasRegisterTermAtomic); - d_out.lemma(lenLem); - } - for (const std::pair<const Node, bool>& rp : reqPhase) - { - d_out.requirePhase(rp.first, rp.second); - } -} - -Node InferenceManager::getRegisterTermAtomicLemma( - Node n, LengthStatus s, std::map<Node, bool>& reqPhase) -{ - NodeManager* nm = NodeManager::currentNM(); - Node n_len = nm->mkNode(kind::STRING_LENGTH, n); - - if (s == LENGTH_GEQ_ONE) - { - Node emp = Word::mkEmptyWord(n.getType()); - Node neq_empty = n.eqNode(emp).negate(); - Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero); - Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z); - Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one - << std::endl; - Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl; - return len_geq_one; - } - - if (s == LENGTH_ONE) - { - Node len_one = n_len.eqNode(d_one); - Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one - << std::endl; - Trace("strings-assert") << "(assert " << len_one << ")" << std::endl; - return len_one; - } - Assert(s == LENGTH_SPLIT); - - Node emp = Word::mkEmptyWord(n.getType()); - std::vector<Node> lems; - // split whether the string is empty - Node n_len_eq_z = n_len.eqNode(d_zero); - Node n_len_eq_z_2 = n.eqNode(emp); - Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2); - case_empty = Rewriter::rewrite(case_empty); - Node case_nempty = nm->mkNode(GT, n_len, d_zero); - if (!case_empty.isConst()) - { - Node lem = nm->mkNode(OR, case_empty, case_nempty); - lems.push_back(lem); - Trace("strings-lemma") << "Strings::Lemma LENGTH >= 0 : " << lem - << std::endl; - // prefer trying the empty case first - // notice that requirePhase must only be called on rewritten literals that - // occur in the CNF stream. - n_len_eq_z = Rewriter::rewrite(n_len_eq_z); - Assert(!n_len_eq_z.isConst()); - reqPhase[n_len_eq_z] = true; - n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2); - Assert(!n_len_eq_z_2.isConst()); - reqPhase[n_len_eq_z_2] = true; - } - else if (!case_empty.getConst<bool>()) - { - // the rewriter knows that n is non-empty - Trace("strings-lemma") << "Strings::Lemma LENGTH > 0 (non-empty): " - << case_nempty << std::endl; - lems.push_back(case_nempty); - } - else - { - // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that - // n ---> "". Since this method is only called on non-constants n, it must - // be that n = "" ^ len( n ) = 0 does not rewrite to true. - Assert(false); - } - - // additionally add len( x ) >= 0 ? - if (options::stringLenGeqZ()) - { - Node n_len_geq = nm->mkNode(kind::GEQ, n_len, d_zero); - n_len_geq = Rewriter::rewrite(n_len_geq); - lems.push_back(n_len_geq); - } - - if (lems.empty()) - { - return Node::null(); - } - return lems.size() == 1 ? lems[0] : nm->mkNode(AND, lems); -} - void InferenceManager::addToExplanation(Node a, Node b, std::vector<Node>& exp) const @@ -601,88 +369,6 @@ bool InferenceManager::hasProcessed() const return d_state.isInConflict() || !d_pendingLem.empty() || !d_pending.empty(); } -void InferenceManager::inferSubstitutionProxyVars( - Node n, - std::vector<Node>& vars, - std::vector<Node>& subs, - std::vector<Node>& unproc) const -{ - if (n.getKind() == AND) - { - for (const Node& nc : n) - { - inferSubstitutionProxyVars(nc, vars, subs, unproc); - } - return; - } - if (n.getKind() == EQUAL) - { - Node ns = n.substitute(vars.begin(), vars.end(), subs.begin(), subs.end()); - ns = Rewriter::rewrite(ns); - if (ns.getKind() == EQUAL) - { - Node s; - Node v; - for (unsigned i = 0; i < 2; i++) - { - Node ss; - // determine whether this side has a proxy variable - if (ns[i].getAttribute(StringsProxyVarAttribute())) - { - // it is a proxy variable - ss = ns[i]; - } - else if (ns[i].isConst()) - { - ss = getProxyVariableFor(ns[i]); - } - if (!ss.isNull()) - { - v = ns[1 - i]; - // if the other side is a constant or variable - if (v.getNumChildren() == 0) - { - if (s.isNull()) - { - s = ss; - } - else - { - // both sides of the equality correspond to a proxy variable - if (ss == s) - { - // it is a trivial equality, e.g. between a proxy variable - // and its definition - return; - } - else - { - // equality between proxy variables, non-trivial - s = Node::null(); - } - } - } - } - } - if (!s.isNull()) - { - // the equality can be turned into a substitution - subs.push_back(s); - vars.push_back(v); - return; - } - } - else - { - n = ns; - } - } - if (n != d_true) - { - unproc.push_back(n); - } -} - Node InferenceManager::mkExplain(const std::vector<Node>& a) const { std::vector<Node> an; diff --git a/src/theory/strings/inference_manager.h b/src/theory/strings/inference_manager.h index 60139ca83..c1550328c 100644 --- a/src/theory/strings/inference_manager.h +++ b/src/theory/strings/inference_manager.h @@ -26,8 +26,8 @@ #include "theory/output_channel.h" #include "theory/strings/infer_info.h" #include "theory/strings/sequences_stats.h" -#include "theory/strings/skolem_cache.h" #include "theory/strings/solver_state.h" +#include "theory/strings/term_registry.h" #include "theory/uf/equality_engine.h" namespace CVC4 { @@ -76,7 +76,7 @@ class InferenceManager context::Context* c, context::UserContext* u, SolverState& s, - SkolemCache& skc, + TermRegistry& tr, OutputChannel& out, SequencesStatistics& statistics); ~InferenceManager() {} @@ -176,60 +176,6 @@ class InferenceManager */ void sendPhaseRequirement(Node lit, bool pol); - //---------------------------- proxy variables and length elaboration - /** Get symbolic definition - * - * This method returns the "symbolic definition" of n, call it n', and - * populates the vector exp with an explanation such that exp => n = n'. - * - * The symbolic definition of n is the term where (maximal) subterms of n - * are replaced by their proxy variables. For example, if we introduced - * proxy variable v for x ++ y, then given input x ++ y = w, this method - * returns v = w and adds v = x ++ y to exp. - */ - Node getSymbolicDefinition(Node n, std::vector<Node>& exp) const; - /** Get proxy variable - * - * If this method returns the proxy variable for (string) term n if one - * exists, otherwise it returns null. - */ - Node getProxyVariableFor(Node n) const; - /** register term - * - * This method is called on non-constant string terms n. It returns a lemma - * that should be sent on the output channel of theory of strings upon - * registration of this term, or null if no lemma is necessary. - * - * If n is an atomic term, the method registerTermAtomic is called for n - * and s = LENGTH_SPLIT and no lemma is returned. - */ - Node registerTerm(Node n); - /** register length - * - * This method is called on non-constant string terms n that are "atomic" - * with respect to length. That is, the rewritten form of len(n) is itself. - * - * It sends a lemma on the output channel that ensures that the length n - * satisfies its assigned status (given by argument s). - * - * If the status is LENGTH_ONE, we send the lemma len( n ) = 1. - * - * If the status is LENGTH_GEQ, we send a lemma n != "" ^ len( n ) > 0. - * - * If the status is LENGTH_SPLIT, we send a send a lemma of the form: - * ( n = "" ^ len( n ) = 0 ) OR len( n ) > 0 - * This method also ensures that, when applicable, the left branch is taken - * first via calls to requirePhase. - * - * If the status is LENGTH_IGNORE, then no lemma is sent. This status is used - * e.g. when the length of n is already implied by other constraints. - * - * In contrast to the above functions, it makes immediate calls to the output - * channel instead of adding them to pending lists. - */ - void registerTermAtomic(Node n, LengthStatus s); - //---------------------------- end proxy variables and length elaboration - //----------------------------constructing antecedants /** * Adds equality a = b to the vector exp if a and b are distinct terms. It @@ -326,32 +272,15 @@ class InferenceManager * equality engine of this class. */ void sendInfer(Node eq_exp, Node eq, Inference infer); - /** - * Get the lemma required for registering the length information for - * atomic term n given length status s. For details, see registerTermAtomic. - * - * Additionally, this method may map literals to a required polarity in the - * argument reqPhase, which should be processed by a call to requiredPhase by - * the caller of this method. - */ - Node getRegisterTermAtomicLemma(Node n, - LengthStatus s, - std::map<Node, bool>& reqPhase); /** the parent theory of strings object */ TheoryStrings& d_parent; - /** - * This is a reference to the solver state of the theory of strings. - */ + /** Reference to the solver state of the theory of strings. */ SolverState& d_state; - /** cache of all skolems */ - SkolemCache& d_skCache; - /** the output channel - * - * This is a reference to the output channel of the theory of strings. - */ + /** Reference to the term registry of theory of strings */ + TermRegistry& d_termReg; + /** Reference to the output channel of the theory of strings. */ OutputChannel& d_out; - /** Reference to the statistics for the theory of strings/sequences. */ SequencesStatistics& d_statistics; @@ -375,58 +304,6 @@ class InferenceManager * SAT-context-dependent. */ NodeSet d_keep; - /** - * Map string terms to their "proxy variables". Proxy variables are used are - * intermediate variables so that length information can be communicated for - * constants. For example, to communicate that "ABC" has length 3, we - * introduce a proxy variable v_{"ABC"} for "ABC", and assert: - * v_{"ABC"} = "ABC" ^ len( v_{"ABC"} ) = 3 - * Notice this is required since we cannot directly write len( "ABC" ) = 3, - * which rewrites to 3 = 3. - * In the above example, we store "ABC" -> v_{"ABC"} in this map. - */ - NodeNodeMap d_proxyVar; - /** - * Map from proxy variables to their normalized length. In the above example, - * we store "ABC" -> 3. - */ - NodeNodeMap d_proxyVarToLength; - /** List of terms that we have register length for */ - NodeSet d_lengthLemmaTermsCache; - - /** infer substitution proxy vars - * - * This method attempts to (partially) convert the formula n into a - * substitution of the form: - * v1 -> s1, ..., vn -> sn - * where s1 ... sn are proxy variables and v1 ... vn are either variables - * or constants. - * - * This method ensures that P ^ v1 = s1 ^ ... ^ vn = sn ^ unproc is equivalent - * to P ^ n, where P is the conjunction of equalities corresponding to the - * definition of all proxy variables introduced by the theory of strings. - * - * For example, say that v1 was introduced as a proxy variable for "ABC", and - * v2 was introduced as a proxy variable for "AA". - * - * Given the input n := v1 = "ABC" ^ v2 = x ^ x = "AA", this method sets: - * vars = { x }, - * subs = { v2 }, - * unproc = {}. - * In particular, this says that the information content of n is essentially - * x = v2. The first and third conjunctions can be dropped from the - * explanation since these equalities simply correspond to definitions - * of proxy variables. - * - * This method is used as a performance heuristic. It can infer when the - * explanation of a fact depends only trivially on equalities corresponding - * to definitions of proxy variables, which can be omitted since they are - * assumed to hold globally. - */ - void inferSubstitutionProxyVars(Node n, - std::vector<Node>& vars, - std::vector<Node>& subs, - std::vector<Node>& unproc) const; }; } // namespace strings diff --git a/src/theory/strings/strings_fmf.cpp b/src/theory/strings/strings_fmf.cpp index 02af3949e..87ce5e7c6 100644 --- a/src/theory/strings/strings_fmf.cpp +++ b/src/theory/strings/strings_fmf.cpp @@ -26,45 +26,26 @@ namespace strings { StringsFmf::StringsFmf(context::Context* c, context::UserContext* u, Valuation valuation, - SkolemCache& skc) + TermRegistry& tr) : d_sslds(nullptr), d_satContext(c), d_userContext(u), d_valuation(valuation), - d_skCache(skc), - d_inputVars(u) + d_termReg(tr) { } StringsFmf::~StringsFmf() {} -void StringsFmf::preRegisterTerm(TNode n) -{ - if (!n.getType().isStringLike()) - { - return; - } - Kind k = n.getKind(); - // Our decision strategy will minimize the length of this term if it is a - // variable but not an internally generated Skolem, or a term that does - // not belong to this theory. - if (n.isVar() ? !d_skCache.isSkolem(n) : kindToTheoryId(k) != THEORY_STRINGS) - { - d_inputVars.insert(n); - Trace("strings-dstrat-reg") << "input variable: " << n << std::endl; - } -} - void StringsFmf::presolve() { d_sslds.reset(new StringSumLengthDecisionStrategy( d_satContext, d_userContext, d_valuation)); Trace("strings-dstrat-reg") << "presolve: register decision strategy." << std::endl; + const NodeSet& ivars = d_termReg.getInputVars(); std::vector<Node> inputVars; - for (NodeSet::const_iterator itr = d_inputVars.begin(); - itr != d_inputVars.end(); - ++itr) + for (NodeSet::const_iterator itr = ivars.begin(); itr != ivars.end(); ++itr) { inputVars.push_back(*itr); } diff --git a/src/theory/strings/strings_fmf.h b/src/theory/strings/strings_fmf.h index 375a100ff..9dfb1f590 100644 --- a/src/theory/strings/strings_fmf.h +++ b/src/theory/strings/strings_fmf.h @@ -22,7 +22,7 @@ #include "context/context.h" #include "expr/node.h" #include "theory/decision_strategy.h" -#include "theory/strings/skolem_cache.h" +#include "theory/strings/term_registry.h" #include "theory/valuation.h" namespace CVC4 { @@ -42,15 +42,8 @@ class StringsFmf StringsFmf(context::Context* c, context::UserContext* u, Valuation valuation, - SkolemCache& skc); + TermRegistry& tr); ~StringsFmf(); - /** preRegister term - * - * This determines if the term n should be added to d_inputVars, the set - * of terms of type string whose length we are minimizing with this decision - * strategy. - */ - void preRegisterTerm(TNode n); /** presolve * * This initializes a (new copy) of the decision strategy d_sslds. @@ -105,15 +98,10 @@ class StringsFmf context::Context* d_satContext; /** The user level assertion context for the theory of strings. */ context::UserContext* d_userContext; - /** The valuation object */ + /** The valuation object of theory of strings */ Valuation d_valuation; - /** reference to the skolem cache */ - SkolemCache& d_skCache; - /** - * The set of terms of type string whose length we are minimizing - * with this decision strategy. - */ - NodeSet d_inputVars; + /** The term registry of theory of strings */ + TermRegistry& d_termReg; }; } // namespace strings diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp new file mode 100644 index 000000000..4f1e41ebb --- /dev/null +++ b/src/theory/strings/term_registry.cpp @@ -0,0 +1,578 @@ +/********************* */ +/*! \file term_registry.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Tianyi Liang, Morgan Deters + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of term registry for the theory of strings. + **/ + +#include "theory/strings/term_registry.h" + +#include "expr/attribute.h" +#include "options/strings_options.h" +#include "smt/logic_exception.h" +#include "theory/rewriter.h" +#include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" + +using namespace std; +using namespace CVC4::context; +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +struct StringsProxyVarAttributeId +{ +}; +typedef expr::Attribute<StringsProxyVarAttributeId, bool> + StringsProxyVarAttribute; + +TermRegistry::TermRegistry(context::Context* c, + context::UserContext* u, + eq::EqualityEngine& ee, + OutputChannel& out, + SequencesStatistics& statistics) + : d_ee(ee), + d_out(out), + d_statistics(statistics), + d_hasStrCode(false), + d_functionsTerms(c), + d_inputVars(u), + d_preregisteredTerms(u), + d_registeredTerms(u), + d_registeredTypes(u), + d_proxyVar(u), + d_proxyVarToLength(u), + d_lengthLemmaTermsCache(u) +{ + NodeManager* nm = NodeManager::currentNM(); + d_zero = nm->mkConst(Rational(0)); + d_one = nm->mkConst(Rational(1)); + d_negOne = NodeManager::currentNM()->mkConst(Rational(-1)); + d_cardSize = utils::getAlphabetCardinality(); +} + +TermRegistry::~TermRegistry() {} + +void TermRegistry::preRegisterTerm(TNode n) +{ + if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end()) + { + return; + } + d_preregisteredTerms.insert(n); + Trace("strings-preregister") + << "TheoryString::preregister : " << n << std::endl; + // check for logic exceptions + Kind k = n.getKind(); + if (!options::stringExp()) + { + if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI + || k == STRING_STRREPL || k == STRING_STRREPLALL || k == STRING_STRCTN + || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER + || k == STRING_REV) + { + std::stringstream ss; + ss << "Term of kind " << k + << " not supported in default mode, try --strings-exp"; + throw LogicException(ss.str()); + } + } + if (k == EQUAL) + { + d_ee.addTriggerEquality(n); + return; + } + else if (k == STRING_IN_REGEXP) + { + d_out.requirePhase(n, true); + d_ee.addTriggerPredicate(n); + d_ee.addTerm(n[0]); + d_ee.addTerm(n[1]); + return; + } + else if (k == STRING_TO_CODE) + { + d_hasStrCode = true; + } + registerTerm(n, 0); + TypeNode tn = n.getType(); + if (tn.isRegExp() && n.isVar()) + { + std::stringstream ss; + ss << "Regular expression variables are not supported."; + throw LogicException(ss.str()); + } + if (tn.isString()) + { + // all characters of constants should fall in the alphabet + if (n.isConst()) + { + std::vector<unsigned> vec = n.getConst<String>().getVec(); + for (unsigned u : vec) + { + if (u >= d_cardSize) + { + std::stringstream ss; + ss << "Characters in string \"" << n + << "\" are outside of the given alphabet."; + throw LogicException(ss.str()); + } + } + } + d_ee.addTerm(n); + } + else if (tn.isBoolean()) + { + // Get triggered for both equal and dis-equal + d_ee.addTriggerPredicate(n); + } + else + { + // Function applications/predicates + d_ee.addTerm(n); + } + // Set d_functionsTerms stores all function applications that are + // relevant to theory combination. Notice that this is a subset of + // the applications whose kinds are function kinds in the equality + // engine. This means it does not include applications of operators + // like re.++, which is not a function kind in the equality engine. + // Concatenation terms do not need to be considered here because + // their arguments have string type and do not introduce any shared + // terms. + if (n.hasOperator() && d_ee.isFunctionKind(k) && k != STRING_CONCAT) + { + d_functionsTerms.push_back(n); + } + if (options::stringFMF()) + { + if (tn.isStringLike()) + { + // Our decision strategy will minimize the length of this term if it is a + // variable but not an internally generated Skolem, or a term that does + // not belong to this theory. + if (n.isVar() ? !d_skCache.isSkolem(n) + : kindToTheoryId(k) != THEORY_STRINGS) + { + d_inputVars.insert(n); + Trace("strings-preregister") << "input variable: " << n << std::endl; + } + } + } +} + +void TermRegistry::registerTerm(Node n, int effort) +{ + TypeNode tn = n.getType(); + bool do_register = true; + if (!tn.isStringLike()) + { + if (options::stringEagerLen()) + { + do_register = effort == 0; + } + else + { + do_register = effort > 0 || n.getKind() != STRING_CONCAT; + } + } + if (!do_register) + { + return; + } + if (d_registeredTerms.find(n) != d_registeredTerms.end()) + { + return; + } + d_registeredTerms.insert(n); + // ensure the type is registered + registerType(tn); + NodeManager* nm = NodeManager::currentNM(); + Debug("strings-register") << "TheoryStrings::registerTerm() " << n + << ", effort = " << effort << std::endl; + Node regTermLem; + if (tn.isStringLike()) + { + // register length information: + // for variables, split on empty vs positive length + // for concat/const/replace, introduce proxy var and state length relation + regTermLem = getRegisterTermLemma(n); + } + else if (n.getKind() == STRING_TO_CODE) + { + // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 ) + Node code_len = utils::mkNLength(n[0]).eqNode(d_one); + Node code_eq_neg1 = n.eqNode(d_negOne); + Node code_range = nm->mkNode( + AND, + nm->mkNode(GEQ, n, d_zero), + nm->mkNode( + LT, n, nm->mkConst(Rational(utils::getAlphabetCardinality())))); + regTermLem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1); + } + else if (n.getKind() == STRING_STRIDOF) + { + Node len = utils::mkNLength(n[0]); + regTermLem = nm->mkNode(AND, + nm->mkNode(GEQ, n, nm->mkConst(Rational(-1))), + nm->mkNode(LEQ, n, len)); + } + if (!regTermLem.isNull()) + { + Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem + << std::endl; + Trace("strings-assert") << "(assert " << regTermLem << ")" << std::endl; + ++(d_statistics.d_lemmasRegisterTerm); + d_out.lemma(regTermLem); + } +} + +void TermRegistry::registerType(TypeNode tn) +{ + if (d_registeredTypes.find(tn) != d_registeredTypes.end()) + { + return; + } + d_registeredTypes.insert(tn); + if (tn.isStringLike()) + { + // preregister the empty word for the type + Node emp = Word::mkEmptyWord(tn); + if (!d_ee.hasTerm(emp)) + { + preRegisterTerm(emp); + } + } +} + +Node TermRegistry::getRegisterTermLemma(Node n) +{ + Assert(n.getType().isStringLike()); + NodeManager* nm = NodeManager::currentNM(); + // register length information: + // for variables, split on empty vs positive length + // for concat/const/replace, introduce proxy var and state length relation + Node lsum; + if (n.getKind() != STRING_CONCAT && !n.isConst()) + { + Node lsumb = nm->mkNode(STRING_LENGTH, n); + lsum = Rewriter::rewrite(lsumb); + // can register length term if it does not rewrite + if (lsum == lsumb) + { + registerTermAtomic(n, LENGTH_SPLIT); + return Node::null(); + } + } + Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym"); + StringsProxyVarAttribute spva; + sk.setAttribute(spva, true); + Node eq = Rewriter::rewrite(sk.eqNode(n)); + d_proxyVar[n] = sk; + // If we are introducing a proxy for a constant or concat term, we do not + // need to send lemmas about its length, since its length is already + // implied. + if (n.isConst() || n.getKind() == STRING_CONCAT) + { + // do not send length lemma for sk. + registerTermAtomic(sk, LENGTH_IGNORE); + } + Node skl = nm->mkNode(STRING_LENGTH, sk); + if (n.getKind() == STRING_CONCAT) + { + std::vector<Node> nodeVec; + for (const Node& nc : n) + { + if (nc.getAttribute(StringsProxyVarAttribute())) + { + Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end()); + nodeVec.push_back(d_proxyVarToLength[nc]); + } + else + { + Node lni = nm->mkNode(STRING_LENGTH, nc); + nodeVec.push_back(lni); + } + } + lsum = nm->mkNode(PLUS, nodeVec); + lsum = Rewriter::rewrite(lsum); + } + else if (n.isConst()) + { + lsum = nm->mkConst(Rational(Word::getLength(n))); + } + Assert(!lsum.isNull()); + d_proxyVarToLength[sk] = lsum; + Node ceq = Rewriter::rewrite(skl.eqNode(lsum)); + + return nm->mkNode(AND, eq, ceq); +} + +void TermRegistry::registerTermAtomic(Node n, LengthStatus s) +{ + if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end()) + { + return; + } + d_lengthLemmaTermsCache.insert(n); + + if (s == LENGTH_IGNORE) + { + // ignore it + return; + } + std::map<Node, bool> reqPhase; + Node lenLem = getRegisterTermAtomicLemma(n, s, reqPhase); + if (!lenLem.isNull()) + { + Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem + << std::endl; + Trace("strings-assert") << "(assert " << lenLem << ")" << std::endl; + ++(d_statistics.d_lemmasRegisterTermAtomic); + d_out.lemma(lenLem); + } + for (const std::pair<const Node, bool>& rp : reqPhase) + { + d_out.requirePhase(rp.first, rp.second); + } +} + +SkolemCache* TermRegistry::getSkolemCache() { return &d_skCache; } + +const context::CDList<TNode>& TermRegistry::getFunctionTerms() const +{ + return d_functionsTerms; +} + +const context::CDHashSet<Node, NodeHashFunction>& TermRegistry::getInputVars() + const +{ + return d_inputVars; +} + +bool TermRegistry::hasStringCode() const { return d_hasStrCode; } + +Node TermRegistry::getRegisterTermAtomicLemma(Node n, + LengthStatus s, + std::map<Node, bool>& reqPhase) +{ + Assert(n.getType().isStringLike()); + NodeManager* nm = NodeManager::currentNM(); + Node n_len = nm->mkNode(kind::STRING_LENGTH, n); + Node emp = Word::mkEmptyWord(n.getType()); + if (s == LENGTH_GEQ_ONE) + { + Node neq_empty = n.eqNode(emp).negate(); + Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero); + Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z); + Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one + << std::endl; + Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl; + return len_geq_one; + } + + if (s == LENGTH_ONE) + { + Node len_one = n_len.eqNode(d_one); + Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one + << std::endl; + Trace("strings-assert") << "(assert " << len_one << ")" << std::endl; + return len_one; + } + Assert(s == LENGTH_SPLIT); + + std::vector<Node> lems; + // split whether the string is empty + Node n_len_eq_z = n_len.eqNode(d_zero); + Node n_len_eq_z_2 = n.eqNode(emp); + Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2); + case_empty = Rewriter::rewrite(case_empty); + Node case_nempty = nm->mkNode(GT, n_len, d_zero); + if (!case_empty.isConst()) + { + Node lem = nm->mkNode(OR, case_empty, case_nempty); + lems.push_back(lem); + // prefer trying the empty case first + // notice that requirePhase must only be called on rewritten literals that + // occur in the CNF stream. + n_len_eq_z = Rewriter::rewrite(n_len_eq_z); + Assert(!n_len_eq_z.isConst()); + reqPhase[n_len_eq_z] = true; + n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2); + Assert(!n_len_eq_z_2.isConst()); + reqPhase[n_len_eq_z_2] = true; + } + else if (!case_empty.getConst<bool>()) + { + // the rewriter knows that n is non-empty + lems.push_back(case_nempty); + } + else + { + // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that + // n ---> "". Since this method is only called on non-constants n, it must + // be that n = "" ^ len( n ) = 0 does not rewrite to true. + Assert(false); + } + + // additionally add len( x ) >= 0 ? + if (options::stringLenGeqZ()) + { + Node n_len_geq = nm->mkNode(kind::GEQ, n_len, d_zero); + n_len_geq = Rewriter::rewrite(n_len_geq); + lems.push_back(n_len_geq); + } + + if (lems.empty()) + { + return Node::null(); + } + return lems.size() == 1 ? lems[0] : nm->mkNode(AND, lems); +} + +Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const +{ + if (n.getNumChildren() == 0) + { + Node pn = getProxyVariableFor(n); + if (pn.isNull()) + { + return Node::null(); + } + Node eq = n.eqNode(pn); + eq = Rewriter::rewrite(eq); + if (std::find(exp.begin(), exp.end(), eq) == exp.end()) + { + exp.push_back(eq); + } + return pn; + } + std::vector<Node> children; + if (n.getMetaKind() == metakind::PARAMETERIZED) + { + children.push_back(n.getOperator()); + } + for (const Node& nc : n) + { + if (n.getType().isRegExp()) + { + children.push_back(nc); + } + else + { + Node ns = getSymbolicDefinition(nc, exp); + if (ns.isNull()) + { + return Node::null(); + } + else + { + children.push_back(ns); + } + } + } + return NodeManager::currentNM()->mkNode(n.getKind(), children); +} + +Node TermRegistry::getProxyVariableFor(Node n) const +{ + NodeNodeMap::const_iterator it = d_proxyVar.find(n); + if (it != d_proxyVar.end()) + { + return (*it).second; + } + return Node::null(); +} + +void TermRegistry::inferSubstitutionProxyVars(Node n, + std::vector<Node>& vars, + std::vector<Node>& subs, + std::vector<Node>& unproc) const +{ + if (n.getKind() == AND) + { + for (const Node& nc : n) + { + inferSubstitutionProxyVars(nc, vars, subs, unproc); + } + return; + } + if (n.getKind() == EQUAL) + { + Node ns = n.substitute(vars.begin(), vars.end(), subs.begin(), subs.end()); + ns = Rewriter::rewrite(ns); + if (ns.getKind() == EQUAL) + { + Node s; + Node v; + for (unsigned i = 0; i < 2; i++) + { + Node ss; + // determine whether this side has a proxy variable + if (ns[i].getAttribute(StringsProxyVarAttribute())) + { + // it is a proxy variable + ss = ns[i]; + } + else if (ns[i].isConst()) + { + ss = getProxyVariableFor(ns[i]); + } + if (!ss.isNull()) + { + v = ns[1 - i]; + // if the other side is a constant or variable + if (v.getNumChildren() == 0) + { + if (s.isNull()) + { + s = ss; + } + else + { + // both sides of the equality correspond to a proxy variable + if (ss == s) + { + // it is a trivial equality, e.g. between a proxy variable + // and its definition + return; + } + else + { + // equality between proxy variables, non-trivial + s = Node::null(); + } + } + } + } + } + if (!s.isNull()) + { + // the equality can be turned into a substitution + subs.push_back(s); + vars.push_back(v); + return; + } + } + else + { + n = ns; + } + } + if (!n.isConst() || !n.getConst<bool>()) + { + unproc.push_back(n); + } +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/term_registry.h b/src/theory/strings/term_registry.h new file mode 100644 index 000000000..b68e44b81 --- /dev/null +++ b/src/theory/strings/term_registry.h @@ -0,0 +1,261 @@ +/********************* */ +/*! \file theory_strings.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Tianyi Liang, Tim King + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Term registry for the theory of strings. + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__TERM_REGISTRY_H +#define CVC4__THEORY__STRINGS__TERM_REGISTRY_H + +#include "context/cdhashset.h" +#include "context/cdlist.h" +#include "theory/output_channel.h" +#include "theory/strings/infer_info.h" +#include "theory/strings/sequences_stats.h" +#include "theory/strings/skolem_cache.h" +#include "theory/uf/equality_engine.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** + * This class manages all the (pre)registration tasks for terms. These tasks + * include: + * (1) Sending out preregistration lemmas for terms, + * (2) Add terms to the equality engine, + * (3) Maintaining a list of terms d_functionsTerms (for theory combination), + * (4) Maintaining a list of input variables d_inputVars (for fmf). + * (5) Maintaining a skolem cache. Notice that this skolem cache is the + * official skolem cache that should be used by all modules in TheoryStrings. + */ +class TermRegistry +{ + typedef context::CDHashSet<Node, NodeHashFunction> NodeSet; + typedef context::CDHashSet<TypeNode, TypeNodeHashFunction> TypeNodeSet; + typedef context::CDHashMap<Node, Node, NodeHashFunction> NodeNodeMap; + + public: + TermRegistry(context::Context* c, + context::UserContext* u, + eq::EqualityEngine& ee, + OutputChannel& out, + SequencesStatistics& statistics); + ~TermRegistry(); + /** + * Preregister term, called when TheoryStrings::preRegisterTerm(n) is called. + * This does the following: + * - Checks for illegal terms and throws a LogicException if any term is + * not handled. + * - Adds the appropriate terms and triggers to the equality engine. + * - Updates information about which terms exist, including + * d_functionsTerms and d_hasStrCode. If we are using strings finite model + * finding (options::stringsFmf), we determine if the term n should be + * added to d_inputVars, the set of terms of type string whose length we are + * minimizing with its decision strategy. + * - Setting phase requirements on n if it is a formula and we prefer + * decisions with a particular polarity (e.g. positive regular expression + * memberships). + */ + void preRegisterTerm(TNode n); + /** Register term + * + * This performs SAT-context-independent registration for a term n, which + * may cause lemmas to be sent on the output channel that involve + * "initial refinement lemmas" for n. This includes introducing proxy + * variables for string terms and asserting that str.code terms are within + * proper bounds. + * + * Effort is one of the following (TODO make enum #1881): + * 0 : upon preregistration or internal assertion + * 1 : upon occurrence in length term + * 2 : before normal form computation + * 3 : called on normal form terms + * + * Based on the strategy, we may choose to add these initial refinement + * lemmas at one of the following efforts, where if it is not the given + * effort, the call to this method does nothing. + */ + void registerTerm(Node n, int effort); + /** register length + * + * This method is called on non-constant string terms n that are "atomic" + * with respect to length. That is, the rewritten form of len(n) is itself. + * + * It sends a lemma on the output channel that ensures that the length n + * satisfies its assigned status (given by argument s). + * + * If the status is LENGTH_ONE, we send the lemma len( n ) = 1. + * + * If the status is LENGTH_GEQ_ONE, we send a lemma n != "" ^ len( n ) > 0. + * + * If the status is LENGTH_SPLIT, we send a send a lemma of the form: + * ( n = "" ^ len( n ) = 0 ) OR len( n ) > 0 + * This method also ensures that, when applicable, the left branch is taken + * first via calls to requirePhase. + * + * If the status is LENGTH_IGNORE, then no lemma is sent. This status is used + * e.g. when the length of n is already implied by other constraints. + * + * In contrast to the above functions, it makes immediate calls to the output + * channel instead of adding them to pending lists. + */ + void registerTermAtomic(Node n, LengthStatus s); + //---------------------------- queries + /** Get the skolem cache of this object */ + SkolemCache* getSkolemCache(); + /** Get all function terms that have been preregistered to this object */ + const context::CDList<TNode>& getFunctionTerms() const; + /** + * Get the "input variables", corresponding to the set of leaf nodes of + * string-like type that have been preregistered as terms to this object. + */ + const context::CDHashSet<Node, NodeHashFunction>& getInputVars() const; + /** Returns true if any str.code terms have been preregistered */ + bool hasStringCode() const; + //---------------------------- end queries + //---------------------------- proxy variables + /** Get symbolic definition + * + * This method returns the "symbolic definition" of n, call it n', and + * populates the vector exp with an explanation such that exp => n = n'. + * + * The symbolic definition of n is the term where (maximal) subterms of n + * are replaced by their proxy variables. For example, if we introduced + * proxy variable v for x ++ y, then given input x ++ y = w, this method + * returns v = w and adds v = x ++ y to exp. + */ + Node getSymbolicDefinition(Node n, std::vector<Node>& exp) const; + /** Get proxy variable + * + * If this method returns the proxy variable for (string) term n if one + * exists, otherwise it returns null. + */ + Node getProxyVariableFor(Node n) const; + + /** infer substitution proxy vars + * + * This method attempts to (partially) convert the formula n into a + * substitution of the form: + * v1 -> s1, ..., vn -> sn + * where s1 ... sn are proxy variables and v1 ... vn are either variables + * or constants. + * + * This method ensures that P ^ v1 = s1 ^ ... ^ vn = sn ^ unproc is equivalent + * to P ^ n, where P is the conjunction of equalities corresponding to the + * definition of all proxy variables introduced by the theory of strings. + * + * For example, say that v1 was introduced as a proxy variable for "ABC", and + * v2 was introduced as a proxy variable for "AA". + * + * Given the input n := v1 = "ABC" ^ v2 = x ^ x = "AA", this method sets: + * vars = { x }, + * subs = { v2 }, + * unproc = {}. + * In particular, this says that the information content of n is essentially + * x = v2. The first and third conjunctions can be dropped from the + * explanation since these equalities simply correspond to definitions + * of proxy variables. + * + * This method is used as a performance heuristic. It can infer when the + * explanation of a fact depends only trivially on equalities corresponding + * to definitions of proxy variables, which can be omitted since they are + * assumed to hold globally. + */ + void inferSubstitutionProxyVars(Node n, + std::vector<Node>& vars, + std::vector<Node>& subs, + std::vector<Node>& unproc) const; + //---------------------------- end proxy variables + private: + /** Common constants */ + Node d_zero; + Node d_one; + Node d_negOne; + /** the cardinality of the alphabet */ + uint32_t d_cardSize; + /** Reference to equality engine of the theory of strings. */ + eq::EqualityEngine& d_ee; + /** Reference to the output channel of the theory of strings. */ + OutputChannel& d_out; + /** Reference to the statistics for the theory of strings/sequences. */ + SequencesStatistics& d_statistics; + /** have we asserted any str.code terms? */ + bool d_hasStrCode; + /** The cache of all skolems, which is owned by this class. */ + SkolemCache d_skCache; + /** All function terms that the theory has seen in the current SAT context */ + context::CDList<TNode> d_functionsTerms; + /** + * The set of terms of type string that are abstracted as leaf nodes. + */ + NodeSet d_inputVars; + /** The user-context dependent cache of terms that have been preregistered */ + NodeSet d_preregisteredTerms; + /** The user-context dependent cache of terms that have been registered */ + NodeSet d_registeredTerms; + /** The types that we have preregistered */ + TypeNodeSet d_registeredTypes; + /** + * Map string terms to their "proxy variables". Proxy variables are used are + * intermediate variables so that length information can be communicated for + * constants. For example, to communicate that "ABC" has length 3, we + * introduce a proxy variable v_{"ABC"} for "ABC", and assert: + * v_{"ABC"} = "ABC" ^ len( v_{"ABC"} ) = 3 + * Notice this is required since we cannot directly write len( "ABC" ) = 3, + * which rewrites to 3 = 3. + * In the above example, we store "ABC" -> v_{"ABC"} in this map. + */ + NodeNodeMap d_proxyVar; + /** + * Map from proxy variables to their normalized length. In the above example, + * we store "ABC" -> 3. + */ + NodeNodeMap d_proxyVarToLength; + /** List of terms that we have register length for */ + NodeSet d_lengthLemmaTermsCache; + /** Register type + * + * Ensures the theory solver is setup to handle string-like type tn. In + * particular, this includes: + * - Calling preRegisterTerm on the empty word for tn + */ + void registerType(TypeNode tn); + /** register term + * + * This method is called on non-constant string terms n. It returns a lemma + * that should be sent on the output channel of theory of strings upon + * registration of this term, or null if no lemma is necessary. + * + * If n is an atomic term, the method registerTermAtomic is called for n + * and s = LENGTH_SPLIT and no lemma is returned. + */ + Node getRegisterTermLemma(Node n); + /** + * Get the lemma required for registering the length information for + * atomic term n given length status s. For details, see registerTermAtomic. + * + * Additionally, this method may map literals to a required polarity in the + * argument reqPhase, which should be processed by a call to requiredPhase by + * the caller of this method. + */ + Node getRegisterTermAtomicLemma(Node n, + LengthStatus s, + std::map<Node, bool>& reqPhase); +}; + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__TERM_REGISTRY_H */ diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 81e43c595..3e490f285 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -72,18 +72,14 @@ TheoryStrings::TheoryStrings(context::Context* c, d_statistics(), d_equalityEngine(d_notify, c, "theory::strings::ee", true), d_state(c, d_equalityEngine, d_valuation), - d_im(*this, c, u, d_state, d_sk_cache, out, d_statistics), - d_pregistered_terms_cache(u), - d_registered_terms_cache(u), - d_registeredTypesCache(u), - d_functionsTerms(c), - d_has_str_code(false), + d_termReg(c, u, d_equalityEngine, out, d_statistics), + d_im(*this, c, u, d_state, d_termReg, out, d_statistics), d_rewriter(&d_statistics.d_rewrites), d_bsolver(c, u, d_state, d_im), - d_csolver(c, u, d_state, d_im, d_sk_cache, d_bsolver), + d_csolver(c, u, d_state, d_im, d_termReg, d_bsolver), d_esolver(nullptr), d_rsolver(nullptr), - d_stringsFmf(c, u, valuation, d_sk_cache), + d_stringsFmf(c, u, valuation, d_termReg), d_strategy_init(false) { setupExtTheory(); @@ -92,7 +88,7 @@ TheoryStrings::TheoryStrings(context::Context* c, u, d_state, d_im, - d_sk_cache, + d_termReg, d_rewriter, d_bsolver, d_csolver, @@ -374,7 +370,7 @@ bool TheoryStrings::collectModelInfoType( { // does it have a code and the length of these equivalence classes are // one? - if (d_has_str_code && lts_values[i] == d_one) + if (d_termReg.hasStringCode() && lts_values[i] == d_one) { EqcInfo* eip = d_state.getOrMakeEqcInfo(eqc, false); if (eip && !eip->d_codeTerm.get().isNull()) @@ -550,94 +546,11 @@ bool TheoryStrings::collectModelInfoType( // MAIN SOLVER ///////////////////////////////////////////////////////////////////////////// - -void TheoryStrings::preRegisterTerm(TNode n) { - if( d_pregistered_terms_cache.find(n) == d_pregistered_terms_cache.end() ) { - d_pregistered_terms_cache.insert(n); - Trace("strings-preregister") - << "TheoryString::preregister : " << n << std::endl; - //check for logic exceptions - Kind k = n.getKind(); - if( !options::stringExp() ){ - if (k == kind::STRING_STRIDOF || k == kind::STRING_ITOS - || k == kind::STRING_STOI || k == kind::STRING_STRREPL - || k == kind::STRING_STRREPLALL || k == kind::STRING_STRCTN - || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER - || k == STRING_REV) - { - std::stringstream ss; - ss << "Term of kind " << k - << " not supported in default mode, try --strings-exp"; - throw LogicException(ss.str()); - } - } - switch (k) - { - case kind::EQUAL: { - d_equalityEngine.addTriggerEquality(n); - break; - } - case kind::STRING_IN_REGEXP: { - d_out->requirePhase(n, true); - d_equalityEngine.addTriggerPredicate(n); - d_equalityEngine.addTerm(n[0]); - d_equalityEngine.addTerm(n[1]); - break; - } - default: { - registerTerm(n, 0); - TypeNode tn = n.getType(); - if (tn.isRegExp() && n.isVar()) - { - std::stringstream ss; - ss << "Regular expression variables are not supported."; - throw LogicException(ss.str()); - } - if( tn.isString() ) { - // all characters of constants should fall in the alphabet - if (n.isConst()) - { - std::vector<unsigned> vec = n.getConst<String>().getVec(); - for (unsigned u : vec) - { - if (u >= d_cardSize) - { - std::stringstream ss; - ss << "Characters in string \"" << n - << "\" are outside of the given alphabet."; - throw LogicException(ss.str()); - } - } - } - d_equalityEngine.addTerm(n); - } else if (tn.isBoolean()) { - // Get triggered for both equal and dis-equal - d_equalityEngine.addTriggerPredicate(n); - } else { - // Function applications/predicates - d_equalityEngine.addTerm(n); - } - // Set d_functionsTerms stores all function applications that are - // relevant to theory combination. Notice that this is a subset of - // the applications whose kinds are function kinds in the equality - // engine. This means it does not include applications of operators - // like re.++, which is not a function kind in the equality engine. - // Concatenation terms do not need to be considered here because - // their arguments have string type and do not introduce any shared - // terms. - if (n.hasOperator() && d_equalityEngine.isFunctionKind(k) - && k != kind::STRING_CONCAT) - { - d_functionsTerms.push_back( n ); - } - } - } - // register with finite model finding - if (options::stringFMF()) - { - d_stringsFmf.preRegisterTerm(n); - } - } +void TheoryStrings::preRegisterTerm(TNode n) +{ + Trace("strings-preregister") + << "TheoryStrings::preRegisterTerm: " << n << std::endl; + d_termReg.preRegisterTerm(n); } Node TheoryStrings::expandDefinition(Node node) @@ -799,7 +712,7 @@ void TheoryStrings::eqNotifyNewClass(TNode t){ { Trace("strings-debug") << "New length eqc : " << t << std::endl; //we care about the length of this string - registerTerm( t[0], 1 ); + d_termReg.registerTerm(t[0], 1); } d_state.eqNotifyNewClass(t); } @@ -887,9 +800,10 @@ void TheoryStrings::computeCareGraph(){ // since operators are polymorphic, taking strings/sequences. std::map<std::pair<TypeNode, Node>, TNodeTrie> index; std::map< Node, unsigned > arity; - unsigned functionTerms = d_functionsTerms.size(); + const context::CDList<TNode>& fterms = d_termReg.getFunctionTerms(); + size_t functionTerms = fterms.size(); for (unsigned i = 0; i < functionTerms; ++ i) { - TNode f1 = d_functionsTerms[i]; + TNode f1 = fterms[i]; Trace("strings-cg") << "...build for " << f1 << std::endl; Node op = f1.getOperator(); std::vector< TNode > reps; @@ -926,7 +840,7 @@ void TheoryStrings::assertPendingFact(Node atom, bool polarity, Node exp) { if (!d_equalityEngine.hasTerm(atom[j]) && atom[j].getType().isStringLike()) { - registerTerm( atom[j], 0 ); + d_termReg.registerTerm(atom[j], 0); } } Trace("strings-pending-debug") << " Now assert equality" << std::endl; @@ -980,7 +894,7 @@ void TheoryStrings::checkRegisterTermsPreNormalForm() Node n = (*eqc_i); if (!d_bsolver.isCongruent(n)) { - registerTerm(n, 2); + d_termReg.registerTerm(n, 2); } ++eqc_i; } @@ -991,7 +905,7 @@ void TheoryStrings::checkCodes() { // ensure that lemmas regarding str.code been added for each constant string // of length one - if (d_has_str_code) + if (d_termReg.hasStringCode()) { NodeManager* nm = NodeManager::currentNM(); // str.code applied to the code term for each equivalence class that has a @@ -1012,7 +926,7 @@ void TheoryStrings::checkCodes() Node cc = nm->mkNode(kind::STRING_TO_CODE, c); cc = Rewriter::rewrite(cc); Assert(cc.isConst()); - Node cp = d_im.getProxyVariableFor(c); + Node cp = d_termReg.getProxyVariableFor(c); AlwaysAssert(!cp.isNull()); Node vc = nm->mkNode(STRING_TO_CODE, cp); if (!d_state.areEqual(cc, vc)) @@ -1062,91 +976,6 @@ void TheoryStrings::checkCodes() } } -void TheoryStrings::registerTerm(Node n, int effort) -{ - TypeNode tn = n.getType(); - bool do_register = true; - if (!tn.isStringLike()) - { - if (options::stringEagerLen()) - { - do_register = effort == 0; - } - else - { - do_register = effort > 0 || n.getKind() != STRING_CONCAT; - } - } - if (!do_register) - { - return; - } - if (d_registered_terms_cache.find(n) != d_registered_terms_cache.end()) - { - return; - } - d_registered_terms_cache.insert(n); - // ensure the type is registered - registerType(tn); - NodeManager* nm = NodeManager::currentNM(); - Debug("strings-register") << "TheoryStrings::registerTerm() " << n - << ", effort = " << effort << std::endl; - Node regTermLem; - if (tn.isStringLike()) - { - // register length information: - // for variables, split on empty vs positive length - // for concat/const/replace, introduce proxy var and state length relation - regTermLem = d_im.registerTerm(n); - } - else if (n.getKind() == STRING_TO_CODE) - { - d_has_str_code = true; - // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 ) - Node code_len = utils::mkNLength(n[0]).eqNode(d_one); - Node code_eq_neg1 = n.eqNode(d_neg_one); - Node code_range = nm->mkNode( - AND, - nm->mkNode(GEQ, n, d_zero), - nm->mkNode( - LT, n, nm->mkConst(Rational(utils::getAlphabetCardinality())))); - regTermLem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1); - } - else if (n.getKind() == STRING_STRIDOF) - { - Node len = utils::mkNLength(n[0]); - regTermLem = nm->mkNode(AND, - nm->mkNode(GEQ, n, nm->mkConst(Rational(-1))), - nm->mkNode(LEQ, n, len)); - } - if (!regTermLem.isNull()) - { - Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem - << std::endl; - Trace("strings-assert") << "(assert " << regTermLem << ")" << std::endl; - ++(d_statistics.d_lemmasRegisterTerm); - d_out->lemma(regTermLem); - } -} - -void TheoryStrings::registerType(TypeNode tn) -{ - if (d_registeredTypesCache.find(tn) != d_registeredTypesCache.end()) - { - return; - } - d_registeredTypesCache.insert(tn); - if (tn.isStringLike()) - { - // preregister the empty word for the type - Node emp = Word::mkEmptyWord(tn); - if (!d_equalityEngine.hasTerm(emp)) - { - preRegisterTerm(emp); - } - } -} - void TheoryStrings::checkRegisterTermsNormalForms() { const std::vector<Node>& seqc = d_bsolver.getStringEqc(); @@ -1159,7 +988,7 @@ void TheoryStrings::checkRegisterTermsNormalForms() if (lt.isNull()) { Node c = utils::mkNConcat(nfi.d_nf, eqc.getType()); - registerTerm(c, 3); + d_termReg.registerTerm(c, 3); } } } diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 453e4eca9..4a61730f4 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -24,7 +24,6 @@ #include "context/cdhashset.h" #include "context/cdlist.h" -#include "expr/attribute.h" #include "expr/node_trie.h" #include "theory/strings/base_solver.h" #include "theory/strings/core_solver.h" @@ -40,6 +39,7 @@ #include "theory/strings/solver_state.h" #include "theory/strings/strings_fmf.h" #include "theory/strings/strings_rewriter.h" +#include "theory/strings/term_registry.h" #include "theory/theory.h" #include "theory/uf/equality_engine.h" @@ -93,9 +93,6 @@ enum InferStep }; std::ostream& operator<<(std::ostream& out, Inference i); -struct StringsProxyVarAttributeId {}; -typedef expr::Attribute< StringsProxyVarAttributeId, bool > StringsProxyVarAttribute; - class TheoryStrings : public Theory { friend class InferenceManager; typedef context::CDList<Node> NodeList; @@ -238,13 +235,10 @@ class TheoryStrings : public Theory { eq::EqualityEngine d_equalityEngine; /** The solver state object */ SolverState d_state; + /** The term registry for this theory */ + TermRegistry d_termReg; /** The (custom) output channel of the theory of strings */ InferenceManager d_im; - // preReg cache - NodeSet d_pregistered_terms_cache; - NodeSet d_registered_terms_cache; - /** The types that we have preregistered */ - TypeNodeSet d_registeredTypesCache; std::vector< Node > d_empty_vec; private: @@ -266,24 +260,6 @@ private: EqualityStatus getEqualityStatus(TNode a, TNode b) override; private: - /** All the function terms that the theory has seen */ - context::CDList<TNode> d_functionsTerms; -private: - /** have we asserted any str.code terms? */ - bool d_has_str_code; - // static information about extf - class ExtfInfo { - public: - //all variables in this term - std::vector< Node > d_vars; - }; - - private: - - /** cache of all skolems */ - SkolemCache d_sk_cache; - - private: void addCarePairs(TNodeTrie* t1, TNodeTrie* t2, unsigned arity, @@ -334,38 +310,10 @@ private: */ void assertPendingFact(Node atom, bool polarity, Node exp); - /** Register term - * - * This performs SAT-context-independent registration for a term n, which - * may cause lemmas to be sent on the output channel that involve - * "initial refinement lemmas" for n. This includes introducing proxy - * variables for string terms and asserting that str.code terms are within - * proper bounds. - * - * Effort is one of the following (TODO make enum #1881): - * 0 : upon preregistration or internal assertion - * 1 : upon occurrence in length term - * 2 : before normal form computation - * 3 : called on normal form terms - * - * Based on the strategy, we may choose to add these initial refinement - * lemmas at one of the following efforts, where if it is not the given - * effort, the call to this method does nothing. - */ - void registerTerm(Node n, int effort); - /** Register type - * - * Ensures the theory solver is setup to handle string-like type tn. In - * particular, this includes: - * - Calling preRegisterTerm on the empty word for tn - */ - void registerType(TypeNode tn); - // Symbolic Regular Expression private: /** The theory rewriter for this theory. */ StringsRewriter d_rewriter; - /** * The base solver, responsible for reasoning about congruent terms and * inferring constants for equivalence classes. |