From 42c765eb255e5bfa65682cd812973f0f3c90017c Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Wed, 15 Apr 2020 08:23:27 -0500 Subject: Split TermRegistry object from TheoryStrings (#4312) This consolidates functionalities from TheoryStrings and InferenceManager related to registering terms, including sending "preregistration lemmas" for them. The main purpose of this PR is to detangle this module from InferenceManager so that these two modules have exactly one call to OutputChannel::lemma each. For the purposes of the theory solvers, TermRegistry contains the official SkolemCache of TheoryStrings, and can be seen as subsuming the previous interface for this class. This PR is needed for further progress on strings proofs, marking as major since this will be a blocker shortly for this project. A few things were cleaned in this PR. One function changed name InferenceManager::registerTerm --> TermRegistry::getRegisterTermLemma. No major behavior changes. --- src/theory/strings/term_registry.cpp | 578 +++++++++++++++++++++++++++++++++++ 1 file changed, 578 insertions(+) create mode 100644 src/theory/strings/term_registry.cpp (limited to 'src/theory/strings/term_registry.cpp') diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp new file mode 100644 index 000000000..4f1e41ebb --- /dev/null +++ b/src/theory/strings/term_registry.cpp @@ -0,0 +1,578 @@ +/********************* */ +/*! \file term_registry.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Tianyi Liang, Morgan Deters + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of term registry for the theory of strings. + **/ + +#include "theory/strings/term_registry.h" + +#include "expr/attribute.h" +#include "options/strings_options.h" +#include "smt/logic_exception.h" +#include "theory/rewriter.h" +#include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" + +using namespace std; +using namespace CVC4::context; +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +struct StringsProxyVarAttributeId +{ +}; +typedef expr::Attribute + StringsProxyVarAttribute; + +TermRegistry::TermRegistry(context::Context* c, + context::UserContext* u, + eq::EqualityEngine& ee, + OutputChannel& out, + SequencesStatistics& statistics) + : d_ee(ee), + d_out(out), + d_statistics(statistics), + d_hasStrCode(false), + d_functionsTerms(c), + d_inputVars(u), + d_preregisteredTerms(u), + d_registeredTerms(u), + d_registeredTypes(u), + d_proxyVar(u), + d_proxyVarToLength(u), + d_lengthLemmaTermsCache(u) +{ + NodeManager* nm = NodeManager::currentNM(); + d_zero = nm->mkConst(Rational(0)); + d_one = nm->mkConst(Rational(1)); + d_negOne = NodeManager::currentNM()->mkConst(Rational(-1)); + d_cardSize = utils::getAlphabetCardinality(); +} + +TermRegistry::~TermRegistry() {} + +void TermRegistry::preRegisterTerm(TNode n) +{ + if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end()) + { + return; + } + d_preregisteredTerms.insert(n); + Trace("strings-preregister") + << "TheoryString::preregister : " << n << std::endl; + // check for logic exceptions + Kind k = n.getKind(); + if (!options::stringExp()) + { + if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI + || k == STRING_STRREPL || k == STRING_STRREPLALL || k == STRING_STRCTN + || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER + || k == STRING_REV) + { + std::stringstream ss; + ss << "Term of kind " << k + << " not supported in default mode, try --strings-exp"; + throw LogicException(ss.str()); + } + } + if (k == EQUAL) + { + d_ee.addTriggerEquality(n); + return; + } + else if (k == STRING_IN_REGEXP) + { + d_out.requirePhase(n, true); + d_ee.addTriggerPredicate(n); + d_ee.addTerm(n[0]); + d_ee.addTerm(n[1]); + return; + } + else if (k == STRING_TO_CODE) + { + d_hasStrCode = true; + } + registerTerm(n, 0); + TypeNode tn = n.getType(); + if (tn.isRegExp() && n.isVar()) + { + std::stringstream ss; + ss << "Regular expression variables are not supported."; + throw LogicException(ss.str()); + } + if (tn.isString()) + { + // all characters of constants should fall in the alphabet + if (n.isConst()) + { + std::vector vec = n.getConst().getVec(); + for (unsigned u : vec) + { + if (u >= d_cardSize) + { + std::stringstream ss; + ss << "Characters in string \"" << n + << "\" are outside of the given alphabet."; + throw LogicException(ss.str()); + } + } + } + d_ee.addTerm(n); + } + else if (tn.isBoolean()) + { + // Get triggered for both equal and dis-equal + d_ee.addTriggerPredicate(n); + } + else + { + // Function applications/predicates + d_ee.addTerm(n); + } + // Set d_functionsTerms stores all function applications that are + // relevant to theory combination. Notice that this is a subset of + // the applications whose kinds are function kinds in the equality + // engine. This means it does not include applications of operators + // like re.++, which is not a function kind in the equality engine. + // Concatenation terms do not need to be considered here because + // their arguments have string type and do not introduce any shared + // terms. + if (n.hasOperator() && d_ee.isFunctionKind(k) && k != STRING_CONCAT) + { + d_functionsTerms.push_back(n); + } + if (options::stringFMF()) + { + if (tn.isStringLike()) + { + // Our decision strategy will minimize the length of this term if it is a + // variable but not an internally generated Skolem, or a term that does + // not belong to this theory. + if (n.isVar() ? !d_skCache.isSkolem(n) + : kindToTheoryId(k) != THEORY_STRINGS) + { + d_inputVars.insert(n); + Trace("strings-preregister") << "input variable: " << n << std::endl; + } + } + } +} + +void TermRegistry::registerTerm(Node n, int effort) +{ + TypeNode tn = n.getType(); + bool do_register = true; + if (!tn.isStringLike()) + { + if (options::stringEagerLen()) + { + do_register = effort == 0; + } + else + { + do_register = effort > 0 || n.getKind() != STRING_CONCAT; + } + } + if (!do_register) + { + return; + } + if (d_registeredTerms.find(n) != d_registeredTerms.end()) + { + return; + } + d_registeredTerms.insert(n); + // ensure the type is registered + registerType(tn); + NodeManager* nm = NodeManager::currentNM(); + Debug("strings-register") << "TheoryStrings::registerTerm() " << n + << ", effort = " << effort << std::endl; + Node regTermLem; + if (tn.isStringLike()) + { + // register length information: + // for variables, split on empty vs positive length + // for concat/const/replace, introduce proxy var and state length relation + regTermLem = getRegisterTermLemma(n); + } + else if (n.getKind() == STRING_TO_CODE) + { + // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 ) + Node code_len = utils::mkNLength(n[0]).eqNode(d_one); + Node code_eq_neg1 = n.eqNode(d_negOne); + Node code_range = nm->mkNode( + AND, + nm->mkNode(GEQ, n, d_zero), + nm->mkNode( + LT, n, nm->mkConst(Rational(utils::getAlphabetCardinality())))); + regTermLem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1); + } + else if (n.getKind() == STRING_STRIDOF) + { + Node len = utils::mkNLength(n[0]); + regTermLem = nm->mkNode(AND, + nm->mkNode(GEQ, n, nm->mkConst(Rational(-1))), + nm->mkNode(LEQ, n, len)); + } + if (!regTermLem.isNull()) + { + Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem + << std::endl; + Trace("strings-assert") << "(assert " << regTermLem << ")" << std::endl; + ++(d_statistics.d_lemmasRegisterTerm); + d_out.lemma(regTermLem); + } +} + +void TermRegistry::registerType(TypeNode tn) +{ + if (d_registeredTypes.find(tn) != d_registeredTypes.end()) + { + return; + } + d_registeredTypes.insert(tn); + if (tn.isStringLike()) + { + // preregister the empty word for the type + Node emp = Word::mkEmptyWord(tn); + if (!d_ee.hasTerm(emp)) + { + preRegisterTerm(emp); + } + } +} + +Node TermRegistry::getRegisterTermLemma(Node n) +{ + Assert(n.getType().isStringLike()); + NodeManager* nm = NodeManager::currentNM(); + // register length information: + // for variables, split on empty vs positive length + // for concat/const/replace, introduce proxy var and state length relation + Node lsum; + if (n.getKind() != STRING_CONCAT && !n.isConst()) + { + Node lsumb = nm->mkNode(STRING_LENGTH, n); + lsum = Rewriter::rewrite(lsumb); + // can register length term if it does not rewrite + if (lsum == lsumb) + { + registerTermAtomic(n, LENGTH_SPLIT); + return Node::null(); + } + } + Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym"); + StringsProxyVarAttribute spva; + sk.setAttribute(spva, true); + Node eq = Rewriter::rewrite(sk.eqNode(n)); + d_proxyVar[n] = sk; + // If we are introducing a proxy for a constant or concat term, we do not + // need to send lemmas about its length, since its length is already + // implied. + if (n.isConst() || n.getKind() == STRING_CONCAT) + { + // do not send length lemma for sk. + registerTermAtomic(sk, LENGTH_IGNORE); + } + Node skl = nm->mkNode(STRING_LENGTH, sk); + if (n.getKind() == STRING_CONCAT) + { + std::vector nodeVec; + for (const Node& nc : n) + { + if (nc.getAttribute(StringsProxyVarAttribute())) + { + Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end()); + nodeVec.push_back(d_proxyVarToLength[nc]); + } + else + { + Node lni = nm->mkNode(STRING_LENGTH, nc); + nodeVec.push_back(lni); + } + } + lsum = nm->mkNode(PLUS, nodeVec); + lsum = Rewriter::rewrite(lsum); + } + else if (n.isConst()) + { + lsum = nm->mkConst(Rational(Word::getLength(n))); + } + Assert(!lsum.isNull()); + d_proxyVarToLength[sk] = lsum; + Node ceq = Rewriter::rewrite(skl.eqNode(lsum)); + + return nm->mkNode(AND, eq, ceq); +} + +void TermRegistry::registerTermAtomic(Node n, LengthStatus s) +{ + if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end()) + { + return; + } + d_lengthLemmaTermsCache.insert(n); + + if (s == LENGTH_IGNORE) + { + // ignore it + return; + } + std::map reqPhase; + Node lenLem = getRegisterTermAtomicLemma(n, s, reqPhase); + if (!lenLem.isNull()) + { + Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem + << std::endl; + Trace("strings-assert") << "(assert " << lenLem << ")" << std::endl; + ++(d_statistics.d_lemmasRegisterTermAtomic); + d_out.lemma(lenLem); + } + for (const std::pair& rp : reqPhase) + { + d_out.requirePhase(rp.first, rp.second); + } +} + +SkolemCache* TermRegistry::getSkolemCache() { return &d_skCache; } + +const context::CDList& TermRegistry::getFunctionTerms() const +{ + return d_functionsTerms; +} + +const context::CDHashSet& TermRegistry::getInputVars() + const +{ + return d_inputVars; +} + +bool TermRegistry::hasStringCode() const { return d_hasStrCode; } + +Node TermRegistry::getRegisterTermAtomicLemma(Node n, + LengthStatus s, + std::map& reqPhase) +{ + Assert(n.getType().isStringLike()); + NodeManager* nm = NodeManager::currentNM(); + Node n_len = nm->mkNode(kind::STRING_LENGTH, n); + Node emp = Word::mkEmptyWord(n.getType()); + if (s == LENGTH_GEQ_ONE) + { + Node neq_empty = n.eqNode(emp).negate(); + Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero); + Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z); + Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one + << std::endl; + Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl; + return len_geq_one; + } + + if (s == LENGTH_ONE) + { + Node len_one = n_len.eqNode(d_one); + Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one + << std::endl; + Trace("strings-assert") << "(assert " << len_one << ")" << std::endl; + return len_one; + } + Assert(s == LENGTH_SPLIT); + + std::vector lems; + // split whether the string is empty + Node n_len_eq_z = n_len.eqNode(d_zero); + Node n_len_eq_z_2 = n.eqNode(emp); + Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2); + case_empty = Rewriter::rewrite(case_empty); + Node case_nempty = nm->mkNode(GT, n_len, d_zero); + if (!case_empty.isConst()) + { + Node lem = nm->mkNode(OR, case_empty, case_nempty); + lems.push_back(lem); + // prefer trying the empty case first + // notice that requirePhase must only be called on rewritten literals that + // occur in the CNF stream. + n_len_eq_z = Rewriter::rewrite(n_len_eq_z); + Assert(!n_len_eq_z.isConst()); + reqPhase[n_len_eq_z] = true; + n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2); + Assert(!n_len_eq_z_2.isConst()); + reqPhase[n_len_eq_z_2] = true; + } + else if (!case_empty.getConst()) + { + // the rewriter knows that n is non-empty + lems.push_back(case_nempty); + } + else + { + // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that + // n ---> "". Since this method is only called on non-constants n, it must + // be that n = "" ^ len( n ) = 0 does not rewrite to true. + Assert(false); + } + + // additionally add len( x ) >= 0 ? + if (options::stringLenGeqZ()) + { + Node n_len_geq = nm->mkNode(kind::GEQ, n_len, d_zero); + n_len_geq = Rewriter::rewrite(n_len_geq); + lems.push_back(n_len_geq); + } + + if (lems.empty()) + { + return Node::null(); + } + return lems.size() == 1 ? lems[0] : nm->mkNode(AND, lems); +} + +Node TermRegistry::getSymbolicDefinition(Node n, std::vector& exp) const +{ + if (n.getNumChildren() == 0) + { + Node pn = getProxyVariableFor(n); + if (pn.isNull()) + { + return Node::null(); + } + Node eq = n.eqNode(pn); + eq = Rewriter::rewrite(eq); + if (std::find(exp.begin(), exp.end(), eq) == exp.end()) + { + exp.push_back(eq); + } + return pn; + } + std::vector children; + if (n.getMetaKind() == metakind::PARAMETERIZED) + { + children.push_back(n.getOperator()); + } + for (const Node& nc : n) + { + if (n.getType().isRegExp()) + { + children.push_back(nc); + } + else + { + Node ns = getSymbolicDefinition(nc, exp); + if (ns.isNull()) + { + return Node::null(); + } + else + { + children.push_back(ns); + } + } + } + return NodeManager::currentNM()->mkNode(n.getKind(), children); +} + +Node TermRegistry::getProxyVariableFor(Node n) const +{ + NodeNodeMap::const_iterator it = d_proxyVar.find(n); + if (it != d_proxyVar.end()) + { + return (*it).second; + } + return Node::null(); +} + +void TermRegistry::inferSubstitutionProxyVars(Node n, + std::vector& vars, + std::vector& subs, + std::vector& unproc) const +{ + if (n.getKind() == AND) + { + for (const Node& nc : n) + { + inferSubstitutionProxyVars(nc, vars, subs, unproc); + } + return; + } + if (n.getKind() == EQUAL) + { + Node ns = n.substitute(vars.begin(), vars.end(), subs.begin(), subs.end()); + ns = Rewriter::rewrite(ns); + if (ns.getKind() == EQUAL) + { + Node s; + Node v; + for (unsigned i = 0; i < 2; i++) + { + Node ss; + // determine whether this side has a proxy variable + if (ns[i].getAttribute(StringsProxyVarAttribute())) + { + // it is a proxy variable + ss = ns[i]; + } + else if (ns[i].isConst()) + { + ss = getProxyVariableFor(ns[i]); + } + if (!ss.isNull()) + { + v = ns[1 - i]; + // if the other side is a constant or variable + if (v.getNumChildren() == 0) + { + if (s.isNull()) + { + s = ss; + } + else + { + // both sides of the equality correspond to a proxy variable + if (ss == s) + { + // it is a trivial equality, e.g. between a proxy variable + // and its definition + return; + } + else + { + // equality between proxy variables, non-trivial + s = Node::null(); + } + } + } + } + } + if (!s.isNull()) + { + // the equality can be turned into a substitution + subs.push_back(s); + vars.push_back(v); + return; + } + } + else + { + n = ns; + } + } + if (!n.isConst() || !n.getConst()) + { + unproc.push_back(n); + } +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 -- cgit v1.2.3