summaryrefslogtreecommitdiff
path: root/src/theory/strings/term_registry.cpp
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2020-04-15 08:23:27 -0500
committerGitHub <noreply@github.com>2020-04-15 08:23:27 -0500
commit42c765eb255e5bfa65682cd812973f0f3c90017c (patch)
tree7d45554cfc628cfd457e731d2f0d1bc6f6f029c8 /src/theory/strings/term_registry.cpp
parent0f3e4d1c802e1fe5fd90e712a6382ff9ca2bab34 (diff)
Split TermRegistry object from TheoryStrings (#4312)
This consolidates functionalities from TheoryStrings and InferenceManager related to registering terms, including sending "preregistration lemmas" for them. The main purpose of this PR is to detangle this module from InferenceManager so that these two modules have exactly one call to OutputChannel::lemma each. For the purposes of the theory solvers, TermRegistry contains the official SkolemCache of TheoryStrings, and can be seen as subsuming the previous interface for this class. This PR is needed for further progress on strings proofs, marking as major since this will be a blocker shortly for this project. A few things were cleaned in this PR. One function changed name InferenceManager::registerTerm --> TermRegistry::getRegisterTermLemma. No major behavior changes.
Diffstat (limited to 'src/theory/strings/term_registry.cpp')
-rw-r--r--src/theory/strings/term_registry.cpp578
1 files changed, 578 insertions, 0 deletions
diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp
new file mode 100644
index 000000000..4f1e41ebb
--- /dev/null
+++ b/src/theory/strings/term_registry.cpp
@@ -0,0 +1,578 @@
+/********************* */
+/*! \file term_registry.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ ** Andrew Reynolds, Tianyi Liang, Morgan Deters
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved. See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of term registry for the theory of strings.
+ **/
+
+#include "theory/strings/term_registry.h"
+
+#include "expr/attribute.h"
+#include "options/strings_options.h"
+#include "smt/logic_exception.h"
+#include "theory/rewriter.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
+
+using namespace std;
+using namespace CVC4::context;
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+struct StringsProxyVarAttributeId
+{
+};
+typedef expr::Attribute<StringsProxyVarAttributeId, bool>
+ StringsProxyVarAttribute;
+
+TermRegistry::TermRegistry(context::Context* c,
+ context::UserContext* u,
+ eq::EqualityEngine& ee,
+ OutputChannel& out,
+ SequencesStatistics& statistics)
+ : d_ee(ee),
+ d_out(out),
+ d_statistics(statistics),
+ d_hasStrCode(false),
+ d_functionsTerms(c),
+ d_inputVars(u),
+ d_preregisteredTerms(u),
+ d_registeredTerms(u),
+ d_registeredTypes(u),
+ d_proxyVar(u),
+ d_proxyVarToLength(u),
+ d_lengthLemmaTermsCache(u)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ d_zero = nm->mkConst(Rational(0));
+ d_one = nm->mkConst(Rational(1));
+ d_negOne = NodeManager::currentNM()->mkConst(Rational(-1));
+ d_cardSize = utils::getAlphabetCardinality();
+}
+
+TermRegistry::~TermRegistry() {}
+
+void TermRegistry::preRegisterTerm(TNode n)
+{
+ if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end())
+ {
+ return;
+ }
+ d_preregisteredTerms.insert(n);
+ Trace("strings-preregister")
+ << "TheoryString::preregister : " << n << std::endl;
+ // check for logic exceptions
+ Kind k = n.getKind();
+ if (!options::stringExp())
+ {
+ if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
+ || k == STRING_STRREPL || k == STRING_STRREPLALL || k == STRING_STRCTN
+ || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER
+ || k == STRING_REV)
+ {
+ std::stringstream ss;
+ ss << "Term of kind " << k
+ << " not supported in default mode, try --strings-exp";
+ throw LogicException(ss.str());
+ }
+ }
+ if (k == EQUAL)
+ {
+ d_ee.addTriggerEquality(n);
+ return;
+ }
+ else if (k == STRING_IN_REGEXP)
+ {
+ d_out.requirePhase(n, true);
+ d_ee.addTriggerPredicate(n);
+ d_ee.addTerm(n[0]);
+ d_ee.addTerm(n[1]);
+ return;
+ }
+ else if (k == STRING_TO_CODE)
+ {
+ d_hasStrCode = true;
+ }
+ registerTerm(n, 0);
+ TypeNode tn = n.getType();
+ if (tn.isRegExp() && n.isVar())
+ {
+ std::stringstream ss;
+ ss << "Regular expression variables are not supported.";
+ throw LogicException(ss.str());
+ }
+ if (tn.isString())
+ {
+ // all characters of constants should fall in the alphabet
+ if (n.isConst())
+ {
+ std::vector<unsigned> vec = n.getConst<String>().getVec();
+ for (unsigned u : vec)
+ {
+ if (u >= d_cardSize)
+ {
+ std::stringstream ss;
+ ss << "Characters in string \"" << n
+ << "\" are outside of the given alphabet.";
+ throw LogicException(ss.str());
+ }
+ }
+ }
+ d_ee.addTerm(n);
+ }
+ else if (tn.isBoolean())
+ {
+ // Get triggered for both equal and dis-equal
+ d_ee.addTriggerPredicate(n);
+ }
+ else
+ {
+ // Function applications/predicates
+ d_ee.addTerm(n);
+ }
+ // Set d_functionsTerms stores all function applications that are
+ // relevant to theory combination. Notice that this is a subset of
+ // the applications whose kinds are function kinds in the equality
+ // engine. This means it does not include applications of operators
+ // like re.++, which is not a function kind in the equality engine.
+ // Concatenation terms do not need to be considered here because
+ // their arguments have string type and do not introduce any shared
+ // terms.
+ if (n.hasOperator() && d_ee.isFunctionKind(k) && k != STRING_CONCAT)
+ {
+ d_functionsTerms.push_back(n);
+ }
+ if (options::stringFMF())
+ {
+ if (tn.isStringLike())
+ {
+ // Our decision strategy will minimize the length of this term if it is a
+ // variable but not an internally generated Skolem, or a term that does
+ // not belong to this theory.
+ if (n.isVar() ? !d_skCache.isSkolem(n)
+ : kindToTheoryId(k) != THEORY_STRINGS)
+ {
+ d_inputVars.insert(n);
+ Trace("strings-preregister") << "input variable: " << n << std::endl;
+ }
+ }
+ }
+}
+
+void TermRegistry::registerTerm(Node n, int effort)
+{
+ TypeNode tn = n.getType();
+ bool do_register = true;
+ if (!tn.isStringLike())
+ {
+ if (options::stringEagerLen())
+ {
+ do_register = effort == 0;
+ }
+ else
+ {
+ do_register = effort > 0 || n.getKind() != STRING_CONCAT;
+ }
+ }
+ if (!do_register)
+ {
+ return;
+ }
+ if (d_registeredTerms.find(n) != d_registeredTerms.end())
+ {
+ return;
+ }
+ d_registeredTerms.insert(n);
+ // ensure the type is registered
+ registerType(tn);
+ NodeManager* nm = NodeManager::currentNM();
+ Debug("strings-register") << "TheoryStrings::registerTerm() " << n
+ << ", effort = " << effort << std::endl;
+ Node regTermLem;
+ if (tn.isStringLike())
+ {
+ // register length information:
+ // for variables, split on empty vs positive length
+ // for concat/const/replace, introduce proxy var and state length relation
+ regTermLem = getRegisterTermLemma(n);
+ }
+ else if (n.getKind() == STRING_TO_CODE)
+ {
+ // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
+ Node code_len = utils::mkNLength(n[0]).eqNode(d_one);
+ Node code_eq_neg1 = n.eqNode(d_negOne);
+ Node code_range = nm->mkNode(
+ AND,
+ nm->mkNode(GEQ, n, d_zero),
+ nm->mkNode(
+ LT, n, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
+ regTermLem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
+ }
+ else if (n.getKind() == STRING_STRIDOF)
+ {
+ Node len = utils::mkNLength(n[0]);
+ regTermLem = nm->mkNode(AND,
+ nm->mkNode(GEQ, n, nm->mkConst(Rational(-1))),
+ nm->mkNode(LEQ, n, len));
+ }
+ if (!regTermLem.isNull())
+ {
+ Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
+ << std::endl;
+ Trace("strings-assert") << "(assert " << regTermLem << ")" << std::endl;
+ ++(d_statistics.d_lemmasRegisterTerm);
+ d_out.lemma(regTermLem);
+ }
+}
+
+void TermRegistry::registerType(TypeNode tn)
+{
+ if (d_registeredTypes.find(tn) != d_registeredTypes.end())
+ {
+ return;
+ }
+ d_registeredTypes.insert(tn);
+ if (tn.isStringLike())
+ {
+ // preregister the empty word for the type
+ Node emp = Word::mkEmptyWord(tn);
+ if (!d_ee.hasTerm(emp))
+ {
+ preRegisterTerm(emp);
+ }
+ }
+}
+
+Node TermRegistry::getRegisterTermLemma(Node n)
+{
+ Assert(n.getType().isStringLike());
+ NodeManager* nm = NodeManager::currentNM();
+ // register length information:
+ // for variables, split on empty vs positive length
+ // for concat/const/replace, introduce proxy var and state length relation
+ Node lsum;
+ if (n.getKind() != STRING_CONCAT && !n.isConst())
+ {
+ Node lsumb = nm->mkNode(STRING_LENGTH, n);
+ lsum = Rewriter::rewrite(lsumb);
+ // can register length term if it does not rewrite
+ if (lsum == lsumb)
+ {
+ registerTermAtomic(n, LENGTH_SPLIT);
+ return Node::null();
+ }
+ }
+ Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
+ StringsProxyVarAttribute spva;
+ sk.setAttribute(spva, true);
+ Node eq = Rewriter::rewrite(sk.eqNode(n));
+ d_proxyVar[n] = sk;
+ // If we are introducing a proxy for a constant or concat term, we do not
+ // need to send lemmas about its length, since its length is already
+ // implied.
+ if (n.isConst() || n.getKind() == STRING_CONCAT)
+ {
+ // do not send length lemma for sk.
+ registerTermAtomic(sk, LENGTH_IGNORE);
+ }
+ Node skl = nm->mkNode(STRING_LENGTH, sk);
+ if (n.getKind() == STRING_CONCAT)
+ {
+ std::vector<Node> nodeVec;
+ for (const Node& nc : n)
+ {
+ if (nc.getAttribute(StringsProxyVarAttribute()))
+ {
+ Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end());
+ nodeVec.push_back(d_proxyVarToLength[nc]);
+ }
+ else
+ {
+ Node lni = nm->mkNode(STRING_LENGTH, nc);
+ nodeVec.push_back(lni);
+ }
+ }
+ lsum = nm->mkNode(PLUS, nodeVec);
+ lsum = Rewriter::rewrite(lsum);
+ }
+ else if (n.isConst())
+ {
+ lsum = nm->mkConst(Rational(Word::getLength(n)));
+ }
+ Assert(!lsum.isNull());
+ d_proxyVarToLength[sk] = lsum;
+ Node ceq = Rewriter::rewrite(skl.eqNode(lsum));
+
+ return nm->mkNode(AND, eq, ceq);
+}
+
+void TermRegistry::registerTermAtomic(Node n, LengthStatus s)
+{
+ if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
+ {
+ return;
+ }
+ d_lengthLemmaTermsCache.insert(n);
+
+ if (s == LENGTH_IGNORE)
+ {
+ // ignore it
+ return;
+ }
+ std::map<Node, bool> reqPhase;
+ Node lenLem = getRegisterTermAtomicLemma(n, s, reqPhase);
+ if (!lenLem.isNull())
+ {
+ Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
+ << std::endl;
+ Trace("strings-assert") << "(assert " << lenLem << ")" << std::endl;
+ ++(d_statistics.d_lemmasRegisterTermAtomic);
+ d_out.lemma(lenLem);
+ }
+ for (const std::pair<const Node, bool>& rp : reqPhase)
+ {
+ d_out.requirePhase(rp.first, rp.second);
+ }
+}
+
+SkolemCache* TermRegistry::getSkolemCache() { return &d_skCache; }
+
+const context::CDList<TNode>& TermRegistry::getFunctionTerms() const
+{
+ return d_functionsTerms;
+}
+
+const context::CDHashSet<Node, NodeHashFunction>& TermRegistry::getInputVars()
+ const
+{
+ return d_inputVars;
+}
+
+bool TermRegistry::hasStringCode() const { return d_hasStrCode; }
+
+Node TermRegistry::getRegisterTermAtomicLemma(Node n,
+ LengthStatus s,
+ std::map<Node, bool>& reqPhase)
+{
+ Assert(n.getType().isStringLike());
+ NodeManager* nm = NodeManager::currentNM();
+ Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
+ Node emp = Word::mkEmptyWord(n.getType());
+ if (s == LENGTH_GEQ_ONE)
+ {
+ Node neq_empty = n.eqNode(emp).negate();
+ Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
+ Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
+ Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
+ << std::endl;
+ Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
+ return len_geq_one;
+ }
+
+ if (s == LENGTH_ONE)
+ {
+ Node len_one = n_len.eqNode(d_one);
+ Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
+ << std::endl;
+ Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
+ return len_one;
+ }
+ Assert(s == LENGTH_SPLIT);
+
+ std::vector<Node> lems;
+ // split whether the string is empty
+ Node n_len_eq_z = n_len.eqNode(d_zero);
+ Node n_len_eq_z_2 = n.eqNode(emp);
+ Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
+ case_empty = Rewriter::rewrite(case_empty);
+ Node case_nempty = nm->mkNode(GT, n_len, d_zero);
+ if (!case_empty.isConst())
+ {
+ Node lem = nm->mkNode(OR, case_empty, case_nempty);
+ lems.push_back(lem);
+ // prefer trying the empty case first
+ // notice that requirePhase must only be called on rewritten literals that
+ // occur in the CNF stream.
+ n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
+ Assert(!n_len_eq_z.isConst());
+ reqPhase[n_len_eq_z] = true;
+ n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
+ Assert(!n_len_eq_z_2.isConst());
+ reqPhase[n_len_eq_z_2] = true;
+ }
+ else if (!case_empty.getConst<bool>())
+ {
+ // the rewriter knows that n is non-empty
+ lems.push_back(case_nempty);
+ }
+ else
+ {
+ // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
+ // n ---> "". Since this method is only called on non-constants n, it must
+ // be that n = "" ^ len( n ) = 0 does not rewrite to true.
+ Assert(false);
+ }
+
+ // additionally add len( x ) >= 0 ?
+ if (options::stringLenGeqZ())
+ {
+ Node n_len_geq = nm->mkNode(kind::GEQ, n_len, d_zero);
+ n_len_geq = Rewriter::rewrite(n_len_geq);
+ lems.push_back(n_len_geq);
+ }
+
+ if (lems.empty())
+ {
+ return Node::null();
+ }
+ return lems.size() == 1 ? lems[0] : nm->mkNode(AND, lems);
+}
+
+Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const
+{
+ if (n.getNumChildren() == 0)
+ {
+ Node pn = getProxyVariableFor(n);
+ if (pn.isNull())
+ {
+ return Node::null();
+ }
+ Node eq = n.eqNode(pn);
+ eq = Rewriter::rewrite(eq);
+ if (std::find(exp.begin(), exp.end(), eq) == exp.end())
+ {
+ exp.push_back(eq);
+ }
+ return pn;
+ }
+ std::vector<Node> children;
+ if (n.getMetaKind() == metakind::PARAMETERIZED)
+ {
+ children.push_back(n.getOperator());
+ }
+ for (const Node& nc : n)
+ {
+ if (n.getType().isRegExp())
+ {
+ children.push_back(nc);
+ }
+ else
+ {
+ Node ns = getSymbolicDefinition(nc, exp);
+ if (ns.isNull())
+ {
+ return Node::null();
+ }
+ else
+ {
+ children.push_back(ns);
+ }
+ }
+ }
+ return NodeManager::currentNM()->mkNode(n.getKind(), children);
+}
+
+Node TermRegistry::getProxyVariableFor(Node n) const
+{
+ NodeNodeMap::const_iterator it = d_proxyVar.find(n);
+ if (it != d_proxyVar.end())
+ {
+ return (*it).second;
+ }
+ return Node::null();
+}
+
+void TermRegistry::inferSubstitutionProxyVars(Node n,
+ std::vector<Node>& vars,
+ std::vector<Node>& subs,
+ std::vector<Node>& unproc) const
+{
+ if (n.getKind() == AND)
+ {
+ for (const Node& nc : n)
+ {
+ inferSubstitutionProxyVars(nc, vars, subs, unproc);
+ }
+ return;
+ }
+ if (n.getKind() == EQUAL)
+ {
+ Node ns = n.substitute(vars.begin(), vars.end(), subs.begin(), subs.end());
+ ns = Rewriter::rewrite(ns);
+ if (ns.getKind() == EQUAL)
+ {
+ Node s;
+ Node v;
+ for (unsigned i = 0; i < 2; i++)
+ {
+ Node ss;
+ // determine whether this side has a proxy variable
+ if (ns[i].getAttribute(StringsProxyVarAttribute()))
+ {
+ // it is a proxy variable
+ ss = ns[i];
+ }
+ else if (ns[i].isConst())
+ {
+ ss = getProxyVariableFor(ns[i]);
+ }
+ if (!ss.isNull())
+ {
+ v = ns[1 - i];
+ // if the other side is a constant or variable
+ if (v.getNumChildren() == 0)
+ {
+ if (s.isNull())
+ {
+ s = ss;
+ }
+ else
+ {
+ // both sides of the equality correspond to a proxy variable
+ if (ss == s)
+ {
+ // it is a trivial equality, e.g. between a proxy variable
+ // and its definition
+ return;
+ }
+ else
+ {
+ // equality between proxy variables, non-trivial
+ s = Node::null();
+ }
+ }
+ }
+ }
+ }
+ if (!s.isNull())
+ {
+ // the equality can be turned into a substitution
+ subs.push_back(s);
+ vars.push_back(v);
+ return;
+ }
+ }
+ else
+ {
+ n = ns;
+ }
+ }
+ if (!n.isConst() || !n.getConst<bool>())
+ {
+ unproc.push_back(n);
+ }
+}
+
+} // namespace strings
+} // namespace theory
+} // namespace CVC4
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback