/****************************************************************************** * Top contributors (to current version): * Andrew Reynolds, Andres Noetzli, Tianyi Liang * * This file is part of the cvc5 project. * * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS * in the top-level source directory and their institutional affiliations. * All rights reserved. See the file COPYING in the top-level source * directory for licensing information. * **************************************************************************** * * Term registry for the theory of strings. */ #include "cvc5_private.h" #ifndef CVC5__THEORY__STRINGS__TERM_REGISTRY_H #define CVC5__THEORY__STRINGS__TERM_REGISTRY_H #include "context/cdhashset.h" #include "context/cdlist.h" #include "proof/eager_proof_generator.h" #include "proof/proof_node_manager.h" #include "smt/env_obj.h" #include "theory/output_channel.h" #include "theory/strings/arith_entail.h" #include "theory/strings/infer_info.h" #include "theory/strings/sequences_stats.h" #include "theory/strings/skolem_cache.h" #include "theory/strings/solver_state.h" #include "theory/uf/equality_engine.h" namespace cvc5 { namespace theory { namespace strings { class InferenceManager; /** * This class manages all the (pre)registration tasks for terms. These tasks * include: * (1) Sending out preregistration lemmas for terms, * (2) Add terms to the equality engine, * (3) Maintaining a list of terms d_functionsTerms (for theory combination), * (4) Maintaining a list of input variables d_inputVars (for fmf). * (5) Maintaining a skolem cache. Notice that this skolem cache is the * official skolem cache that should be used by all modules in TheoryStrings. */ class TermRegistry : protected EnvObj { typedef context::CDHashSet NodeSet; typedef context::CDHashSet> TypeNodeSet; typedef context::CDHashMap NodeNodeMap; public: TermRegistry(Env& env, SolverState& s, SequencesStatistics& statistics, ProofNodeManager* pnm); ~TermRegistry(); /** get the cardinality of the alphabet used, based on the options */ uint32_t getAlphabetCardinality() const; /** Finish initialize, which sets the inference manager */ void finishInit(InferenceManager* im); /** The eager reduce routine * * Constructs a lemma for t that is incomplete, but communicates pertinent * information about t. This is analogous to StringsPreprocess::reduce. * * In practice, we send this lemma eagerly, as soon as t is registered. * * @param t The node to reduce, * @param sc The Skolem cache to use for new variables, * @param alphaCard The cardinality of the alphabet we are assuming * @return The eager reduction for t. */ static Node eagerReduce(Node t, SkolemCache* sc, uint32_t alphaCard); /** * Returns a lemma indicating that the length of a term t whose type is * string-like has positive length. The exact form of this lemma depends * on what works best in practice, currently: * (or (and (= (str.len t) 0) (= t "")) (> (str.len t) 0)) * * @param t The node to reduce, * @return The positive length lemma for t. */ static Node lengthPositive(Node t); /** * Preregister term, called when TheoryStrings::preRegisterTerm(n) is called. * This does the following: * - Checks for illegal terms and throws a LogicException if any term is * not handled. * - Adds the appropriate terms and triggers to the equality engine. * - Updates information about which terms exist, including * d_functionsTerms and d_hasStrCode. If we are using strings finite model * finding (options::stringsFmf), we determine if the term n should be * added to d_inputVars, the set of terms of type string whose length we are * minimizing with its decision strategy. * - Setting phase requirements on n if it is a formula and we prefer * decisions with a particular polarity (e.g. positive regular expression * memberships). */ void preRegisterTerm(TNode n); /** Register term * * This performs SAT-context-independent registration for a term n, which * may cause lemmas to be sent on the output channel that involve * "initial refinement lemmas" for n. This includes introducing proxy * variables for string terms and asserting that str.code terms are within * proper bounds. * * Effort is one of the following (TODO make enum #1881): * 0 : upon preregistration or internal assertion * 1 : upon occurrence in length term * 2 : before normal form computation * 3 : called on normal form terms * * Based on the strategy, we may choose to add these initial refinement * lemmas at one of the following efforts, where if it is not the given * effort, the call to this method does nothing. */ void registerTerm(Node n, int effort); /** register length * * This method is called on non-constant string terms n that are "atomic" * with respect to length. That is, the rewritten form of len(n) is itself. * * It sends a lemma on the output channel that ensures that the length n * satisfies its assigned status (given by argument s). * * If the status is LENGTH_ONE, we send the lemma len( n ) = 1. * * If the status is LENGTH_GEQ_ONE, we send a lemma n != "" ^ len( n ) > 0. * * If the status is LENGTH_SPLIT, we send a send a lemma of the form: * ( n = "" ^ len( n ) = 0 ) OR len( n ) > 0 * This method also ensures that, when applicable, the left branch is taken * first via calls to requirePhase. * * If the status is LENGTH_IGNORE, then no lemma is sent. This status is used * e.g. when the length of n is already implied by other constraints. * * In contrast to the above functions, it makes immediate calls to the output * channel instead of adding them to pending lists. */ void registerTermAtomic(Node n, LengthStatus s); //---------------------------- queries /** Get the skolem cache of this object */ SkolemCache* getSkolemCache(); /** Get all function terms that have been preregistered to this object */ const context::CDList& getFunctionTerms() const; /** * Get the "input variables", corresponding to the set of leaf nodes of * string-like type that have been preregistered as terms to this object. */ const context::CDHashSet& getInputVars() const; /** Returns true if any str.code terms have been preregistered */ bool hasStringCode() const; /** * @return true if any seq.nth or seq.update terms have been preregistered */ bool hasSeqUpdate() const; /** is handled update */ bool isHandledUpdate(Node n); /** get base */ Node getUpdateBase(Node n); //---------------------------- end queries //---------------------------- proxy variables /** Get symbolic definition * * This method returns the "symbolic definition" of n, call it n', and * populates the vector exp with an explanation such that exp => n = n'. * * The symbolic definition of n is the term where (maximal) subterms of n * are replaced by their proxy variables. For example, if we introduced * proxy variable v for x ++ y, then given input x ++ y = w, this method * returns v = w and adds v = x ++ y to exp. */ Node getSymbolicDefinition(Node n, std::vector& exp) const; /** Get proxy variable * * If this method returns the proxy variable for (string) term n if one * exists, otherwise it returns null. */ Node getProxyVariableFor(Node n) const; /** * Get the proxy variable for a term. If the proxy variable does not exist, * this method registers the term and then returns its proxy variable. * * @param n The term * @return Proxy variable for `n` */ Node ensureProxyVariableFor(Node n); /** * This method attempts to (partially) remove trivial parts of an explanation * n. It adds conjuncts of n that must be included in the explanation into * unproc and drops the rest. * * For example, say that v1 was introduced as a proxy variable for "ABC". * * Given the input n := v1 = "ABC" ^ x = "AA", this method sets: * unproc = { x = "AA" }. * In particular, this says that the information content of n is essentially * x = "AA". The first conjunct can be dropped from the explanation since * that equality simply corresponds to definition of a proxy variable. * * This method is used as a performance heuristic. It can infer when the * explanation of a fact depends only on equalities corresponding to * definitions of proxy variables, which can be omitted since they are * assumed to hold globally. */ void removeProxyEqs(Node n, std::vector& unproc) const; //---------------------------- end proxy variables private: /** Common constants */ Node d_zero; Node d_one; Node d_negOne; /** the cardinality of the alphabet */ uint32_t d_alphaCard; /** Reference to the solver state of the theory of strings. */ SolverState& d_state; /** Pointer to the inference manager of the theory of strings. */ InferenceManager* d_im; /** Reference to the statistics for the theory of strings/sequences. */ SequencesStatistics& d_statistics; /** have we asserted any str.code terms? */ bool d_hasStrCode; /** have we asserted any seq.update/seq.nth terms? */ bool d_hasSeqUpdate; /** The cache of all skolems, which is owned by this class. */ SkolemCache d_skCache; /** arithmetic entailment */ ArithEntail d_aent; /** All function terms that the theory has seen in the current SAT context */ context::CDList d_functionsTerms; /** * The set of terms of type string that are abstracted as leaf nodes. */ NodeSet d_inputVars; /** The user-context dependent cache of terms that have been preregistered */ NodeSet d_preregisteredTerms; /** The user-context dependent cache of terms that have been registered */ NodeSet d_registeredTerms; /** The types that we have preregistered */ TypeNodeSet d_registeredTypes; /** * Map string terms to their "proxy variables". Proxy variables are used are * intermediate variables so that length information can be communicated for * constants. For example, to communicate that "ABC" has length 3, we * introduce a proxy variable v_{"ABC"} for "ABC", and assert: * v_{"ABC"} = "ABC" ^ len( v_{"ABC"} ) = 3 * Notice this is required since we cannot directly write len( "ABC" ) = 3, * which rewrites to 3 = 3. * In the above example, we store "ABC" -> v_{"ABC"} in this map. */ NodeNodeMap d_proxyVar; /** * Map from proxy variables to their normalized length. In the above example, * we store "ABC" -> 3. */ NodeNodeMap d_proxyVarToLength; /** List of terms that we have register length for */ NodeSet d_lengthLemmaTermsCache; /** Proof generator, manages proofs for lemmas generated by this class */ std::unique_ptr d_epg; /** Register type * * Ensures the theory solver is setup to handle string-like type tn. In * particular, this includes: * - Calling preRegisterTerm on the empty word for tn */ void registerType(TypeNode tn); /** register term * * This method is called on non-constant string terms n. It returns a lemma * that should be sent on the output channel of theory of strings upon * registration of this term, or null if no lemma is necessary. * * If n is an atomic term, the method registerTermAtomic is called for n * and s = LENGTH_SPLIT and no lemma is returned. */ TrustNode getRegisterTermLemma(Node n); /** * Get the lemma required for registering the length information for * atomic term n given length status s. For details, see registerTermAtomic. * * Additionally, this method may map literals to a required polarity in the * argument reqPhase, which should be processed by a call to requiredPhase by * the caller of this method. */ TrustNode getRegisterTermAtomicLemma(Node n, LengthStatus s, std::map& reqPhase); }; } // namespace strings } // namespace theory } // namespace cvc5 #endif /* CVC5__THEORY__STRINGS__TERM_REGISTRY_H */