From c9a7ca1f06080b7522ba582bdb99ba9077509209 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Fri, 7 Feb 2020 21:35:30 -0600 Subject: Split core solver from the theory of strings (#3713) This splits the main procedure from Liang et al CAV 2014 to its own file, the "core solver" of theory of strings. I have intentionally not updated or clang-formatted the code in core_solver.cpp since I would prefer this PR to involve as little change to behavior as possible (it is copied verbatim from theory_strings.cpp). Future PRs will clean this code up. --- src/theory/strings/theory_strings.h | 241 +----------------------------------- 1 file changed, 6 insertions(+), 235 deletions(-) (limited to 'src/theory/strings/theory_strings.h') diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 3b53fcded..5e00f9416 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -25,6 +25,7 @@ #include "expr/node_trie.h" #include "theory/decision_manager.h" #include "theory/strings/base_solver.h" +#include "theory/strings/core_solver.h" #include "theory/strings/infer_info.h" #include "theory/strings/inference_manager.h" #include "theory/strings/normal_form.h" @@ -226,16 +227,6 @@ class TheoryStrings : public Theory { SolverState d_state; /** The (custom) output channel of the theory of strings */ InferenceManager d_im; - /** map from terms to their normal forms */ - std::map d_normal_form; - /** get normal form */ - NormalForm& getNormalForm(Node n); - //map of pairs of terms that have the same normal form - NodeIntMap d_nf_pairs; - std::map< Node, std::vector< Node > > d_nf_pairs_data; - void addNormalFormPair( Node n1, Node n2 ); - bool isNormalFormPair( Node n1, Node n2 ); - bool isNormalFormPair2( Node n1, Node n2 ); // preReg cache NodeSet d_pregistered_terms_cache; NodeSet d_registered_terms_cache; @@ -261,13 +252,7 @@ private: Node getCurrentSubstitutionFor(int effort, Node n, std::vector& exp); std::map< Node, Node > d_eqc_to_len_term; - std::vector< Node > d_strings_eqc; - //list of non-congruent concat terms in each eqc - std::map< Node, std::vector< Node > > d_eqc; - std::map< Node, std::vector< Node > > d_flat_form; - std::map< Node, std::vector< int > > d_flat_form_index; - void debugPrintFlatForms( const char * tc ); ///////////////////////////////////////////////////////////////////////////// // MODEL GENERATION ///////////////////////////////////////////////////////////////////////////// @@ -393,120 +378,6 @@ private: void checkExtfInference(Node n, Node nr, ExtfInfoTmp& in, int effort); //--------------------------end for checkExtfEval - //--------------------------for checkFlatForm - /** - * This checks whether there are flat form inferences between eqc[start] and - * eqc[j] for some j>start. If the flag isRev is true, we check for flat form - * interferences in the reverse direction of the flat forms (note: - * `d_flat_form` and `d_flat_form_index` must be in reverse order if `isRev` - * is true). For more details, see checkFlatForms below. - */ - void checkFlatForm(std::vector& eqc, size_t start, bool isRev); - //--------------------------end for checkFlatForm - - //--------------------------for checkCycles - Node checkCycles( Node eqc, std::vector< Node >& curr, std::vector< Node >& exp ); - //--------------------------end for checkCycles - - //--------------------------for checkNormalFormsEq - /** normalize equivalence class - * - * This method attempts to build a "normal form" for the equivalence class - * of string term n (for more details on normal forms, see normal_form.h - * or see Liang et al CAV 2014). In particular, this method checks whether the - * current normal form for each term in this equivalence class is identical. - * If it is not, then we add an inference via sendInference and abort the - * call. - */ - void normalizeEquivalenceClass( Node n ); - /** - * For each term in the equivalence class of eqc, this adds data regarding its - * normal form to normal_forms. The map term_to_nf_index maps terms to the - * index in normal_forms where their normal form data is located. - */ - void getNormalForms(Node eqc, - std::vector& normal_forms, - std::map& term_to_nf_index); - /** process normalize equivalence class - * - * This is called when an equivalence class contains a set of terms that - * have normal forms given by the argument normal_forms. It either - * verifies that all normal forms in this vector are identical, or otherwise - * adds a conflict, lemma, or inference via the sendInference method. - * - * To prioritize one inference versus another, it builds a set of possible - * inferences, at most two for each pair of distinct normal forms, - * corresponding to processing the normal form pair in the (forward, reverse) - * directions. Once all possible inferences are recorded, it executes the - * one with highest priority based on the enumeration type Inference. - */ - void processNEqc(std::vector& normal_forms); - /** process simple normal equality - * - * This method is called when two equal terms have normal forms nfi and nfj. - * It adds (typically at most one) possible inference to the vector pinfer. - * This inference is in the form of an InferInfo object, which stores the - * necessary information regarding how to process the inference. - * - * index: The index in the normal form vectors (nfi.d_nf and nfj.d_nf) that - * we are currently checking. This method will increment this index until - * it finds an index where these vectors differ, or until it reaches the - * end of these vectors. - * isRev: Whether we are processing the normal forms in reverse direction. - * Notice in this case the normal form vectors have been reversed, hence, - * many operations are identical to the forward case, e.g. index is - * incremented not decremented, while others require special care, e.g. - * constant strings "ABC" in the normal form vectors are not reversed to - * "CBA" and hence all operations should assume a flipped semantics for - * constants when isRev is true, - * rproc: the number of string components on the suffix of the normal form of - * nfi and nfj that were already processed. This is used when using - * fowards/backwards traversals of normal forms to ensure that duplicate - * inferences are not processed. - * pinfer: the set of possible inferences we add to. - */ - void processSimpleNEq(NormalForm& nfi, - NormalForm& nfj, - unsigned& index, - bool isRev, - unsigned rproc, - std::vector& pinfer); - //--------------------------end for checkNormalFormsEq - - //--------------------------for checkNormalFormsEq with loops - bool detectLoop(NormalForm& nfi, - NormalForm& nfj, - int index, - int& loop_in_i, - int& loop_in_j, - unsigned rproc); - - /** - * Result of processLoop() below. - */ - enum class ProcessLoopResult - { - /** Loop processing made an inference */ - INFERENCE, - /** Loop processing detected a conflict */ - CONFLICT, - /** Loop not processed or no loop detected */ - SKIPPED, - }; - - ProcessLoopResult processLoop(NormalForm& nfi, - NormalForm& nfj, - int loop_index, - int index, - InferInfo& info); - //--------------------------end for checkNormalFormsEq with loops - - //--------------------------for checkNormalFormsDeq - void processDeq( Node n1, Node n2 ); - int processReverseDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj ); - int processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj, unsigned& index, bool isRev ); - //--------------------------end for checkNormalFormsDeq - private: void addCarePairs(TNodeTrie* t1, TNodeTrie* t2, @@ -553,12 +424,6 @@ private: * of atom, including calls to registerTerm. */ void assertPendingFact(Node atom, bool polarity, Node exp); - /** - * This processes the infer info ii as an inference. In more detail, it calls - * the inference manager to process the inference, it introduces Skolems, and - * updates the set of normal form pairs. - */ - void doInferInfo(const InferInfo& ii); /** Register term * @@ -587,6 +452,11 @@ private: * inferring constants for equivalence classes. */ BaseSolver d_bsolver; + /** + * The core solver, responsible for reasoning about string concatenation + * with length constraints. + */ + CoreSolver d_csolver; /** regular expression solver module */ RegExpSolver d_regexp_solver; /** regular expression elimination module */ @@ -671,111 +541,12 @@ private: * effort=3, we apply context-dependent simplification based on model values. */ void checkExtfEval(int effort); - /** check cycles - * - * This inference schema ensures that a containment ordering < over the - * string equivalence classes is acyclic. We define this ordering < such that - * for equivalence classes e1 = { t1...tn } and e2 = { s1...sm }, e1 < e2 - * if there exists a ti whose flat form (see below) is [w1...sj...wk] for - * some i,j. If e1 < ... < en < e1 for some chain, we infer that the flat - * form components that do not constitute this chain, e.g. (w1...wk) \ sj - * in the flat form above, must be empty. - * - * For more details, see the inference S-Cycle in Liang et al CAV 2014. - */ - void checkCycles(); - /** check flat forms - * - * This applies an inference schema based on "flat forms". The flat form of a - * string term t is a vector of representative terms [r1, ..., rn] such that - * t is of the form t1 ++ ... ++ tm and r1 ++ ... ++ rn is equivalent to - * rewrite( [t1] ++ ... ++ [tm] ), where [t1] denotes the representative of - * the equivalence class containing t1. For example, if t is y ++ z ++ z, - * E is { y = "", w = z }, and w is the representative of the equivalence - * class { w, z }, then the flat form of t is [w, w]. Say t1 and t2 are terms - * in the same equivalence classes with flat forms [r1...rn] and [s1...sm]. - * We may infer various facts based on this pair of terms. For example: - * ri = si, if ri != si, rj == sj for each j < i, and len(ri)=len(si), - * rn = sn, if n=m and rj == sj for each j < n, - * ri = empty, if n=m+1 and ri == rj for each i=1,...,m. - * We refer to these as "unify", "endpoint-eq" and "endpoint-emp" inferences - * respectively. - * - * Notice that this inference scheme is an optimization and not needed for - * model-soundness. The motivation for this schema is that it is simpler than - * checkNormalFormsEq, which can be seen as a recursive version of this - * schema (see difference of "normal form" vs "flat form" below), and - * checkNormalFormsEq is complete, in the sense that if it passes with no - * inferences, we are ensured that all string equalities in the current - * context are satisfied. - * - * Must call checkCycles before this function in a strategy. - */ - void checkFlatForms(); /** check register terms pre-normal forms * * This calls registerTerm(n,2) on all non-congruent strings in the * equality engine of this class. */ void checkRegisterTermsPreNormalForm(); - /** check normal forms equalities - * - * This applies an inference schema based on "normal forms". The normal form - * of an equivalence class of string terms e = {t1, ..., tn} union {x1....xn}, - * where t1...tn are concatenation terms is a vector of representative terms - * [r1, ..., rm] such that: - * (1) if n=0, then m=1 and r1 is the representative of e, - * (2) if n>0, say - * t1 = t^1_1 ++ ... ++ t^1_m_1 - * ... - * tn = t^1_n ++ ... ++ t^_m_n - * for *each* i=1, ..., n, the result of concenating the normal forms of - * t^1_1 ++ ... ++ t^1_m_1 is equal to [r1, ..., rm]. If an equivalence class - * can be assigned a normal form, then all equalities between ti and tj are - * satisfied by all models that correspond to extensions of the current - * assignment. For further detail on this terminology, see Liang et al - * CAV 2014. - * - * Notice that all constant words are implicitly considered concatentation - * of their characters, e.g. "abc" is treated as "a" ++ "b" ++ "c". - * - * At a high level, we build normal forms for equivalence classes bottom-up, - * starting with equivalence classes that are minimal with respect to the - * containment ordering < computed during checkCycles. While computing a - * normal form for an equivalence class, we may infer equalities between - * components of strings that must be equal (e.g. x=y when x++z == y++w when - * len(x)==len(y) is asserted), derive conflicts if two strings have disequal - * prefixes/suffixes (e.g. "a" ++ x == "b" ++ y is a conflict), or split - * string terms into smaller components using fresh skolem variables (see - * Inference values with names "SPLIT"). We also may introduce regular - * expression constraints in this method for looping word equations (see - * the Inference INFER_FLOOP). - * - * If this inference schema returns no facts, lemmas, or conflicts, then - * we have successfully assigned normal forms for all equivalence classes, as - * stored in d_normal_forms. Otherwise, this method may add a fact, lemma, or - * conflict based on inferences in the Inference enumeration above. - */ - void checkNormalFormsEq(); - /** check normal forms disequalities - * - * This inference schema can be seen as the converse of the above schema. In - * particular, it ensures that each pair of distinct equivalence classes - * e1 and e2 have distinct normal forms. - * - * This method considers all pairs of distinct equivalence classes (e1,e2) - * such that len(x1)==len(x2) is asserted for some x1 in e1 and x2 in e2. It - * then traverses the normal forms of x1 and x2, say they are [r1, ..., rn] - * and [s1, ..., sm]. For the minimial i such that ri!=si, if ri and si are - * disequal and have the same length, then x1 and x2 have distinct normal - * forms. Otherwise, we may add splitting lemmas on the length of ri and si, - * or split on an equality between ri and si. - * - * If this inference schema returns no facts, lemmas, or conflicts, then all - * disequalities between string terms are satisfied by all models that are - * extensions of the current assignment. - */ - void checkNormalFormsDeq(); /** check codes * * This inference schema ensures that constraints between str.code terms -- cgit v1.2.3