diff options
author | Andrew Reynolds <andrew.j.reynolds@gmail.com> | 2019-04-23 13:40:27 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-04-23 13:40:27 -0500 |
commit | 53cade050e191c7c0dc0ebfae716a21162bd9b22 (patch) | |
tree | 99ce8fa8224660143a6afb79e65362dc5f469c9a | |
parent | d43f7760866a1a26769dfdebdffebdaf35309f9c (diff) |
Refactor normal forms in strings (#2897)
-rw-r--r-- | src/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/theory/strings/normal_form.cpp | 163 | ||||
-rw-r--r-- | src/theory/strings/normal_form.h | 159 | ||||
-rw-r--r-- | src/theory/strings/theory_strings.cpp | 856 | ||||
-rw-r--r-- | src/theory/strings/theory_strings.h | 126 |
5 files changed, 861 insertions, 445 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 372888d36..cff31dbcd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -625,6 +625,8 @@ libcvc4_add_sources( theory/shared_terms_database.h theory/sort_inference.cpp theory/sort_inference.h + theory/strings/normal_form.cpp + theory/strings/normal_form.h theory/strings/regexp_elim.cpp theory/strings/regexp_elim.h theory/strings/regexp_operation.cpp diff --git a/src/theory/strings/normal_form.cpp b/src/theory/strings/normal_form.cpp new file mode 100644 index 000000000..c70845d64 --- /dev/null +++ b/src/theory/strings/normal_form.cpp @@ -0,0 +1,163 @@ +/********************* */ +/*! \file normal_form.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of normal form data structure for the theory of + ** strings. + **/ + +#include "theory/strings/normal_form.h" + +#include "options/strings_options.h" +#include "theory/rewriter.h" + +using namespace std; +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +void NormalForm::init(Node base) +{ + Assert(base.getType().isString()); + Assert(base.getKind() != STRING_CONCAT); + d_base = base; + d_nf.clear(); + d_isRev = false; + d_exp.clear(); + d_expDep.clear(); + + // add to normal form + if (!base.isConst() || !base.getConst<String>().isEmptyString()) + { + d_nf.push_back(base); + } +} + +void NormalForm::reverse() +{ + std::reverse(d_nf.begin(), d_nf.end()); + d_isRev = !d_isRev; +} + +void NormalForm::splitConstant(unsigned index, Node c1, Node c2) +{ + Assert(Rewriter::rewrite(NodeManager::currentNM()->mkNode( + STRING_CONCAT, d_isRev ? c2 : c1, d_isRev ? c1 : c2)) + == d_nf[index]); + d_nf.insert(d_nf.begin() + index + 1, c2); + d_nf[index] = c1; + // update the dependency indices + // notice this is not critical for soundness: not doing the below incrementing + // will only lead to overapproximating when antecedants are required in + // explanations + for (const std::pair<Node, std::map<bool, unsigned> >& pe : d_expDep) + { + for (const std::pair<bool, unsigned>& pep : pe.second) + { + // See if this can be incremented: it can if this literal is not relevant + // to the current index, and hence it is not relevant for both c1 and c2. + Assert(pep.second >= 0 && pep.second <= d_nf.size()); + bool increment = (pep.first == d_isRev) + ? pep.second > index + : (d_nf.size() - 1 - pep.second) < index; + if (increment) + { + d_expDep[pe.first][pep.first] = pep.second + 1; + } + } + } +} + +void NormalForm::addToExplanation(Node exp, + unsigned new_val, + unsigned new_rev_val) +{ + if (std::find(d_exp.begin(), d_exp.end(), exp) == d_exp.end()) + { + d_exp.push_back(exp); + } + for (unsigned k = 0; k < 2; k++) + { + unsigned val = k == 0 ? new_val : new_rev_val; + std::map<bool, unsigned>::iterator itned = d_expDep[exp].find(k == 1); + if (itned == d_expDep[exp].end()) + { + Trace("strings-process-debug") + << "Deps : set dependency on " << exp << " to " << val + << " isRev=" << (k == 0) << std::endl; + d_expDep[exp][k == 1] = val; + } + else + { + Trace("strings-process-debug") + << "Deps : Multiple dependencies on " << exp << " : " << itned->second + << " " << val << " isRev=" << (k == 0) << std::endl; + // if we already have a dependency (in the case of non-linear string + // equalities), it is min/max + bool cmp = val > itned->second; + if (cmp == (k == 1)) + { + d_expDep[exp][k == 1] = val; + } + } + } +} + +void NormalForm::getExplanation(int index, std::vector<Node>& curr_exp) +{ + if (index == -1 || !options::stringMinPrefixExplain()) + { + curr_exp.insert(curr_exp.end(), d_exp.begin(), d_exp.end()); + return; + } + for (const Node& exp : d_exp) + { + int dep = static_cast<int>(d_expDep[exp][d_isRev]); + if (dep <= index) + { + curr_exp.push_back(exp); + Trace("strings-explain-prefix-debug") << " include : "; + } + else + { + Trace("strings-explain-prefix-debug") << " exclude : "; + } + Trace("strings-explain-prefix-debug") << exp << std::endl; + } +} + +void NormalForm::getExplanationForPrefixEq(NormalForm& nfi, + NormalForm& nfj, + int index_i, + int index_j, + std::vector<Node>& curr_exp) +{ + Assert(nfi.d_isRev == nfj.d_isRev); + Trace("strings-explain-prefix") + << "Get explanation for prefix " << index_i << ", " << index_j + << ", reverse = " << nfi.d_isRev << std::endl; + // get explanations + nfi.getExplanation(index_i, curr_exp); + nfj.getExplanation(index_j, curr_exp); + Trace("strings-explain-prefix") + << "Included " << curr_exp.size() << " / " + << (nfi.d_exp.size() + nfj.d_exp.size()) << std::endl; + if (nfi.d_base != nfj.d_base) + { + Node eq = nfi.d_base.eqNode(nfj.d_base); + curr_exp.push_back(eq); + } +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/normal_form.h b/src/theory/strings/normal_form.h new file mode 100644 index 000000000..73036e68f --- /dev/null +++ b/src/theory/strings/normal_form.h @@ -0,0 +1,159 @@ +/********************* */ +/*! \file normal_form.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Normal form datastructure for the theory of strings. + **/ + +#include "cvc4_private.h" + +#ifndef __CVC4__THEORY__STRINGS__NORMAL_FORM_H +#define __CVC4__THEORY__STRINGS__NORMAL_FORM_H + +#include <map> +#include <vector> +#include "expr/node.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** normal forms + * + * Stores information regarding the "normal form" of terms t in the current + * context. Normal forms can be associated with terms, or with string + * equivalence classes. For the latter, the normal form of an equivalence class + * exists if exactly one unique normal form is associated to a subset of its + * terms. + * + * In the following we use example where assertions are: + * { x = y, y = z, y = u ++ v, u = u1 ++ u2 } + * and equivalence class [x] = { x, y, z, u ++ v }, whose normal form is + * (u1, u2, v) + */ +class NormalForm +{ + public: + NormalForm() : d_isRev(false) {} + /** + * The "base" of the normal form. This is some term in the equivalence + * class of t that the normal form is based on. This is an arbitrary term + * which is used as the reference point for explanations. In the above + * running example, let us assume the base of [x] is y. + */ + Node d_base; + /** the normal form, (u1, u2, v), in the above example */ + std::vector<Node> d_nf; + /** is the normal form d_nf stored in reverse order? */ + bool d_isRev; + /** + * The explanation for the normal form, this is a set of literals such that + * d_exp => d_base = d_nf + * In the above example, this is the set of equalities + * { y = u ++ v, u = u1 ++ u2 } + * If u ++ v was chosen as the base, then the first literal could be omitted. + */ + std::vector<Node> d_exp; + /** + * Map from literals in the vector d_exp to integers indicating indices in + * d_nf for which that literal L is relevant for explaining d_base = d_nf. + * + * In particular: + * - false maps to an (ideally maximal) index relative to the start of d_nf + * such that L is required for explaining why d_base has a prefix that + * includes the term at that index, + * - true maps to an (ideally maximal) index relative to the end of d_nf + * such that L is required for explaining why d_base has a suffix that + * includes the term at that index. + * We call these the forward and backwards dependency indices. + * + * In the above example: + * y = u ++ v : false -> 0, true -> 0 + * u = u1 ++ u2 : false -> 0, true -> 1 + * When explaining y = u1 ++ u2 ++ v, the equality y = u ++ v is required + * for explaining any prefix/suffix of y and its normal form. More + * interestingly, the equality u = u1 ++ u2 is not required for explaining + * that v is a suffix of y, since its reverse index in this map is 1, + * indicating that "u2" is the first position in u1 ++ u2 ++ v that it is + * required for explaining. + * + * This information is used to minimize explanations when conflicts arise, + * thereby strengthening conflict clauses and lemmas. + * + * For example, say u ++ v = y = x = u ++ w and w and v are distinct + * constants, using this dependency information, we could construct a + * conflict: + * x = y ^ y = u ++ v ^ x = u ++ w + * that does not include u = u1 ++ u2, because the conflict only pertains + * to the last position in the normal form of y. + */ + std::map<Node, std::map<bool, unsigned> > d_expDep; + /** initialize + * + * Initialize the normal form with base node base. If base is not the empty + * string, then d_nf is set to the singleton list containing base, otherwise + * d_nf is empty. + */ + void init(Node base); + /** reverse the content of normal form d_nf + * + * This operation is done in contexts where the normal form is being scanned + * in reverse order. + */ + void reverse(); + /** split constant + * + * Splits the constant in d_nf at index to constants c1 and c2. + * + * Notice this function depends on whether the normal form has been reversed + * d_isRev, as this impacts how the dependency indices are updated. + */ + void splitConstant(unsigned index, Node c1, Node c2); + /** add to explanation + * + * This adds exp to the explanation vector d_exp with new forward and + * backwards dependency indices new_val and new_rev_val. + * + * If exp already has dependencies, we update the forward dependency + * index to the minimum of the previous value and the new value, and + * similarly update the backwards dependency index to the maximum. + */ + void addToExplanation(Node exp, unsigned new_val, unsigned new_rev_val); + /** get explanation + * + * This gets the explanation for the prefix (resp. suffix) of the normal + * form up to index when d_isRev is false (resp. true). In particular; + * + * If index is -1, then this method adds all literals in d_exp to curr_exp. + * + * If index>=0, this method adds all literals in d_exp to curr_exp whose + * forward (resp. backwards) dependency index is less than index + * when isRev is false (resp. true). + */ + void getExplanation(int index, std::vector<Node>& curr_exp); + /** get explanation for prefix equality + * + * This adds to curr_exp the reason why the prefix of nfi up to index index_i + * is equivalent to the prefix of nfj up to index_j. The values of + * nfi.d_isRev and nfj.d_isRev affect how dependency indices are updated + * during this call. + */ + static void getExplanationForPrefixEq(NormalForm& nfi, + NormalForm& nfj, + int index_i, + int index_j, + std::vector<Node>& curr_exp); +}; + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* __CVC4__THEORY__STRINGS__NORMAL_FORM_H */ diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 8731bd1a5..c2b87fbef 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -253,13 +253,13 @@ Node TheoryStrings::getNormalString(Node x, std::vector<Node>& nf_exp) if (!x.isConst()) { Node xr = getRepresentative(x); - if (d_normal_forms.find(xr) != d_normal_forms.end()) + std::map<Node, NormalForm>::iterator it = d_normal_form.find(xr); + if (it != d_normal_form.end()) { - Node ret = mkConcat(d_normal_forms[xr]); - nf_exp.insert(nf_exp.end(), - d_normal_forms_exp[xr].begin(), - d_normal_forms_exp[xr].end()); - addToExplanation(x, d_normal_forms_base[xr], nf_exp); + NormalForm& nf = it->second; + Node ret = mkConcat(nf.d_nf); + nf_exp.insert(nf_exp.end(), nf.d_exp.begin(), nf.d_exp.end()); + addToExplanation(x, nf.d_base, nf_exp); Trace("strings-debug") << "Term: " << x << " has a normal form " << ret << std::endl; return ret; @@ -400,11 +400,14 @@ bool TheoryStrings::getCurrentSubstitution( int effort, std::vector< Node >& var } }else if( effort>=1 && effort<3 && n.getType().isString() ){ //normal forms - Node ns = getNormalString( d_normal_forms_base[nr], exp[n] ); + NormalForm& nfnr = getNormalForm(nr); + Node ns = getNormalString(nfnr.d_base, exp[n]); subs.push_back( ns ); - Trace("strings-subs") << " normal eqc : " << ns << " " << d_normal_forms_base[nr] << " " << nr << std::endl; - if( !d_normal_forms_base[nr].isNull() ) { - addToExplanation( n, d_normal_forms_base[nr], exp[n] ); + Trace("strings-subs") << " normal eqc : " << ns << " " << nfnr.d_base + << " " << nr << std::endl; + if (!nfnr.d_base.isNull()) + { + addToExplanation(n, nfnr.d_base, exp[n]); } }else{ //representative? @@ -671,8 +674,8 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m) //check if col[i][j] has only variables if (!eqc.isConst()) { - Assert(d_normal_forms.find(eqc) != d_normal_forms.end()); - if (d_normal_forms[eqc].size() == 1) + NormalForm& nfe = getNormalForm(eqc); + if (nfe.d_nf.size() == 1) { // does it have a code and the length of these equivalence classes are // one? @@ -795,20 +798,31 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m) //step 4 : assign constants to all other equivalence classes for( unsigned i=0; i<nodes.size(); i++ ){ if( processed.find( nodes[i] )==processed.end() ){ - Assert( d_normal_forms.find( nodes[i] )!=d_normal_forms.end() ); - Trace("strings-model") << "Construct model for " << nodes[i] << " based on normal form "; - for( unsigned j=0; j<d_normal_forms[nodes[i]].size(); j++ ) { - if( j>0 ) Trace("strings-model") << " ++ "; - Trace("strings-model") << d_normal_forms[nodes[i]][j]; - Node r = getRepresentative( d_normal_forms[nodes[i]][j] ); - if( !r.isConst() && processed.find( r )==processed.end() ){ - Trace("strings-model") << "(UNPROCESSED)"; + NormalForm& nf = getNormalForm(nodes[i]); + if (Trace.isOn("strings-model")) + { + Trace("strings-model") + << "Construct model for " << nodes[i] << " based on normal form "; + for (unsigned j = 0, size = nf.d_nf.size(); j < size; j++) + { + Node n = nf.d_nf[j]; + if (j > 0) + { + Trace("strings-model") << " ++ "; + } + Trace("strings-model") << n; + Node r = getRepresentative(n); + if (!r.isConst() && processed.find(r) == processed.end()) + { + Trace("strings-model") << "(UNPROCESSED)"; + } } } Trace("strings-model") << std::endl; std::vector< Node > nc; - for( unsigned j=0; j<d_normal_forms[nodes[i]].size(); j++ ) { - Node r = getRepresentative( d_normal_forms[nodes[i]][j] ); + for (const Node& n : nf.d_nf) + { + Node r = getRepresentative(n); Assert( r.isConst() || processed.find( r )!=processed.end() ); nc.push_back(r.isConst() ? r : processed[r]); } @@ -2475,8 +2489,7 @@ void TheoryStrings::checkNormalFormsEq() } // calculate normal forms for each equivalence class, possibly adding // splitting lemmas - d_normal_forms.clear(); - d_normal_forms_exp.clear(); + d_normal_form.clear(); std::map<Node, Node> nf_to_eqc; std::map<Node, Node> eqc_to_nf; std::map<Node, Node> eqc_to_exp; @@ -2490,16 +2503,17 @@ void TheoryStrings::checkNormalFormsEq() { return; } - Node nf_term = mkConcat(d_normal_forms[eqc]); + NormalForm& nfe = getNormalForm(eqc); + Node nf_term = mkConcat(nfe.d_nf); std::map<Node, Node>::iterator itn = nf_to_eqc.find(nf_term); if (itn != nf_to_eqc.end()) { + NormalForm& nfe_eq = getNormalForm(itn->second); // two equivalence classes have same normal form, merge std::vector<Node> nf_exp; - nf_exp.push_back(mkAnd(d_normal_forms_exp[eqc])); + nf_exp.push_back(mkAnd(nfe.d_exp)); nf_exp.push_back(eqc_to_exp[itn->second]); - Node eq = - d_normal_forms_base[eqc].eqNode(d_normal_forms_base[itn->second]); + Node eq = nfe.d_base.eqNode(nfe_eq.d_base); sendInference(nf_exp, eq, "Normal_Form"); if( hasProcessed() ){ return; @@ -2509,7 +2523,7 @@ void TheoryStrings::checkNormalFormsEq() { nf_to_eqc[nf_term] = eqc; eqc_to_nf[eqc] = nf_term; - eqc_to_exp[eqc] = mkAnd(d_normal_forms_exp[eqc]); + eqc_to_exp[eqc] = mkAnd(nfe.d_exp); } Trace("strings-process-debug") << "Done verifying normal forms are the same for " << eqc << std::endl; @@ -2521,8 +2535,8 @@ void TheoryStrings::checkNormalFormsEq() it != eqc_to_exp.end(); ++it) { - Trace("strings-nf") << " N[" << it->first << "] (base " - << d_normal_forms_base[it->first] + NormalForm& nf = getNormalForm(it->first); + Trace("strings-nf") << " N[" << it->first << "] (base " << nf.d_base << ") = " << eqc_to_nf[it->first] << std::endl; Trace("strings-nf") << " exp: " << it->second << std::endl; } @@ -2545,9 +2559,10 @@ void TheoryStrings::checkCodes() std::vector<Node> const_codes; for (const Node& eqc : d_strings_eqc) { - if (d_normal_forms[eqc].size() == 1 && d_normal_forms[eqc][0].isConst()) + NormalForm& nfe = getNormalForm(eqc); + if (nfe.d_nf.size() == 1 && nfe.d_nf[0].isConst()) { - Node c = d_normal_forms[eqc][0]; + Node c = nfe.d_nf[0]; Trace("strings-code-debug") << "Get proxy variable for " << c << std::endl; Node cc = nm->mkNode(kind::STRING_CODE, c); @@ -2616,83 +2631,65 @@ void TheoryStrings::normalizeEquivalenceClass( Node eqc ) { #endif //do nothing Trace("strings-process-debug") << "Return process equivalence class " << eqc << " : empty." << std::endl; - d_normal_forms_base[eqc] = d_emptyString; - d_normal_forms[eqc].clear(); - d_normal_forms_exp[eqc].clear(); + d_normal_form[eqc].init(d_emptyString); } else { - Assert( d_normal_forms.find(eqc)==d_normal_forms.end() ); - //phi => t = s1 * ... * sn - // normal form for each non-variable term in this eqc (s1...sn) - std::vector< std::vector< Node > > normal_forms; - // explanation for each normal form (phi) - std::vector< std::vector< Node > > normal_forms_exp; - // dependency information - std::vector< std::map< Node, std::map< bool, int > > > normal_forms_exp_depend; - // record terms for each normal form (t) - std::vector< Node > normal_form_src; - // get normal forms - getNormalForms(eqc, normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend); + // should not have computed the normal form of this equivalence class yet + Assert(d_normal_form.find(eqc) == d_normal_form.end()); + // Normal forms for the relevant terms in the equivalence class of eqc + std::vector<NormalForm> normal_forms; + // map each term to its index in the above vector + std::map<Node, unsigned> term_to_nf_index; + // get the normal forms + getNormalForms(eqc, normal_forms, term_to_nf_index); if( hasProcessed() ){ return; } // process the normal forms - processNEqc( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend ); + processNEqc(normal_forms); if( hasProcessed() ){ return; } - //debugPrintNormalForms( "strings-solve", eqc, normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend ); - + // debugPrintNormalForms( "strings-solve", eqc, normal_forms ); + //construct the normal form Assert( !normal_forms.empty() ); - - int nf_index = 0; - std::vector< Node >::iterator itn = std::find( normal_form_src.begin(), normal_form_src.end(), eqc ); - if( itn!=normal_form_src.end() ){ - nf_index = itn - normal_form_src.begin(); - Trace("strings-solve-debug2") << "take normal form " << nf_index << std::endl; - Assert( normal_form_src[nf_index]==eqc ); - }else{ - //just take the first normal form - Trace("strings-solve-debug2") << "take the first normal form" << std::endl; - } - d_normal_forms[eqc].insert( d_normal_forms[eqc].end(), normal_forms[nf_index].begin(), normal_forms[nf_index].end() ); - d_normal_forms_exp[eqc].insert( d_normal_forms_exp[eqc].end(), normal_forms_exp[nf_index].begin(), normal_forms_exp[nf_index].end() ); - Trace("strings-solve-debug2") << "take normal form ... done" << std::endl; - d_normal_forms_base[eqc] = normal_form_src[nf_index]; - //track dependencies - for( unsigned i=0; i<normal_forms_exp[nf_index].size(); i++ ){ - Node exp = normal_forms_exp[nf_index][i]; - for( unsigned r=0; r<2; r++ ){ - d_normal_forms_exp_depend[eqc][exp][r==0] = normal_forms_exp_depend[nf_index][exp][r==0]; - } + unsigned nf_index = 0; + std::map<Node, unsigned>::iterator it = term_to_nf_index.find(eqc); + // we prefer taking the normal form whose base is the equivalence + // class representative, since this leads to shorter explanations in + // some cases. + if (it != term_to_nf_index.end()) + { + nf_index = it->second; } - Trace("strings-process-debug") << "Return process equivalence class " << eqc << " : returned, size = " << d_normal_forms[eqc].size() << std::endl; + d_normal_form[eqc] = normal_forms[nf_index]; + Trace("strings-process-debug") + << "Return process equivalence class " << eqc + << " : returned, size = " << d_normal_form[eqc].d_nf.size() + << std::endl; } } -void trackNfExpDependency( std::vector< Node >& nf_exp_n, std::map< Node, std::map< bool, int > >& nf_exp_depend_n, Node exp, int new_val, int new_rev_val ){ - if( std::find( nf_exp_n.begin(), nf_exp_n.end(), exp )==nf_exp_n.end() ){ - nf_exp_n.push_back( exp ); - } - for( unsigned k=0; k<2; k++ ){ - int val = k==0 ? new_val : new_rev_val; - std::map< bool, int >::iterator itned = nf_exp_depend_n[exp].find( k==1 ); - if( itned==nf_exp_depend_n[exp].end() ){ - Trace("strings-process-debug") << "Deps : set dependency on " << exp << " to " << val << " isRev=" << (k==0) << std::endl; - nf_exp_depend_n[exp][k==1] = val; - }else{ - Trace("strings-process-debug") << "Deps : Multiple dependencies on " << exp << " : " << itned->second << " " << val << " isRev=" << (k==0) << std::endl; - //if we already have a dependency (in the case of non-linear string equalities), it is min/max - bool cmp = val > itned->second; - if( cmp==(k==1) ){ - nf_exp_depend_n[exp][k==1] = val; - } - } +NormalForm& TheoryStrings::getNormalForm(Node n) +{ + std::map<Node, NormalForm>::iterator itn = d_normal_form.find(n); + if (itn == d_normal_form.end()) + { + Trace("strings-warn") << "WARNING: returning empty normal form for " << n + << std::endl; + // Shouln't ask for normal forms of strings that weren't computed. This + // likely means that n is not a representative or not a term in the current + // context. We simply return a default normal form here in this case. + Assert(false); + return d_normal_form[n]; } + return itn->second; } -void TheoryStrings::getNormalForms( Node &eqc, std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, - std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend ) { +void TheoryStrings::getNormalForms(Node eqc, + std::vector<NormalForm>& normal_forms, + std::map<Node, unsigned>& term_to_nf_index) +{ //constant for equivalence class Node eqc_non_c = eqc; Trace("strings-process-debug") << "Get normal forms " << eqc << std::endl; @@ -2700,79 +2697,115 @@ void TheoryStrings::getNormalForms( Node &eqc, std::vector< std::vector< Node > while( !eqc_i.isFinished() ){ Node n = (*eqc_i); if( d_congruent.find( n )==d_congruent.end() ){ - if( n.getKind() == kind::CONST_STRING || n.getKind() == kind::STRING_CONCAT ){ + if (n.getKind() == CONST_STRING || n.getKind() == STRING_CONCAT) + { Trace("strings-process-debug") << "Get Normal Form : Process term " << n << " in eqc " << eqc << std::endl; - std::vector< Node > nf_n; - std::vector< Node > nf_exp_n; - std::map< Node, std::map< bool, int > > nf_exp_depend_n; - if( n.getKind()==kind::CONST_STRING ){ - if( n!=d_emptyString ) { - nf_n.push_back( n ); - } - }else if( n.getKind()==kind::STRING_CONCAT ){ + NormalForm nf_curr; + if (n.getKind() == CONST_STRING) + { + nf_curr.init(n); + } + else if (n.getKind() == STRING_CONCAT) + { + // set the base to n, we construct the other portions of nf_curr in + // the following. + nf_curr.d_base = n; for( unsigned i=0; i<n.getNumChildren(); i++ ) { Node nr = d_equalityEngine.getRepresentative( n[i] ); + // get the normal form for the component + NormalForm& nfr = getNormalForm(nr); + std::vector<Node>& nfrv = nfr.d_nf; Trace("strings-process-debug") << "Normalizing subterm " << n[i] << " = " << nr << std::endl; - Assert( d_normal_forms.find( nr )!=d_normal_forms.end() ); - unsigned orig_size = nf_n.size(); - unsigned add_size = d_normal_forms[nr].size(); + unsigned orig_size = nf_curr.d_nf.size(); + unsigned add_size = nfrv.size(); //if not the empty string, add to current normal form - if( !d_normal_forms[nr].empty() ){ - for( unsigned r=0; r<d_normal_forms[nr].size(); r++ ) { - if( Trace.isOn("strings-error") ) { - if( d_normal_forms[nr][r].getKind()==kind::STRING_CONCAT ){ - Trace("strings-error") << "Strings::Error: From eqc = " << eqc << ", " << n << " index " << i << ", bad normal form : "; - for( unsigned rr=0; rr<d_normal_forms[nr].size(); rr++ ) { - Trace("strings-error") << d_normal_forms[nr][rr] << " "; + if (!nfrv.empty()) + { + // if in a build with assertions, we run the following block, + // which checks that normal forms do not have concat terms. + if (Configuration::isAssertionBuild()) + { + for (const Node& nn : nfrv) + { + if (Trace.isOn("strings-error")) + { + if (nn.getKind() == STRING_CONCAT) + { + Trace("strings-error") + << "Strings::Error: From eqc = " << eqc << ", " << n + << " index " << i << ", bad normal form : "; + for (unsigned rr = 0; rr < nfrv.size(); rr++) + { + Trace("strings-error") << nfrv[rr] << " "; + } + Trace("strings-error") << std::endl; } - Trace("strings-error") << std::endl; } + Assert(nn.getKind() != kind::STRING_CONCAT); } - Assert( d_normal_forms[nr][r].getKind()!=kind::STRING_CONCAT ); } - nf_n.insert( nf_n.end(), d_normal_forms[nr].begin(), d_normal_forms[nr].end() ); + nf_curr.d_nf.insert(nf_curr.d_nf.end(), nfrv.begin(), nfrv.end()); } - - for( unsigned j=0; j<d_normal_forms_exp[nr].size(); j++ ){ - Node exp = d_normal_forms_exp[nr][j]; - //track depends - trackNfExpDependency( nf_exp_n, nf_exp_depend_n, exp, - orig_size + d_normal_forms_exp_depend[nr][exp][false], - orig_size + ( add_size - d_normal_forms_exp_depend[nr][exp][true] ) ); + // Track explanation for the normal form. This is in two parts. + // First, we must carry the explanation of the normal form computed + // for the representative nr. + for (const Node& exp : nfr.d_exp) + { + // The explanation is only relevant for the subsegment it was + // previously relevant for, shifted now based on its relative + // placement in the normal form of n. + nf_curr.addToExplanation( + exp, + orig_size + nfr.d_expDep[exp][false], + orig_size + (add_size - nfr.d_expDep[exp][true])); } - if( d_normal_forms_base[nr]!=n[i] ){ - Assert( d_normal_forms_base.find( nr )!=d_normal_forms_base.end() ); - Node eq = n[i].eqNode( d_normal_forms_base[nr] ); - //track depends : entire current segment is dependent upon base equality - trackNfExpDependency( nf_exp_n, nf_exp_depend_n, eq, orig_size, orig_size + add_size ); + // Second, must explain that the component n[i] is equal to the + // base of the normal form for nr. + Node base = nfr.d_base; + if (base != n[i]) + { + Node eq = n[i].eqNode(base); + // The equality is relevant for the entire current segment + nf_curr.addToExplanation(eq, orig_size, orig_size + add_size); } } - //convert forward indices to reverse indices - int total_size = nf_n.size(); - for( std::map< Node, std::map< bool, int > >::iterator it = nf_exp_depend_n.begin(); it != nf_exp_depend_n.end(); ++it ){ - it->second[true] = total_size - it->second[true]; - Assert( it->second[true]>=0 ); + // Now that we are finished with the loop, we convert forward indices + // to reverse indices in the explanation dependency information + int total_size = nf_curr.d_nf.size(); + for (std::pair<const Node, std::map<bool, unsigned> >& ed : + nf_curr.d_expDep) + { + ed.second[true] = total_size - ed.second[true]; + Assert(ed.second[true] >= 0); } } //if not equal to self - if( nf_n.size()>1 || ( nf_n.size()==1 && nf_n[0].getKind()==kind::CONST_STRING ) ){ - if( nf_n.size()>1 ) { - for( unsigned i=0; i<nf_n.size(); i++ ){ - if( Trace.isOn("strings-error") ){ - Trace("strings-error") << "Cycle for normal form "; - printConcat(nf_n,"strings-error"); - Trace("strings-error") << "..." << nf_n[i] << std::endl; + std::vector<Node>& currv = nf_curr.d_nf; + if (currv.size() > 1 + || (currv.size() == 1 && currv[0].getKind() == CONST_STRING)) + { + // if in a build with assertions, check that normal form is acyclic + if (Configuration::isAssertionBuild()) + { + if (currv.size() > 1) + { + for (unsigned i = 0; i < currv.size(); i++) + { + if (Trace.isOn("strings-error")) + { + Trace("strings-error") << "Cycle for normal form "; + printConcat(currv, "strings-error"); + Trace("strings-error") << "..." << currv[i] << std::endl; + } + Assert(!areEqual(currv[i], n)); } - Assert( !areEqual( nf_n[i], n ) ); } } - normal_forms.push_back(nf_n); - normal_form_src.push_back(n); - normal_forms_exp.push_back(nf_exp_n); - normal_forms_exp_depend.push_back(nf_exp_depend_n); + term_to_nf_index[n] = normal_forms.size(); + normal_forms.push_back(nf_curr); }else{ //this was redundant: combination of self + empty string(s) - Node nn = nf_n.size()==0 ? d_emptyString : nf_n[0]; + Node nn = currv.size() == 0 ? d_emptyString : currv[0]; Assert( areEqual( nn, eqc ) ); } }else{ @@ -2784,41 +2817,56 @@ void TheoryStrings::getNormalForms( Node &eqc, std::vector< std::vector< Node > if( normal_forms.empty() ) { Trace("strings-solve-debug2") << "construct the normal form" << std::endl; - //do not choose a concat here use "eqc_non_c" (in this case they have non-trivial explanation why they normalize to self) - std::vector< Node > eqc_non_c_nf; - getConcatVec( eqc_non_c, eqc_non_c_nf ); - normal_forms.push_back( eqc_non_c_nf ); - normal_form_src.push_back( eqc_non_c ); - normal_forms_exp.push_back( std::vector< Node >() ); - normal_forms_exp_depend.push_back( std::map< Node, std::map< bool, int > >() ); + // This case happens when there are no non-trivial normal forms for this + // equivalence class. For example, given assertions: + // { x = y ++ z, x = y, z = "" } + // The equivalence class of { x, y, y ++ z } is such that the normal form + // of all terms is a variable (either x or y) in the equivalence class + // itself. Thus, the normal form of this equivalence class can be assigned + // to one of these variables. + // We use a non-concatenation term among the terms in this equivalence + // class, which is stored in eqc_non_c. The reason is this does not require + // an explanation, whereas e.g. y ++ z would require the explanation z = "" + // to justify its normal form is y. + Assert(eqc_non_c.getKind() != STRING_CONCAT); + NormalForm nf_triv; + nf_triv.init(eqc_non_c); + normal_forms.push_back(nf_triv); }else{ if(Trace.isOn("strings-solve")) { Trace("strings-solve") << "--- Normal forms for equivalance class " << eqc << " : " << std::endl; - for( unsigned i=0; i<normal_forms.size(); i++ ) { - Trace("strings-solve") << "#" << i << " (from " << normal_form_src[i] << ") : "; - for( unsigned j=0; j<normal_forms[i].size(); j++ ) { + for (unsigned i = 0, size = normal_forms.size(); i < size; i++) + { + NormalForm& nf = normal_forms[i]; + Trace("strings-solve") << "#" << i << " (from " << nf.d_base << ") : "; + for (unsigned j = 0, sizej = nf.d_nf.size(); j < sizej; j++) + { if(j>0) { Trace("strings-solve") << ", "; } - Trace("strings-solve") << normal_forms[i][j]; + Trace("strings-solve") << nf.d_nf[j]; } Trace("strings-solve") << std::endl; Trace("strings-solve") << " Explanation is : "; - if(normal_forms_exp[i].size() == 0) { + if (nf.d_exp.size() == 0) + { Trace("strings-solve") << "NONE"; } else { - for( unsigned j=0; j<normal_forms_exp[i].size(); j++ ) { + for (unsigned j = 0, sizej = nf.d_exp.size(); j < sizej; j++) + { if(j>0) { Trace("strings-solve") << " AND "; } - Trace("strings-solve") << normal_forms_exp[i][j]; + Trace("strings-solve") << nf.d_exp[j]; } Trace("strings-solve") << std::endl; Trace("strings-solve") << "WITH DEPENDENCIES : " << std::endl; - for( unsigned j=0; j<normal_forms_exp[i].size(); j++ ) { - Trace("strings-solve") << " " << normal_forms_exp[i][j] << " -> "; - Trace("strings-solve") << normal_forms_exp_depend[i][normal_forms_exp[i][j]][false] << ","; - Trace("strings-solve") << normal_forms_exp_depend[i][normal_forms_exp[i][j]][true] << std::endl; + for (unsigned j = 0, sizej = nf.d_exp.size(); j < sizej; j++) + { + Node exp = nf.d_exp[j]; + Trace("strings-solve") << " " << exp << " -> "; + Trace("strings-solve") << nf.d_expDep[exp][false] << ","; + Trace("strings-solve") << nf.d_expDep[exp][true] << std::endl; } } Trace("strings-solve") << std::endl; @@ -2832,10 +2880,14 @@ void TheoryStrings::getNormalForms( Node &eqc, std::vector< std::vector< Node > Node c = getConstantEqc( eqc ); if( !c.isNull() ){ Trace("strings-solve") << "Eqc is constant " << c << std::endl; - for( unsigned i=0; i<normal_forms.size(); i++ ) { + for (unsigned i = 0, size = normal_forms.size(); i < size; i++) + { + NormalForm& nf = normal_forms[i]; int firstc, lastc; - if( !TheoryStringsRewriter::canConstantContainList( c, normal_forms[i], firstc, lastc ) ){ - Node n = normal_form_src[i]; + if (!TheoryStringsRewriter::canConstantContainList( + c, nf.d_nf, firstc, lastc)) + { + Node n = nf.d_base; //conflict Trace("strings-solve") << "Normal form for " << n << " cannot be contained in constant " << c << std::endl; //conflict, explanation is n = base ^ base = c ^ relevant porition of ( n = N[n] ) @@ -2847,7 +2899,7 @@ void TheoryStrings::getNormalForms( Node &eqc, std::vector< std::vector< Node > exp.push_back( d_eqc_to_const_exp[eqc] ); } //TODO: this can be minimized based on firstc/lastc, normal_forms_exp_depend - exp.insert( exp.end(), normal_forms_exp[i].begin(), normal_forms_exp[i].end() ); + exp.insert(exp.end(), nf.d_exp.begin(), nf.d_exp.end()); Node conc = d_false; sendInference( exp, conc, "N_NCTN" ); } @@ -2856,52 +2908,29 @@ void TheoryStrings::getNormalForms( Node &eqc, std::vector< std::vector< Node > } } -void TheoryStrings::getExplanationVectorForPrefix( std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, - unsigned i, int index, bool isRev, std::vector< Node >& curr_exp ) { - if( index==-1 || !options::stringMinPrefixExplain() ){ - curr_exp.insert(curr_exp.end(), normal_forms_exp[i].begin(), normal_forms_exp[i].end() ); - }else{ - for( unsigned k=0; k<normal_forms_exp[i].size(); k++ ){ - Node exp = normal_forms_exp[i][k]; - int dep = normal_forms_exp_depend[i][exp][isRev]; - if( dep<=index ){ - curr_exp.push_back( exp ); - Trace("strings-explain-prefix-debug") << " include : " << exp << std::endl; - }else{ - Trace("strings-explain-prefix-debug") << " exclude : " << exp << std::endl; - } - } - } -} - -void TheoryStrings::getExplanationVectorForPrefixEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, - std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, - unsigned i, unsigned j, int index_i, int index_j, bool isRev, std::vector< Node >& curr_exp ) { - Trace("strings-explain-prefix") << "Get explanation for prefix " << index_i << ", " << index_j << " of normal forms " << i << " and " << j << ", reverse = " << isRev << std::endl; - for( unsigned r=0; r<2; r++ ){ - getExplanationVectorForPrefix( normal_forms_exp, normal_forms_exp_depend, r==0 ? i : j, r==0 ? index_i : index_j, isRev, curr_exp ); - } - Trace("strings-explain-prefix") << "Included " << curr_exp.size() << " / " << ( normal_forms_exp[i].size() + normal_forms_exp[j].size() ) << std::endl; - addToExplanation( normal_form_src[i], normal_form_src[j], curr_exp ); -} - - -void TheoryStrings::processNEqc( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, - std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend ){ +void TheoryStrings::processNEqc(std::vector<NormalForm>& normal_forms) +{ //the possible inferences std::vector< InferInfo > pinfer; // loop over all pairs for(unsigned i=0; i<normal_forms.size()-1; i++) { //unify each normalform[j] with normal_forms[i] for(unsigned j=i+1; j<normal_forms.size(); j++ ) { + NormalForm& nfi = normal_forms[i]; + NormalForm& nfj = normal_forms[j]; //ensure that normal_forms[i] and normal_forms[j] are the same modulo equality, add to pinfer if not Trace("strings-solve") << "Strings: Process normal form #" << i << " against #" << j << "..." << std::endl; - if( isNormalFormPair( normal_form_src[i], normal_form_src[j] ) ) { + if (isNormalFormPair(nfi.d_base, nfj.d_base)) + { Trace("strings-solve") << "Strings: Already cached." << std::endl; }else{ //process the reverse direction first (check for easy conflicts and inferences) unsigned rindex = 0; - processReverseNEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, rindex, 0, pinfer ); + nfi.reverse(); + nfj.reverse(); + processSimpleNEq(nfi, nfj, rindex, true, 0, pinfer); + nfi.reverse(); + nfj.reverse(); if( hasProcessed() ){ return; }else if( !pinfer.empty() && pinfer.back().d_id==1 ){ @@ -2911,7 +2940,7 @@ void TheoryStrings::processNEqc( std::vector< std::vector< Node > > &normal_form //rindex = 0; unsigned index = 0; - processSimpleNEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, index, false, rindex, pinfer ); + processSimpleNEq(nfi, nfj, index, false, rindex, pinfer); if( hasProcessed() ){ return; }else if( !pinfer.empty() && pinfer.back().d_id==1 ){ @@ -2977,86 +3006,92 @@ bool TheoryStrings::InferInfo::sendAsLemma() { return true; } -void TheoryStrings::processReverseNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, - std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, - unsigned i, unsigned j, unsigned& index, unsigned rproc, std::vector< InferInfo >& pinfer ) { - //reverse normal form of i, j - std::reverse( normal_forms[i].begin(), normal_forms[i].end() ); - std::reverse( normal_forms[j].begin(), normal_forms[j].end() ); - - processSimpleNEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, index, true, rproc, pinfer ); - - //reverse normal form of i, j - std::reverse( normal_forms[i].begin(), normal_forms[i].end() ); - std::reverse( normal_forms[j].begin(), normal_forms[j].end() ); -} - -//rproc is the # is the size of suffix that is identical -void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, - std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, - unsigned i, unsigned j, unsigned& index, bool isRev, unsigned rproc, std::vector< InferInfo >& pinfer ) { - Assert( rproc<=normal_forms[i].size() && rproc<=normal_forms[j].size() ); +void TheoryStrings::processSimpleNEq(NormalForm& nfi, + NormalForm& nfj, + unsigned& index, + bool isRev, + unsigned rproc, + std::vector<InferInfo>& pinfer) +{ + std::vector<Node>& nfiv = nfi.d_nf; + std::vector<Node>& nfjv = nfj.d_nf; + NodeManager* nm = NodeManager::currentNM(); + Assert(rproc <= nfiv.size() && rproc <= nfjv.size()); bool success; do { success = false; //if we are at the end - if( index==(normal_forms[i].size()-rproc) || index==(normal_forms[j].size()-rproc) ){ - if( index==(normal_forms[i].size()-rproc) && index==(normal_forms[j].size()-rproc) ){ + if (index == (nfiv.size() - rproc) || index == (nfjv.size() - rproc)) + { + if (index == (nfiv.size() - rproc) && index == (nfjv.size() - rproc)) + { //we're done }else{ //the remainder must be empty - unsigned k = index==(normal_forms[i].size()-rproc) ? j : i; + NormalForm& nfk = index == (nfiv.size() - rproc) ? nfj : nfi; + std::vector<Node>& nfkv = nfk.d_nf; unsigned index_k = index; //Node eq_exp = mkAnd( curr_exp ); std::vector< Node > curr_exp; - getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, -1, -1, isRev, curr_exp ); - while( !d_conflict && index_k<(normal_forms[k].size()-rproc) ){ + NormalForm::getExplanationForPrefixEq(nfi, nfj, -1, -1, curr_exp); + while (!d_conflict && index_k < (nfkv.size() - rproc)) + { //can infer that this string must be empty - Node eq = normal_forms[k][index_k].eqNode( d_emptyString ); + Node eq = nfkv[index_k].eqNode(d_emptyString); //Trace("strings-lemma") << "Strings: Infer " << eq << " from " << eq_exp << std::endl; - Assert( !areEqual( d_emptyString, normal_forms[k][index_k] ) ); + Assert(!areEqual(d_emptyString, nfkv[index_k])); sendInference( curr_exp, eq, "N_EndpointEmp" ); index_k++; } } }else{ - Trace("strings-solve-debug") << "Process " << normal_forms[i][index] << " ... " << normal_forms[j][index] << std::endl; - if( normal_forms[i][index]==normal_forms[j][index] ){ + Trace("strings-solve-debug") + << "Process " << nfiv[index] << " ... " << nfjv[index] << std::endl; + if (nfiv[index] == nfjv[index]) + { Trace("strings-solve-debug") << "Simple Case 1 : strings are equal" << std::endl; index++; success = true; }else{ - Assert( !areEqual(normal_forms[i][index], normal_forms[j][index]) ); + Assert(!areEqual(nfiv[index], nfjv[index])); std::vector< Node > temp_exp; - Node length_term_i = getLength( normal_forms[i][index], temp_exp ); - Node length_term_j = getLength( normal_forms[j][index], temp_exp ); - //check length(normal_forms[i][index]) == length(normal_forms[j][index]) + Node length_term_i = getLength(nfiv[index], temp_exp); + Node length_term_j = getLength(nfjv[index], temp_exp); + // check length(nfiv[index]) == length(nfjv[index]) if( areEqual( length_term_i, length_term_j ) ){ Trace("strings-solve-debug") << "Simple Case 2 : string lengths are equal" << std::endl; - Node eq = normal_forms[i][index].eqNode( normal_forms[j][index] ); + Node eq = nfiv[index].eqNode(nfjv[index]); //eq = Rewriter::rewrite( eq ); Node length_eq = length_term_i.eqNode( length_term_j ); //temp_exp.insert(temp_exp.end(), curr_exp.begin(), curr_exp.end() ); - getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, index, index, isRev, temp_exp ); + NormalForm::getExplanationForPrefixEq( + nfi, nfj, index, index, temp_exp); temp_exp.push_back(length_eq); sendInference( temp_exp, eq, "N_Unify" ); return; - }else if( ( normal_forms[i][index].getKind()!=kind::CONST_STRING && index==normal_forms[i].size()-rproc-1 ) || - ( normal_forms[j][index].getKind()!=kind::CONST_STRING && index==normal_forms[j].size()-rproc-1 ) ){ + } + else if ((nfiv[index].getKind() != CONST_STRING + && index == nfiv.size() - rproc - 1) + || (nfjv[index].getKind() != CONST_STRING + && index == nfjv.size() - rproc - 1)) + { Trace("strings-solve-debug") << "Simple Case 3 : at endpoint" << std::endl; std::vector< Node > antec; //antec.insert(antec.end(), curr_exp.begin(), curr_exp.end() ); - getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, -1, -1, isRev, antec ); + NormalForm::getExplanationForPrefixEq(nfi, nfj, -1, -1, antec); std::vector< Node > eqn; for( unsigned r=0; r<2; r++ ) { - int index_k = index; - int k = r==0 ? i : j; + NormalForm& nfk = r == 0 ? nfi : nfj; + std::vector<Node>& nfkv = nfk.d_nf; std::vector< Node > eqnc; - for( unsigned index_l=index_k; index_l<(normal_forms[k].size()-rproc); index_l++ ) { + for (unsigned index_l = index, size = (nfkv.size() - rproc); + index_l < size; + index_l++) + { if(isRev) { - eqnc.insert(eqnc.begin(), normal_forms[k][index_l] ); + eqnc.insert(eqnc.begin(), nfkv[index_l]); } else { - eqnc.push_back( normal_forms[k][index_l] ); + eqnc.push_back(nfkv[index_l]); } } eqn.push_back( mkConcat( eqnc ) ); @@ -3065,49 +3100,46 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal sendInference( antec, eqn[0].eqNode( eqn[1] ), "N_EndpointEq", true ); return; }else{ - Assert( normal_forms[i].size()==normal_forms[j].size() ); - index = normal_forms[i].size()-rproc; + Assert(nfiv.size() == nfjv.size()); + index = nfiv.size() - rproc; } - }else if( normal_forms[i][index].isConst() && normal_forms[j][index].isConst() ){ - Node const_str = normal_forms[i][index]; - Node other_str = normal_forms[j][index]; + } + else if (nfiv[index].isConst() && nfjv[index].isConst()) + { + Node const_str = nfiv[index]; + Node other_str = nfjv[index]; Trace("strings-solve-debug") << "Simple Case 3 : Const Split : " << const_str << " vs " << other_str << " at index " << index << ", isRev = " << isRev << std::endl; unsigned len_short = const_str.getConst<String>().size() <= other_str.getConst<String>().size() ? const_str.getConst<String>().size() : other_str.getConst<String>().size(); bool isSameFix = isRev ? const_str.getConst<String>().rstrncmp(other_str.getConst<String>(), len_short): const_str.getConst<String>().strncmp(other_str.getConst<String>(), len_short); if( isSameFix ) { //same prefix/suffix + bool constCmp = const_str.getConst<String>().size() + < other_str.getConst<String>().size(); //k is the index of the string that is shorter - int k = const_str.getConst<String>().size()<other_str.getConst<String>().size() ? i : j; - int l = const_str.getConst<String>().size()<other_str.getConst<String>().size() ? j : i; - //update the nf exp dependencies - //notice this is not critical for soundness: not doing the below incrementing will only lead to overapproximating when antecedants are required in explanations - for( std::map< Node, std::map< bool, int > >::iterator itnd = normal_forms_exp_depend[l].begin(); itnd != normal_forms_exp_depend[l].end(); ++itnd ){ - for( std::map< bool, int >::iterator itnd2 = itnd->second.begin(); itnd2 != itnd->second.end(); ++itnd2 ){ - //see if this can be incremented: it can if it is not relevant to the current index - Assert( itnd2->second>=0 && itnd2->second<=(int)normal_forms[l].size() ); - bool increment = (itnd2->first==isRev) ? itnd2->second>(int)index : ( (int)normal_forms[l].size()-1-itnd2->second )<(int)index; - if( increment ){ - normal_forms_exp_depend[l][itnd->first][itnd2->first] = itnd2->second + 1; - } - } - } + NormalForm& nfk = constCmp ? nfi : nfj; + std::vector<Node>& nfkv = nfk.d_nf; + NormalForm& nfl = constCmp ? nfj : nfi; + std::vector<Node>& nflv = nfl.d_nf; + Node remainderStr; if( isRev ){ - int new_len = normal_forms[l][index].getConst<String>().size() - len_short; - Node remainderStr = NodeManager::currentNM()->mkConst( normal_forms[l][index].getConst<String>().substr(0, new_len) ); - Trace("strings-solve-debug-test") << "Break normal form of " << normal_forms[l][index] << " into " << normal_forms[k][index] << ", " << remainderStr << std::endl; - normal_forms[l].insert( normal_forms[l].begin()+index + 1, remainderStr ); + int new_len = nflv[index].getConst<String>().size() - len_short; + remainderStr = nm->mkConst( + nflv[index].getConst<String>().substr(0, new_len)); }else{ - Node remainderStr = NodeManager::currentNM()->mkConst(normal_forms[l][index].getConst<String>().substr(len_short)); - Trace("strings-solve-debug-test") << "Break normal form of " << normal_forms[l][index] << " into " << normal_forms[k][index] << ", " << remainderStr << std::endl; - normal_forms[l].insert( normal_forms[l].begin()+index + 1, remainderStr ); + remainderStr = + nm->mkConst(nflv[index].getConst<String>().substr(len_short)); } - normal_forms[l][index] = normal_forms[k][index]; + Trace("strings-solve-debug-test") + << "Break normal form of " << nflv[index] << " into " + << nfkv[index] << ", " << remainderStr << std::endl; + nfl.splitConstant(index, nfkv[index], remainderStr); index++; success = true; }else{ //conflict std::vector< Node > antec; - getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, index, index, isRev, antec ); + NormalForm::getExplanationForPrefixEq( + nfi, nfj, index, index, antec); sendInference( antec, d_false, "N_Const", true ); return; } @@ -3116,17 +3148,19 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal InferInfo info; info.d_index = index; //for debugging - info.d_i = i; - info.d_j = j; + info.d_i = nfi.d_base; + info.d_j = nfj.d_base; info.d_rev = isRev; bool info_valid = false; - Assert( index<normal_forms[i].size()-rproc && index<normal_forms[j].size()-rproc ); + Assert(index < nfiv.size() - rproc && index < nfjv.size() - rproc); std::vector< Node > lexp; - Node length_term_i = getLength( normal_forms[i][index], lexp ); - Node length_term_j = getLength( normal_forms[j][index], lexp ); + Node length_term_i = getLength(nfiv[index], lexp); + Node length_term_j = getLength(nfjv[index], lexp); //split on equality between string lengths (note that splitting on equality between strings is worse since it is harder to process) - if( !areDisequal( length_term_i, length_term_j ) && !areEqual( length_term_i, length_term_j ) && - normal_forms[i][index].getKind()!=kind::CONST_STRING && normal_forms[j][index].getKind()!=kind::CONST_STRING ){ //AJR: remove the latter 2 conditions? + if (!areDisequal(length_term_i, length_term_j) + && !areEqual(length_term_i, length_term_j) + && !nfiv[index].isConst() && !nfjv[index].isConst()) + { // AJR: remove the latter 2 conditions? Trace("strings-solve-debug") << "Non-simple Case 1 : string lengths neither equal nor disequal" << std::endl; //try to make the lengths equal via splitting on demand Node length_eq = NodeManager::currentNM()->mkNode( kind::EQUAL, length_term_i, length_term_j ); @@ -3141,33 +3175,34 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal int loop_in_i = -1; int loop_in_j = -1; ProcessLoopResult plr = ProcessLoopResult::SKIPPED; - if( detectLoop( normal_forms, i, j, index, loop_in_i, loop_in_j, rproc ) ){ + if (detectLoop(nfi, nfj, index, loop_in_i, loop_in_j, rproc)) + { if( !isRev ){ //FIXME - getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, -1, -1, isRev, info.d_ant ); - //set info - plr = processLoop(normal_forms, - normal_form_src, - i, - j, - loop_in_i != -1 ? i : j, - loop_in_i != -1 ? j : i, - loop_in_i != -1 ? loop_in_i : loop_in_j, - index, - info); - if (plr == ProcessLoopResult::INFERENCE) - { - info_valid = true; - } + NormalForm::getExplanationForPrefixEq( + nfi, nfj, -1, -1, info.d_ant); + // set info + plr = processLoop(loop_in_i != -1 ? nfi : nfj, + loop_in_i != -1 ? nfj : nfi, + loop_in_i != -1 ? loop_in_i : loop_in_j, + index, + info); + if (plr == ProcessLoopResult::INFERENCE) + { + info_valid = true; + } } } if (plr == ProcessLoopResult::SKIPPED) { //AJR: length entailment here? - if( normal_forms[i][index].getKind() == kind::CONST_STRING || normal_forms[j][index].getKind() == kind::CONST_STRING ){ - unsigned const_k = normal_forms[i][index].getKind() == kind::CONST_STRING ? i : j; - unsigned nconst_k = normal_forms[i][index].getKind() == kind::CONST_STRING ? j : i; - Node other_str = normal_forms[nconst_k][index]; + if (nfiv[index].isConst() || nfjv[index].isConst()) + { + NormalForm& nfc = nfiv[index].isConst() ? nfi : nfj; + std::vector<Node>& nfcv = nfc.d_nf; + NormalForm& nfnc = nfiv[index].isConst() ? nfj : nfi; + std::vector<Node>& nfncv = nfnc.d_nf; + Node other_str = nfncv[index]; Assert( other_str.getKind()!=kind::CONST_STRING, "Other string is not constant." ); Assert( other_str.getKind()!=kind::STRING_CONCAT, "Other string is not CONCAT." ); if( !d_equalityEngine.areDisequal( other_str, d_emptyString, true ) ){ @@ -3180,12 +3215,15 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal if( !isRev ){ //FIXME Node xnz = other_str.eqNode( d_emptyString ).negate(); unsigned index_nc_k = index+1; - //Node next_const_str = TheoryStringsRewriter::collectConstantStringAt( normal_forms[nconst_k], index_nc_k, false ); unsigned start_index_nc_k = index+1; - Node next_const_str = TheoryStringsRewriter::getNextConstantAt( normal_forms[nconst_k], start_index_nc_k, index_nc_k, false ); + Node next_const_str = + TheoryStringsRewriter::getNextConstantAt( + nfncv, start_index_nc_k, index_nc_k, false); if( !next_const_str.isNull() ) { unsigned index_c_k = index; - Node const_str = TheoryStringsRewriter::collectConstantStringAt( normal_forms[const_k], index_c_k, false ); + Node const_str = + TheoryStringsRewriter::collectConstantStringAt( + nfcv, index_c_k, false); Assert( !const_str.isNull() ); CVC4::String stra = const_str.getConst<String>(); CVC4::String strb = next_const_str.getConst<String>(); @@ -3208,9 +3246,9 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal } if( p>1 ){ if( start_index_nc_k==index+1 ){ - info.d_ant.push_back( xnz ); - getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, - const_k, nconst_k, index_c_k, index_nc_k, isRev, info.d_ant ); + info.d_ant.push_back(xnz); + NormalForm::getExplanationForPrefixEq( + nfc, nfnc, index_c_k, index_nc_k, info.d_ant); Node prea = p==stra.size() ? const_str : NodeManager::currentNM()->mkConst( isRev ? stra.suffix( p ) : stra.prefix( p ) ); Node sk = d_sk_cache.mkSkolemCached( other_str, @@ -3225,24 +3263,13 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal info.d_id = INFER_SSPLIT_CST_PROP; info_valid = true; } - /* FIXME for isRev, speculative - else if( options::stringLenPropCsp() ){ - //propagate length constraint - std::vector< Node > cc; - for( unsigned i=index; i<start_index_nc_k; i++ ){ - cc.push_back( normal_forms[nconst_k][i] ); - } - Node lt = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, mkConcat( cc ) ); - conc = NodeManager::currentNM()->mkNode( kind::GEQ, lt, NodeManager::currentNM()->mkConst( Rational(p) ) ); - sendInference( ant, conc, "S-Split(CSP-P)-lprop", true ); - } - */ } } if( !info_valid ){ info.d_ant.push_back( xnz ); - Node const_str = normal_forms[const_k][index]; - getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, index, index, isRev, info.d_ant ); + Node const_str = nfcv[index]; + NormalForm::getExplanationForPrefixEq( + nfi, nfj, index, index, info.d_ant); CVC4::String stra = const_str.getConst<String>(); if( options::stringBinaryCsp() && stra.size()>3 ){ //split string in half @@ -3285,7 +3312,7 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal if( options::stringCheckEntailLen() ){ //check entailment for( unsigned e=0; e<2; e++ ){ - Node t = e==0 ? normal_forms[i][index] : normal_forms[j][index]; + Node t = e == 0 ? nfiv[index] : nfjv[index]; //do not infer constants are larger than variables if( t.getKind()!=kind::CONST_STRING ){ Node lt1 = e==0 ? length_term_i : length_term_j; @@ -3302,11 +3329,12 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal } } } - - getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, i, j, index, index, isRev, info.d_ant ); + + NormalForm::getExplanationForPrefixEq( + nfi, nfj, index, index, info.d_ant); //x!=e /\ y!=e for(unsigned xory=0; xory<2; xory++) { - Node x = xory==0 ? normal_forms[i][index] : normal_forms[j][index]; + Node x = xory == 0 ? nfiv[index] : nfjv[index]; Node xgtz = x.eqNode( d_emptyString ).negate(); if( d_equalityEngine.areDisequal( x, d_emptyString, true ) ) { info.d_ant.push_back( xgtz ); @@ -3315,15 +3343,19 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal } } Node sk = d_sk_cache.mkSkolemCached( - normal_forms[i][index], - normal_forms[j][index], + nfiv[index], + nfjv[index], isRev ? SkolemCache::SK_ID_V_SPT_REV : SkolemCache::SK_ID_V_SPT, "v_spt"); // must add length requirement info.d_new_skolem[LENGTH_GEQ_ONE].push_back(sk); - Node eq1 = normal_forms[i][index].eqNode( isRev ? mkConcat(sk, normal_forms[j][index]) : mkConcat(normal_forms[j][index], sk) ); - Node eq2 = normal_forms[j][index].eqNode( isRev ? mkConcat(sk, normal_forms[i][index]) : mkConcat(normal_forms[i][index], sk) ); + Node eq1 = + nfiv[index].eqNode(isRev ? mkConcat(sk, nfjv[index]) + : mkConcat(nfjv[index], sk)); + Node eq2 = + nfjv[index].eqNode(isRev ? mkConcat(sk, nfiv[index]) + : mkConcat(nfiv[index], sk)); if( lentTestSuccess!=-1 ){ info.d_antn.push_back( lentTestExp ); @@ -3355,15 +3387,26 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal }while( success ); } -bool TheoryStrings::detectLoop( std::vector< std::vector< Node > > &normal_forms, int i, int j, int index, int &loop_in_i, int &loop_in_j, unsigned rproc ){ +bool TheoryStrings::detectLoop(NormalForm& nfi, + NormalForm& nfj, + int index, + int& loop_in_i, + int& loop_in_j, + unsigned rproc) +{ int has_loop[2] = { -1, -1 }; if( options::stringLB() != 2 ) { for( unsigned r=0; r<2; r++ ) { - int n_index = (r==0 ? i : j); - int other_n_index = (r==0 ? j : i); - if( normal_forms[other_n_index][index].getKind() != kind::CONST_STRING ) { - for( unsigned lp = index+1; lp<normal_forms[n_index].size()-rproc; lp++ ){ - if( normal_forms[n_index][lp]==normal_forms[other_n_index][index] ){ + NormalForm& nf = r == 0 ? nfi : nfj; + NormalForm& nfo = r == 0 ? nfj : nfi; + std::vector<Node>& nfv = nf.d_nf; + std::vector<Node>& nfov = nfo.d_nf; + if (!nfov[index].isConst()) + { + for (unsigned lp = index + 1; lp < nfv.size() - rproc; lp++) + { + if (nfv[lp] == nfov[index]) + { has_loop[r] = lp; break; } @@ -3382,16 +3425,11 @@ bool TheoryStrings::detectLoop( std::vector< std::vector< Node > > &normal_forms } //xs(zy)=t(yz)xr -TheoryStrings::ProcessLoopResult TheoryStrings::processLoop( - const std::vector<std::vector<Node> >& normal_forms, - const std::vector<Node>& normal_form_src, - int i, - int j, - int loop_n_index, - int other_n_index, - int loop_index, - int index, - InferInfo& info) +TheoryStrings::ProcessLoopResult TheoryStrings::processLoop(NormalForm& nfi, + NormalForm& nfj, + int loop_index, + int index, + InferInfo& info) { if (options::stringProcessLoopMode() == ProcessLoopMode::ABORT) { @@ -3405,18 +3443,17 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop( NodeManager* nm = NodeManager::currentNM(); Node conc; - Trace("strings-loop") << "Detected possible loop for " - << normal_forms[loop_n_index][loop_index] << std::endl; - Trace("strings-loop") << " ... (X)= " << normal_forms[other_n_index][index] - << std::endl; + const std::vector<Node>& veci = nfi.d_nf; + const std::vector<Node>& vecoi = nfj.d_nf; + Trace("strings-loop") << "Detected possible loop for " << veci[loop_index] + << std::endl; + Trace("strings-loop") << " ... (X)= " << vecoi[index] << std::endl; Trace("strings-loop") << " ... T(Y.Z)= "; - const std::vector<Node>& veci = normal_forms[loop_n_index]; std::vector<Node> vec_t(veci.begin() + index, veci.begin() + loop_index); Node t_yz = mkConcat(vec_t); Trace("strings-loop") << " (" << t_yz << ")" << std::endl; Trace("strings-loop") << " ... S(Z.Y)= "; - const std::vector<Node>& vecoi = normal_forms[other_n_index]; std::vector<Node> vec_s(vecoi.begin() + index + 1, vecoi.end()); Node s_zy = mkConcat(vec_s); Trace("strings-loop") << s_zy << std::endl; @@ -3453,7 +3490,7 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop( Node split_eq; for (unsigned r = 0; r < 2; r++) { - Node t = r == 0 ? normal_forms[loop_n_index][loop_index] : t_yz; + Node t = r == 0 ? veci[loop_index] : t_yz; split_eq = t.eqNode(d_emptyString); Node split_eqr = Rewriter::rewrite(split_eq); // the equality could rewrite to false @@ -3486,16 +3523,14 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop( && s_zy.getConst<String>().isRepeated()) { Node rep_c = nm->mkConst(s_zy.getConst<String>().substr(0, 1)); - Trace("strings-loop") << "Special case (X)=" - << normal_forms[other_n_index][index] << " " + Trace("strings-loop") << "Special case (X)=" << vecoi[index] << " " << std::endl; Trace("strings-loop") << "... (C)=" << rep_c << " " << std::endl; // special case - str_in_re = - nm->mkNode(kind::STRING_IN_REGEXP, - normal_forms[other_n_index][index], - nm->mkNode(kind::REGEXP_STAR, - nm->mkNode(kind::STRING_TO_REGEXP, rep_c))); + str_in_re = nm->mkNode( + STRING_IN_REGEXP, + vecoi[index], + nm->mkNode(REGEXP_STAR, nm->mkNode(STRING_TO_REGEXP, rep_c))); conc = str_in_re; } else if (t_yz.isConst()) @@ -3528,12 +3563,12 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop( continue; } Node conc2 = nm->mkNode( - kind::STRING_IN_REGEXP, - normal_forms[other_n_index][index], - nm->mkNode(kind::REGEXP_CONCAT, - nm->mkNode(kind::STRING_TO_REGEXP, y), - nm->mkNode(kind::REGEXP_STAR, - nm->mkNode(kind::STRING_TO_REGEXP, restr)))); + STRING_IN_REGEXP, + vecoi[index], + nm->mkNode( + REGEXP_CONCAT, + nm->mkNode(STRING_TO_REGEXP, y), + nm->mkNode(REGEXP_STAR, nm->mkNode(STRING_TO_REGEXP, restr)))); cc = cc == d_true ? conc2 : nm->mkNode(kind::AND, cc, conc2); vconc.push_back(cc); } @@ -3566,8 +3601,7 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop( vec_r.insert(vec_r.begin(), sk_y); vec_r.insert(vec_r.begin(), sk_z); Node conc2 = s_zy.eqNode(mkConcat(vec_r)); - Node conc3 = - normal_forms[other_n_index][index].eqNode(mkConcat(sk_y, sk_w)); + Node conc3 = vecoi[index].eqNode(mkConcat(sk_y, sk_w)); Node restr = r == d_emptyString ? s_zy : mkConcat(sk_z, sk_y); str_in_re = nm->mkNode(kind::STRING_IN_REGEXP, @@ -3587,19 +3621,22 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop( // we will be done info.d_conc = conc; info.d_id = INFER_FLOOP; - info.d_nf_pair[0] = normal_form_src[i]; - info.d_nf_pair[1] = normal_form_src[j]; + info.d_nf_pair[0] = nfi.d_base; + info.d_nf_pair[1] = nfj.d_base; return ProcessLoopResult::INFERENCE; } //return true for lemma, false if we succeed void TheoryStrings::processDeq( Node ni, Node nj ) { //Assert( areDisequal( ni, nj ) ); - if( d_normal_forms[ni].size()>1 || d_normal_forms[nj].size()>1 ){ + NormalForm& nfni = getNormalForm(ni); + NormalForm& nfnj = getNormalForm(nj); + if (nfni.d_nf.size() > 1 || nfnj.d_nf.size() > 1) + { std::vector< Node > nfi; - nfi.insert( nfi.end(), d_normal_forms[ni].begin(), d_normal_forms[ni].end() ); + nfi.insert(nfi.end(), nfni.d_nf.begin(), nfni.d_nf.end()); std::vector< Node > nfj; - nfj.insert( nfj.end(), d_normal_forms[nj].begin(), d_normal_forms[nj].end() ); + nfj.insert(nfj.end(), nfnj.d_nf.begin(), nfnj.d_nf.end()); int revRet = processReverseDeq( nfi, nfj, ni, nj ); if( revRet!=0 ){ @@ -3607,9 +3644,9 @@ void TheoryStrings::processDeq( Node ni, Node nj ) { } nfi.clear(); - nfi.insert( nfi.end(), d_normal_forms[ni].begin(), d_normal_forms[ni].end() ); + nfi.insert(nfi.end(), nfni.d_nf.begin(), nfni.d_nf.end()); nfj.clear(); - nfj.insert( nfj.end(), d_normal_forms[nj].begin(), d_normal_forms[nj].end() ); + nfj.insert(nfj.end(), nfnj.d_nf.begin(), nfnj.d_nf.end()); unsigned index = 0; while( index<nfi.size() || index<nfj.size() ){ @@ -3665,8 +3702,10 @@ void TheoryStrings::processDeq( Node ni, Node nj ) { eq1 = Rewriter::rewrite( eq1 ); Node eq2 = nconst_k.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, firstChar, skr ) ); std::vector< Node > antec; - antec.insert( antec.end(), d_normal_forms_exp[ni].begin(), d_normal_forms_exp[ni].end() ); - antec.insert( antec.end(), d_normal_forms_exp[nj].begin(), d_normal_forms_exp[nj].end() ); + antec.insert( + antec.end(), nfni.d_exp.begin(), nfni.d_exp.end()); + antec.insert( + antec.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); antec.push_back( nconst_k.eqNode( d_emptyString ).negate() ); sendInference( antec, NodeManager::currentNM()->mkNode( kind::OR, NodeManager::currentNM()->mkNode( kind::AND, eq1, sk.eqNode( firstChar ).negate() ), eq2 ), "D-DISL-CSplit" ); @@ -3679,8 +3718,8 @@ void TheoryStrings::processDeq( Node ni, Node nj ) { //must add lemma std::vector< Node > antec; std::vector< Node > antec_new_lits; - antec.insert( antec.end(), d_normal_forms_exp[ni].begin(), d_normal_forms_exp[ni].end() ); - antec.insert( antec.end(), d_normal_forms_exp[nj].begin(), d_normal_forms_exp[nj].end() ); + antec.insert(antec.end(), nfni.d_exp.begin(), nfni.d_exp.end()); + antec.insert(antec.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); //check disequal if( areDisequal( ni, nj ) ){ antec.push_back( ni.eqNode( nj ).negate() ); @@ -3767,16 +3806,18 @@ int TheoryStrings::processSimpleDeq( std::vector< Node >& nfi, std::vector< Node } } } + NormalForm& nfni = getNormalForm(ni); + NormalForm& nfnj = getNormalForm(nj); while( index<nfi.size() || index<nfj.size() ) { if( index>=nfi.size() || index>=nfj.size() ){ Trace("strings-solve-debug") << "Disequality normalize empty" << std::endl; std::vector< Node > ant; //we have a conflict : because the lengths are equal, the remainder needs to be empty, which will lead to a conflict - Node lni = getLengthExp( ni, ant, d_normal_forms_base[ni] ); - Node lnj = getLengthExp( nj, ant, d_normal_forms_base[nj] ); + Node lni = getLengthExp(ni, ant, nfni.d_base); + Node lnj = getLengthExp(nj, ant, nfnj.d_base); ant.push_back( lni.eqNode( lnj ) ); - ant.insert( ant.end(), d_normal_forms_exp[ni].begin(), d_normal_forms_exp[ni].end() ); - ant.insert( ant.end(), d_normal_forms_exp[nj].begin(), d_normal_forms_exp[nj].end() ); + ant.insert(ant.end(), nfni.d_exp.begin(), nfni.d_exp.end()); + ant.insert(ant.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); std::vector< Node > cc; std::vector< Node >& nfk = index>=nfi.size() ? nfj : nfi; for( unsigned index_k=index; index_k<nfk.size(); index_k++ ){ @@ -4376,18 +4417,6 @@ Node TheoryStrings::mkAnd( std::vector< Node >& a ) { } } -void TheoryStrings::getConcatVec( Node n, std::vector< Node >& c ) { - if( n.getKind()==kind::STRING_CONCAT ) { - for( unsigned i=0; i<n.getNumChildren(); i++ ) { - if( !areEqual( n[i], d_emptyString ) ) { - c.push_back( n[i] ); - } - } - }else{ - c.push_back( n ); - } -} - void TheoryStrings::checkNormalFormsDeq() { std::vector< std::vector< Node > > cols; @@ -4422,20 +4451,28 @@ void TheoryStrings::checkNormalFormsDeq() separateByLength( d_strings_eqc, cols, lts ); for( unsigned i=0; i<cols.size(); i++ ){ if( cols[i].size()>1 && d_lemma_cache.empty() ){ - Trace("strings-solve") << "- Verify disequalities are processed for " << cols[i][0] << ", normal form : "; - printConcat( d_normal_forms[cols[i][0]], "strings-solve" ); - Trace("strings-solve") << "... #eql = " << cols[i].size() << std::endl; + if (Trace.isOn("strings-solve")) + { + Trace("strings-solve") << "- Verify disequalities are processed for " + << cols[i][0] << ", normal form : "; + printConcat(getNormalForm(cols[i][0]).d_nf, "strings-solve"); + Trace("strings-solve") + << "... #eql = " << cols[i].size() << std::endl; + } //must ensure that normal forms are disequal for( unsigned j=0; j<cols[i].size(); j++ ){ for( unsigned k=(j+1); k<cols[i].size(); k++ ){ //for strings that are disequal, but have the same length if( areDisequal( cols[i][j], cols[i][k] ) ){ Assert( !d_conflict ); - Trace("strings-solve") << "- Compare " << cols[i][j] << " "; - printConcat( d_normal_forms[cols[i][j]], "strings-solve" ); - Trace("strings-solve") << " against " << cols[i][k] << " "; - printConcat( d_normal_forms[cols[i][k]], "strings-solve" ); - Trace("strings-solve") << "..." << std::endl; + if (Trace.isOn("strings-solve")) + { + Trace("strings-solve") << "- Compare " << cols[i][j] << " "; + printConcat(getNormalForm(cols[i][j]).d_nf, "strings-solve"); + Trace("strings-solve") << " against " << cols[i][k] << " "; + printConcat(getNormalForm(cols[i][k]).d_nf, "strings-solve"); + Trace("strings-solve") << "..." << std::endl; + } processDeq( cols[i][j], cols[i][k] ); if( hasProcessed() ){ return; @@ -4451,7 +4488,7 @@ void TheoryStrings::checkNormalFormsDeq() void TheoryStrings::checkLengthsEqc() { if( options::stringLenNorm() ){ for( unsigned i=0; i<d_strings_eqc.size(); i++ ){ - //if( d_normal_forms[nodes[i]].size()>1 ) { + NormalForm& nfi = getNormalForm(d_strings_eqc[i]); Trace("strings-process-debug") << "Process length constraints for " << d_strings_eqc[i] << std::endl; //check if there is a length term for this equivalence class EqcInfo* ei = getOrMakeEqcInfo( d_strings_eqc[i], false ); @@ -4460,19 +4497,22 @@ void TheoryStrings::checkLengthsEqc() { Node llt = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, lt ); //now, check if length normalization has occurred if( ei->d_normalized_length.get().isNull() ) { - Node nf = mkConcat( d_normal_forms[d_strings_eqc[i]] ); + Node nf = mkConcat(nfi.d_nf); if( Trace.isOn("strings-process-debug") ){ - Trace("strings-process-debug") << " normal form is " << nf << " from base " << d_normal_forms_base[d_strings_eqc[i]] << std::endl; + Trace("strings-process-debug") + << " normal form is " << nf << " from base " << nfi.d_base + << std::endl; Trace("strings-process-debug") << " normal form exp is: " << std::endl; - for( unsigned j=0; j<d_normal_forms_exp[d_strings_eqc[i]].size(); j++ ){ - Trace("strings-process-debug") << " " << d_normal_forms_exp[d_strings_eqc[i]][j] << std::endl; + for (const Node& exp : nfi.d_exp) + { + Trace("strings-process-debug") << " " << exp << std::endl; } } //if not, add the lemma std::vector< Node > ant; - ant.insert( ant.end(), d_normal_forms_exp[d_strings_eqc[i]].begin(), d_normal_forms_exp[d_strings_eqc[i]].end() ); - ant.push_back( d_normal_forms_base[d_strings_eqc[i]].eqNode( lt ) ); + ant.insert(ant.end(), nfi.d_exp.begin(), nfi.d_exp.end()); + ant.push_back(nfi.d_base.eqNode(lt)); Node lc = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, nf ); Node lcr = Rewriter::rewrite( lc ); Trace("strings-process-debug") << "Rewrote length " << lc << " to " << lcr << std::endl; @@ -4485,7 +4525,7 @@ void TheoryStrings::checkLengthsEqc() { }else{ Trace("strings-process-debug") << "No length term for eqc " << d_strings_eqc[i] << " " << d_eqc_to_len_term[d_strings_eqc[i]] << std::endl; if( !options::stringEagerLen() ){ - Node c = mkConcat( d_normal_forms[d_strings_eqc[i]] ); + Node c = mkConcat(nfi.d_nf); registerTerm( c, 3 ); /* if( !c.isConst() ){ diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index b3cb323ae..e9d82a7b2 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -24,6 +24,7 @@ #include "expr/attribute.h" #include "expr/node_trie.h" #include "theory/decision_manager.h" +#include "theory/strings/normal_form.h" #include "theory/strings/regexp_elim.h" #include "theory/strings/regexp_operation.h" #include "theory/strings/regexp_solver.h" @@ -307,11 +308,10 @@ class TheoryStrings : public Theory { /** inferences: maintained to ensure ref count for internally introduced nodes */ NodeList d_infer; NodeList d_infer_exp; - /** normal forms */ - std::map< Node, Node > d_normal_forms_base; - std::map< Node, std::vector< Node > > d_normal_forms; - std::map< Node, std::vector< Node > > d_normal_forms_exp; - std::map< Node, std::map< Node, std::map< bool, int > > > d_normal_forms_exp_depend; + /** map from terms to their normal forms */ + std::map<Node, NormalForm> d_normal_form; + /** get normal form */ + NormalForm& getNormalForm(Node n); //map of pairs of terms that have the same normal form NodeIntMap d_nf_pairs; std::map< Node, std::vector< Node > > d_nf_pairs_data; @@ -468,8 +468,14 @@ private: class InferInfo { public: - unsigned d_i; - unsigned d_j; + /** for debugging + * + * The base pair of strings d_i/d_j that led to the inference, and whether + * (d_rev) we were processing the normal forms of these strings in reverse + * direction. + */ + Node d_i; + Node d_j; bool d_rev; std::vector<Node> d_ant; std::vector<Node> d_antn; @@ -554,10 +560,77 @@ private: //--------------------------end for checkCycles //--------------------------for checkNormalFormsEq + /** normalize equivalence class + * + * This method attempts to build a "normal form" for the equivalence class + * of string term n (for more details on normal forms, see normal_form.h + * or see Liang et al CAV 2014). In particular, this method checks whether the + * current normal form for each term in this equivalence class is identical. + * If it is not, then we add an inference via sendInference and abort the + * call. + */ void normalizeEquivalenceClass( Node n ); - void getNormalForms( Node &eqc, std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, - std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend ); - bool detectLoop( std::vector< std::vector< Node > > &normal_forms, int i, int j, int index, int &loop_in_i, int &loop_in_j, unsigned rproc ); + /** + * For each term in the equivalence class of eqc, this adds data regarding its + * normal form to normal_forms. The map term_to_nf_index maps terms to the + * index in normal_forms where their normal form data is located. + */ + void getNormalForms(Node eqc, + std::vector<NormalForm>& normal_forms, + std::map<Node, unsigned>& term_to_nf_index); + /** process normalize equivalence class + * + * This is called when an equivalence class contains a set of terms that + * have normal forms given by the argument normal_forms. It either + * verifies that all normal forms in this vector are identical, or otherwise + * adds a conflict, lemma, or inference via the sendInference method. + * + * To prioritize one inference versus another, it builds a set of possible + * inferences, at most two for each pair of distinct normal forms, + * corresponding to processing the normal form pair in the (forward, reverse) + * directions. Once all possible inferences are recorded, it executes the + * one with highest priority based on the enumeration type Inference. + */ + void processNEqc(std::vector<NormalForm>& normal_forms); + /** process simple normal equality + * + * This method is called when two equal terms have normal forms nfi and nfj. + * It adds (typically at most one) possible inference to the vector pinfer. + * This inference is in the form of an InferInfo object, which stores the + * necessary information regarding how to process the inference. + * + * index: The index in the normal form vectors (nfi.d_nf and nfj.d_nf) that + * we are currently checking. This method will increment this index until + * it finds an index where these vectors differ, or until it reaches the + * end of these vectors. + * isRev: Whether we are processing the normal forms in reverse direction. + * Notice in this case the normal form vectors have been reversed, hence, + * many operations are identical to the forward case, e.g. index is + * incremented not decremented, while others require special care, e.g. + * constant strings "ABC" in the normal form vectors are not reversed to + * "CBA" and hence all operations should assume a flipped semantics for + * constants when isRev is true, + * rproc: the number of string components on the suffix of the normal form of + * nfi and nfj that were already processed. This is used when using + * fowards/backwards traversals of normal forms to ensure that duplicate + * inferences are not processed. + * pinfer: the set of possible inferences we add to. + */ + void processSimpleNEq(NormalForm& nfi, + NormalForm& nfj, + unsigned& index, + bool isRev, + unsigned rproc, + std::vector<InferInfo>& pinfer); + //--------------------------end for checkNormalFormsEq + + //--------------------------for checkNormalFormsEq with loops + bool detectLoop(NormalForm& nfi, + NormalForm& nfj, + int index, + int& loop_in_i, + int& loop_in_j, + unsigned rproc); /** * Result of processLoop() below. @@ -572,36 +645,17 @@ private: SKIPPED, }; - ProcessLoopResult processLoop( - const std::vector<std::vector<Node> >& normal_forms, - const std::vector<Node>& normal_form_src, - int i, - int j, - int loop_n_index, - int other_n_index, - int loop_index, - int index, - InferInfo& info); - - void processNEqc( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, - std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend ); - void processReverseNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, - std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, - unsigned i, unsigned j, unsigned& index, unsigned rproc, std::vector< InferInfo >& pinfer ); - void processSimpleNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, - std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, - unsigned i, unsigned j, unsigned& index, bool isRev, unsigned rproc, std::vector< InferInfo >& pinfer ); - //--------------------------end for checkNormalFormsEq + ProcessLoopResult processLoop(NormalForm& nfi, + NormalForm& nfj, + int loop_index, + int index, + InferInfo& info); + //--------------------------end for checkNormalFormsEq with loops //--------------------------for checkNormalFormsDeq void processDeq( Node n1, Node n2 ); int processReverseDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj ); int processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj, unsigned& index, bool isRev ); - void getExplanationVectorForPrefix( std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, - unsigned i, int index, bool isRev, std::vector< Node >& curr_exp ); - void getExplanationVectorForPrefixEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, - std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, - unsigned i, unsigned j, int index_i, int index_j, bool isRev, std::vector< Node >& curr_exp ); //--------------------------end for checkNormalFormsDeq //--------------------------------for checkMemberships @@ -787,8 +841,6 @@ private: protected: /** mkAnd **/ Node mkAnd(std::vector<Node>& a); - /** get concat vector */ - void getConcatVec(Node n, std::vector<Node>& c); /** get equivalence classes * |