diff options
author | Andres Noetzli <andres.noetzli@gmail.com> | 2020-05-08 22:42:22 -0700 |
---|---|---|
committer | Andres Noetzli <andres.noetzli@gmail.com> | 2020-05-11 22:22:48 -0700 |
commit | 5a610d9d82a40ba9e78a1a948c82aa42e7b961ae (patch) | |
tree | fd69a4b1b52d1ef5ec8723c537f6179913d5be33 | |
parent | ea7a9c71305bad749f4e148097585fdcc28004f8 (diff) |
Add skolem stats and sharing togglefmcad2020-strings
-rw-r--r-- | src/options/strings_options.toml | 9 | ||||
-rw-r--r-- | src/theory/strings/core_solver.cpp | 27 | ||||
-rw-r--r-- | src/theory/strings/extf_solver.cpp | 4 | ||||
-rw-r--r-- | src/theory/strings/regexp_elim.cpp | 9 | ||||
-rw-r--r-- | src/theory/strings/sequences_stats.cpp | 8 | ||||
-rw-r--r-- | src/theory/strings/sequences_stats.h | 3 | ||||
-rw-r--r-- | src/theory/strings/skolem_cache.cpp | 199 | ||||
-rw-r--r-- | src/theory/strings/skolem_cache.h | 167 | ||||
-rw-r--r-- | src/theory/strings/term_registry.cpp | 3 | ||||
-rw-r--r-- | src/theory/strings/theory_strings_preprocess.cpp | 38 |
10 files changed, 288 insertions, 179 deletions
diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml index 44ca2c390..509ac0c36 100644 --- a/src/options/strings_options.toml +++ b/src/options/strings_options.toml @@ -195,6 +195,15 @@ header = "options/strings_options.h" help = "do flat form inferences" [[option]] + name = "skolemSharing" + category = "expert" + long = "skolem-sharing" + type = "bool" + default = "true" + read_only = true + help = "use skolem sharing" + +[[option]] name = "stringRegExpInterMode" category = "expert" long = "re-inter-mode=MODE" diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index ab528ac03..942c2c0a3 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -1263,7 +1263,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // purify variable for this string to communicate that // we have inferred whether it is empty. SkolemCache* skc = d_termReg.getSkolemCache(); - Node p = skc->mkSkolemCached(nc, SkolemCache::SK_PURIFY, "lsym"); + Node p = skc->mkSkolemCached(nc, SkolemId::SK_PURIFY, "lsym"); Node pEq = p.eqNode(emp); // should not be constant Assert(!Rewriter::rewrite(pEq).isConst()); @@ -1341,7 +1341,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, Node sk = skc->mkSkolemCached( nc, prea, - isRev ? SkolemCache::SK_ID_C_SPT_REV : SkolemCache::SK_ID_C_SPT, + isRev ? SkolemId::SK_ID_C_SPT_REV : SkolemId::SK_ID_C_SPT, "c_spt"); Trace("strings-csp") << "Const Split: " << prea << " is removed from " << stra @@ -1367,7 +1367,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, SkolemCache* skc = d_termReg.getSkolemCache(); Node sk = skc->mkSkolemCached( nc, - isRev ? SkolemCache::SK_ID_VC_SPT_REV : SkolemCache::SK_ID_VC_SPT, + isRev ? SkolemId::SK_ID_VC_SPT_REV : SkolemId::SK_ID_VC_SPT, "c_spt"); Trace("strings-csp") << "Const Split: " << firstChar << " is removed from " << stra << " (serial) " @@ -1442,8 +1442,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, SkolemCache* skc = d_termReg.getSkolemCache(); Node sk = skc->mkSkolemCached(x, y, - isRev ? SkolemCache::SK_ID_V_UNIFIED_SPT_REV - : SkolemCache::SK_ID_V_UNIFIED_SPT, + isRev ? SkolemId::SK_ID_V_UNIFIED_SPT_REV + : SkolemId::SK_ID_V_UNIFIED_SPT, "v_spt"); iinfo.d_new_skolem[LENGTH_GEQ_ONE].push_back(sk); Node eq1 = @@ -1856,11 +1856,10 @@ void CoreSolver::processDeq(Node ni, Node nj) // E.g. x ++ x' ++ ... != "abc" ++ y' ++ ... ^ len(x) != "" ---> // x = k1 ++ k2 ^ len(k1) = 1 ^ (k1 != "a" v x = "a" ++ k2) SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk = - skc->mkSkolemCached(nck, SkolemCache::SK_ID_DC_SPT, "dc_spt"); + Node sk = skc->mkSkolemCached(nck, SkolemId::SK_ID_DC_SPT, "dc_spt"); d_termReg.registerTermAtomic(sk, LENGTH_ONE); Node skr = skc->mkSkolemCached( - nck, SkolemCache::SK_ID_DC_SPT_REM, "dc_spt_rem"); + nck, SkolemId::SK_ID_DC_SPT_REM, "dc_spt_rem"); Node eq1 = nck.eqNode(nm->mkNode(kind::STRING_CONCAT, sk, skr)); eq1 = Rewriter::rewrite(eq1); Node eq2 = @@ -1904,14 +1903,10 @@ void CoreSolver::processDeq(Node ni, Node nj) std::vector<Node> conc; SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk1 = - skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_X, "x_dsplit"); - Node sk2 = - skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_Y, "y_dsplit"); - Node sk3 = - skc->mkSkolemCached(y, x, SkolemCache::SK_ID_V_SPT, "z_dsplit"); - Node sk4 = - skc->mkSkolemCached(x, y, SkolemCache::SK_ID_V_SPT, "w_dsplit"); + Node sk1 = skc->mkSkolemCached(x, y, SkolemId::SK_ID_DEQ_X, "x_dsplit"); + Node sk2 = skc->mkSkolemCached(x, y, SkolemId::SK_ID_DEQ_Y, "y_dsplit"); + Node sk3 = skc->mkSkolemCached(y, x, SkolemId::SK_ID_V_SPT, "z_dsplit"); + Node sk4 = skc->mkSkolemCached(x, y, SkolemId::SK_ID_V_SPT, "w_dsplit"); d_termReg.registerTermAtomic(sk3, LENGTH_GEQ_ONE); d_termReg.registerTermAtomic(sk4, LENGTH_GEQ_ONE); Node sk1Len = utils::mkNLength(sk1); diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index dd4144313..a6e603d12 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -161,8 +161,8 @@ bool ExtfSolver::doReduction(int effort, Node n) Node s = n[1]; // positive contains reduces to a equality SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk1 = skc->mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_PRE, "sc1"); - Node sk2 = skc->mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_POST, "sc2"); + Node sk1 = skc->mkSkolemCached(x, s, SkolemId::SK_FIRST_CTN_PRE, "sc1"); + Node sk2 = skc->mkSkolemCached(x, s, SkolemId::SK_FIRST_CTN_POST, "sc2"); Node eq = Rewriter::rewrite(x.eqNode(utils::mkNConcat(sk1, s, sk2))); std::vector<Node> exp_vec; exp_vec.push_back(n); diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp index d4f301e23..226a97b30 100644 --- a/src/theory/strings/regexp_elim.cpp +++ b/src/theory/strings/regexp_elim.cpp @@ -232,7 +232,14 @@ Node RegExpElimination::eliminateConcat(Node atom) prev_end = nm->mkNode(PLUS, prev_end, k); } Node curr = nm->mkNode(STRING_STRIDOF, x, sc, prev_end); - Node idofFind = curr.eqNode(d_neg_one).negate(); + Node idofFind = nm->mkNode( + STRING_STRCTN, + nm->mkNode( + STRING_SUBSTR, + x, + prev_end, + nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, x), prev_end)), + sc); conj.push_back(idofFind); prev_end = nm->mkNode(PLUS, curr, lensc); } diff --git a/src/theory/strings/sequences_stats.cpp b/src/theory/strings/sequences_stats.cpp index c830a68bd..23091ed07 100644 --- a/src/theory/strings/sequences_stats.cpp +++ b/src/theory/strings/sequences_stats.cpp @@ -38,7 +38,9 @@ SequencesStatistics::SequencesStatistics() d_lemmasRegisterTerm("theory::strings::lemmasRegisterTerm", 0), d_lemmasRegisterTermAtomic("theory::strings::lemmasRegisterTermAtomic", 0), - d_lemmasInfer("theory::strings::lemmasInfer", 0) + d_lemmasInfer("theory::strings::lemmasInfer", 0), + d_numCachedSkolemsPre("theory::strings::NumCachedSkolemsPre"), + d_numCachedSkolemsPost("theory::strings::NumCachedSkolemsPost") { smtStatisticsRegistry()->registerStat(&d_checkRuns); smtStatisticsRegistry()->registerStat(&d_strategyRuns); @@ -56,6 +58,8 @@ SequencesStatistics::SequencesStatistics() smtStatisticsRegistry()->registerStat(&d_lemmasRegisterTerm); smtStatisticsRegistry()->registerStat(&d_lemmasRegisterTermAtomic); smtStatisticsRegistry()->registerStat(&d_lemmasInfer); + smtStatisticsRegistry()->registerStat(&d_numCachedSkolemsPre); + smtStatisticsRegistry()->registerStat(&d_numCachedSkolemsPost); } SequencesStatistics::~SequencesStatistics() @@ -76,6 +80,8 @@ SequencesStatistics::~SequencesStatistics() smtStatisticsRegistry()->unregisterStat(&d_lemmasRegisterTerm); smtStatisticsRegistry()->unregisterStat(&d_lemmasRegisterTermAtomic); smtStatisticsRegistry()->unregisterStat(&d_lemmasInfer); + smtStatisticsRegistry()->unregisterStat(&d_numCachedSkolemsPre); + smtStatisticsRegistry()->unregisterStat(&d_numCachedSkolemsPost); } } diff --git a/src/theory/strings/sequences_stats.h b/src/theory/strings/sequences_stats.h index 8a1564587..94f04219f 100644 --- a/src/theory/strings/sequences_stats.h +++ b/src/theory/strings/sequences_stats.h @@ -20,6 +20,7 @@ #include "expr/kind.h" #include "theory/strings/infer_info.h" #include "theory/strings/rewrites.h" +#include "theory/strings/skolem_cache.h" #include "util/statistics_registry.h" namespace CVC4 { @@ -104,6 +105,8 @@ class SequencesStatistics /** Number of lemmas added due to inferences */ IntStat d_lemmasInfer; //--------------- end of lemmas + HistogramStat<SkolemId> d_numCachedSkolemsPre; + HistogramStat<SkolemId> d_numCachedSkolemsPost; }; } diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp index 14b12104d..2cab548d2 100644 --- a/src/theory/strings/skolem_cache.cpp +++ b/src/theory/strings/skolem_cache.cpp @@ -14,8 +14,10 @@ #include "theory/strings/skolem_cache.h" +#include "options/strings_options.h" #include "theory/rewriter.h" #include "theory/strings/arith_entail.h" +#include "theory/strings/sequences_stats.h" #include "theory/strings/theory_strings_utils.h" #include "theory/strings/word.h" #include "util/rational.h" @@ -26,7 +28,8 @@ namespace CVC4 { namespace theory { namespace strings { -SkolemCache::SkolemCache(bool useOpts) : d_useOpts(useOpts) +SkolemCache::SkolemCache(SequencesStatistics& statistics, bool useOpts) + : d_statistics(statistics), d_useOpts(useOpts) { NodeManager* nm = NodeManager::currentNM(); d_strType = nm->stringType(); @@ -52,11 +55,21 @@ Node SkolemCache::mkTypedSkolemCached( a = a.isNull() ? a : Rewriter::rewrite(a); b = b.isNull() ? b : Rewriter::rewrite(b); - std::tie(id, a, b) = normalizeStringSkolem(id, a, b); + if (d_skolemCachePreOnly[a][b].find(id) == d_skolemCachePreOnly[a][b].end()) + { + d_skolemCachePreOnly[a][b][id] = Node::null(); + d_statistics.d_numCachedSkolemsPre << id; + } + + if (options::skolemSharing()) + { + std::tie(id, a, b) = normalizeStringSkolem(id, a, b); + } // optimization: if we aren't asking for the purification skolem for constant // a, and the skolem is equivalent to a, then we just return a. - if (d_useOpts && idOrig != SK_PURIFY && id == SK_PURIFY && a.isConst()) + if (d_useOpts && idOrig != SkolemId::SK_PURIFY && id == SkolemId::SK_PURIFY + && a.isConst()) { Trace("skolem-cache") << "...optimization: return constant " << a << std::endl; @@ -68,42 +81,52 @@ Node SkolemCache::mkTypedSkolemCached( { NodeManager* nm = NodeManager::currentNM(); Node sk; - switch (id) + if (options::skolemSharing()) { - // exists k. k = a - case SK_PURIFY: - sk = d_pskc.mkPurifySkolem(a, c, "string purify skolem"); - break; - // these are eliminated by normalizeStringSkolem - case SK_ID_V_SPT: - case SK_ID_V_SPT_REV: - case SK_ID_VC_SPT: - case SK_ID_VC_SPT_REV: - case SK_FIRST_CTN_POST: - case SK_ID_C_SPT: - case SK_ID_C_SPT_REV: - case SK_ID_DC_SPT: - case SK_ID_DC_SPT_REM: - case SK_ID_DEQ_X: - case SK_ID_DEQ_Y: - case SK_FIRST_CTN_PRE: - case SK_PREFIX: - case SK_SUFFIX_REM: - Unhandled() << "Expected to eliminate Skolem ID " << id << std::endl; - break; - case SK_NUM_OCCUR: - case SK_OCCUR_INDEX: - default: + switch (id) { - Notice() << "Don't know how to handle Skolem ID " << id << std::endl; - Node v = nm->mkBoundVar(tn); - Node cond = nm->mkConst(true); - sk = d_pskc.mkSkolem(v, cond, c, "string skolem"); + // exists k. k = a + case SkolemId::SK_PURIFY: + sk = d_pskc.mkPurifySkolem(a, c, "string purify skolem"); + break; + // these are eliminated by normalizeStringSkolem + case SkolemId::SK_ID_V_SPT: + case SkolemId::SK_ID_V_SPT_REV: + case SkolemId::SK_ID_VC_SPT: + case SkolemId::SK_ID_VC_SPT_REV: + case SkolemId::SK_FIRST_CTN_POST: + case SkolemId::SK_ID_C_SPT: + case SkolemId::SK_ID_C_SPT_REV: + case SkolemId::SK_ID_DC_SPT: + case SkolemId::SK_ID_DC_SPT_REM: + case SkolemId::SK_ID_DEQ_X: + case SkolemId::SK_ID_DEQ_Y: + case SkolemId::SK_FIRST_CTN_PRE: + case SkolemId::SK_PREFIX: + case SkolemId::SK_SUFFIX_REM: + Unhandled() << "Expected to eliminate Skolem ID " << id << std::endl; + break; + case SkolemId::SK_NUM_OCCUR: + case SkolemId::SK_OCCUR_INDEX: + default: + { + Notice() << "Don't know how to handle Skolem ID " << id << std::endl; + Node v = nm->mkBoundVar(tn); + Node cond = nm->mkConst(true); + sk = d_pskc.mkSkolem(v, cond, c, "string skolem"); + } + break; } - break; + } + else + { + Node v = nm->mkBoundVar(tn); + Node cond = nm->mkConst(true); + sk = d_pskc.mkSkolem(v, cond, c, "string skolem"); } d_allSkolems.insert(sk); d_skolemCache[a][b][id] = sk; + d_statistics.d_numCachedSkolemsPost << idOrig; return sk; } return it->second; @@ -129,107 +152,118 @@ bool SkolemCache::isSkolem(Node n) const return d_allSkolems.find(n) != d_allSkolems.end(); } -std::tuple<SkolemCache::SkolemId, Node, Node> -SkolemCache::normalizeStringSkolem(SkolemId id, Node a, Node b) +std::tuple<SkolemId, Node, Node> SkolemCache::normalizeStringSkolem(SkolemId id, + Node a, + Node b) { - NodeManager* nm = NodeManager::currentNM(); + if (id == SkolemId::SK_FIRST_CTN_IOPRE || id == SkolemId::SK_FIRST_CTN_RFCPRE) + { + id = SkolemId::SK_FIRST_CTN_PRE; + } + else if (id == SkolemId::SK_FIRST_CTN_IOPOST + || id == SkolemId::SK_FIRST_CTN_RFCPOST) + { + id = SkolemId::SK_FIRST_CTN_POST; + } + // eliminate in terms of prefix/suffix_rem - if (id == SK_FIRST_CTN_POST) + if (id == SkolemId::SK_FIRST_CTN_POST) { // SK_FIRST_CTN_POST(x, y) ---> // SK_SUFFIX_REM(x, (+ (str.len SK_FIRST_CTN_PRE(x, y)) (str.len y))) - id = SK_SUFFIX_REM; - Node pre = mkSkolemCached(a, b, SK_FIRST_CTN_PRE, "pre"); + id = SkolemId::SK_SUFFIX_REM; + Node pre = mkSkolemCached(a, b, SkolemId::SK_FIRST_CTN_PRE, "pre"); b = nm->mkNode( PLUS, nm->mkNode(STRING_LENGTH, pre), nm->mkNode(STRING_LENGTH, b)); } - else if (id == SK_ID_V_SPT || id == SK_ID_C_SPT) + else if (id == SkolemId::SK_ID_V_SPT || id == SkolemId::SK_ID_C_SPT) { // SK_ID_*_SPT(x, y) ---> SK_SUFFIX_REM(x, (str.len y)) - id = SK_SUFFIX_REM; + id = SkolemId::SK_SUFFIX_REM; b = nm->mkNode(STRING_LENGTH, b); } - else if (id == SK_ID_V_SPT_REV || id == SK_ID_C_SPT_REV) + else if (id == SkolemId::SK_ID_V_SPT_REV || id == SkolemId::SK_ID_C_SPT_REV) { // SK_ID_*_SPT_REV(x, y) ---> SK_PREFIX(x, (- (str.len x) (str.len y))) - id = SK_PREFIX; + id = SkolemId::SK_PREFIX; b = nm->mkNode( MINUS, nm->mkNode(STRING_LENGTH, a), nm->mkNode(STRING_LENGTH, b)); } - else if (id == SK_ID_VC_SPT) + else if (id == SkolemId::SK_ID_VC_SPT) { // SK_ID_VC_SPT(x, y) ---> SK_SUFFIX_REM(x, 1) - id = SK_SUFFIX_REM; + id = SkolemId::SK_SUFFIX_REM; b = nm->mkConst(Rational(1)); } - else if (id == SK_ID_VC_SPT_REV) + else if (id == SkolemId::SK_ID_VC_SPT_REV) { // SK_ID_VC_SPT_REV(x, y) ---> SK_PREFIX(x, (- (str.len x) 1)) - id = SK_PREFIX; + id = SkolemId::SK_PREFIX; b = nm->mkNode( MINUS, nm->mkNode(STRING_LENGTH, a), nm->mkConst(Rational(1))); } - else if (id == SK_ID_DC_SPT) + else if (id == SkolemId::SK_ID_DC_SPT) { // SK_ID_DC_SPT(x, y) ---> SK_PREFIX(x, 1) - id = SK_PREFIX; + id = SkolemId::SK_PREFIX; b = nm->mkConst(Rational(1)); } - else if (id == SK_ID_DC_SPT_REM) + else if (id == SkolemId::SK_ID_DC_SPT_REM) { // SK_ID_DC_SPT_REM(x, y) ---> SK_SUFFIX_REM(x, 1) - id = SK_SUFFIX_REM; + id = SkolemId::SK_SUFFIX_REM; b = nm->mkConst(Rational(1)); } - else if (id == SK_ID_DEQ_X) + else if (id == SkolemId::SK_ID_DEQ_X) { // SK_ID_DEQ_X(x, y) ---> SK_PREFIX(y, (str.len x)) - id = SK_PREFIX; + id = SkolemId::SK_PREFIX; Node aOld = a; a = b; b = nm->mkNode(STRING_LENGTH, aOld); } - else if (id == SK_ID_DEQ_Y) + else if (id == SkolemId::SK_ID_DEQ_Y) { // SK_ID_DEQ_Y(x, y) ---> SK_PREFIX(x, (str.len y)) - id = SK_PREFIX; + id = SkolemId::SK_PREFIX; b = nm->mkNode(STRING_LENGTH, b); } - else if (id == SK_FIRST_CTN_PRE) + else if (id == SkolemId::SK_FIRST_CTN_PRE) { // SK_FIRST_CTN_PRE(x,y) ---> SK_PREFIX(x, indexof(x,y,0)) - id = SK_PREFIX; + id = SkolemId::SK_PREFIX; b = nm->mkNode(STRING_STRIDOF, a, b, d_zero); } - if (id == SK_ID_V_UNIFIED_SPT || id == SK_ID_V_UNIFIED_SPT_REV) + if (id == SkolemId::SK_ID_V_UNIFIED_SPT + || id == SkolemId::SK_ID_V_UNIFIED_SPT_REV) { - bool isRev = (id == SK_ID_V_UNIFIED_SPT_REV); + bool isRev = (id == SkolemId::SK_ID_V_UNIFIED_SPT_REV); Node la = nm->mkNode(STRING_LENGTH, a); Node lb = nm->mkNode(STRING_LENGTH, b); Node ta = isRev ? utils::mkPrefix(a, nm->mkNode(MINUS, la, lb)) : utils::mkSuffix(a, lb); Node tb = isRev ? utils::mkPrefix(b, nm->mkNode(MINUS, lb, la)) : utils::mkSuffix(b, la); - id = SK_PURIFY; + id = SkolemId::SK_PURIFY; a = nm->mkNode(ITE, nm->mkNode(GEQ, la, lb), ta, tb); b = Node::null(); } // now, eliminate prefix/suffix_rem in terms of purify - if (id == SK_PREFIX) + if (id == SkolemId::SK_PREFIX) { // SK_PREFIX(x,y) ---> SK_PURIFY(substr(x,0,y)) - id = SK_PURIFY; + id = SkolemId::SK_PURIFY; a = utils::mkPrefix(a, b); b = Node::null(); } - else if (id == SK_SUFFIX_REM) + else if (id == SkolemId::SK_SUFFIX_REM) { // SK_SUFFIX_REM(x,y) ---> SK_PURIFY(substr(x,y,str.len(x)-y)) - id = SK_PURIFY; + id = SkolemId::SK_PURIFY; a = utils::mkSuffix(a, b); b = Node::null(); } @@ -242,6 +276,43 @@ SkolemCache::normalizeStringSkolem(SkolemId id, Node a, Node b) return std::make_tuple(id, a, b); } +const char* toString(SkolemId id) +{ + switch (id) + { + case SkolemId::SK_PURIFY: return "SK_PURIFY"; + case SkolemId::SK_ID_C_SPT: return "SK_ID_C_SPT"; + case SkolemId::SK_ID_C_SPT_REV: return "SK_ID_C_SPT_REV"; + case SkolemId::SK_ID_VC_SPT: return "SK_ID_VC_SPT"; + case SkolemId::SK_ID_VC_SPT_REV: return "SK_ID_VC_SPT_REV"; + case SkolemId::SK_ID_V_SPT: return "SK_ID_V_SPT"; + case SkolemId::SK_ID_V_SPT_REV: return "SK_ID_V_SPT_REV"; + case SkolemId::SK_ID_V_UNIFIED_SPT: return "SK_ID_V_UNIFIED_SPT"; + case SkolemId::SK_ID_V_UNIFIED_SPT_REV: return "SK_ID_V_UNIFIED_SPT_REV"; + case SkolemId::SK_ID_DC_SPT: return "SK_ID_DC_SPT"; + case SkolemId::SK_ID_DC_SPT_REM: return "SK_ID_DC_SPT_REM"; + case SkolemId::SK_ID_DEQ_X: return "SK_ID_DEQ_X"; + case SkolemId::SK_ID_DEQ_Y: return "SK_ID_DEQ_Y"; + case SkolemId::SK_FIRST_CTN_PRE: return "SK_FIRST_CTN_PRE"; + case SkolemId::SK_FIRST_CTN_POST: return "SK_FIRST_CTN_POST"; + case SkolemId::SK_FIRST_CTN_IOPRE: return "SK_FIRST_CTN_IOPRE"; + case SkolemId::SK_FIRST_CTN_IOPOST: return "SK_FIRST_CTN_IOPOST"; + case SkolemId::SK_FIRST_CTN_RFCPRE: return "SK_FIRST_CTN_RFCPRE"; + case SkolemId::SK_FIRST_CTN_RFCPOST: return "SK_FIRST_CTN_RFCPOST"; + case SkolemId::SK_PREFIX: return "SK_PREFIX"; + case SkolemId::SK_SUFFIX_REM: return "SK_SUFFIX_REM"; + case SkolemId::SK_NUM_OCCUR: return "SK_NUM_OCCUR"; + case SkolemId::SK_OCCUR_INDEX: return "SK_OCCUR_INDEX"; + default: return "?"; + } +} + +std::ostream& operator<<(std::ostream& out, SkolemId id) +{ + out << toString(id); + return out; +} + } // namespace strings } // namespace theory } // namespace CVC4 diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h index a88b3cfb7..4f2c05b99 100644 --- a/src/theory/strings/skolem_cache.h +++ b/src/theory/strings/skolem_cache.h @@ -28,6 +28,90 @@ namespace CVC4 { namespace theory { namespace strings { +class SequencesStatistics; + +/** Identifiers for skolem types + * + * The comments below document the properties of each skolem introduced by + * inference in the strings solver, where by skolem we mean the fresh + * string variable that witnesses each of "exists k". + * + * The skolems with _REV suffixes are used for the reverse version of the + * preconditions below, e.g. where we are considering a' ++ a = b' ++ b. + * + * All skolems assume a and b are strings unless otherwise stated. + */ +enum class SkolemId +{ + // exists k. k = a + SK_PURIFY, + // a != "" ^ b = "ccccd" ^ a ++ "d" ++ a' = b ++ b' => + // exists k. a = "cccc" ++ k + SK_ID_C_SPT, + SK_ID_C_SPT_REV, + // a != "" ^ b = "c" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' => + // exists k. a = "c" ++ k + SK_ID_VC_SPT, + SK_ID_VC_SPT_REV, + // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' => + // exists k. len( k )>0 ^ ( a ++ k = b OR a = b ++ k ) + SK_ID_V_SPT, + SK_ID_V_SPT_REV, + SK_ID_V_UNIFIED_SPT, + SK_ID_V_UNIFIED_SPT_REV, + // a != "" ^ b = "c" ^ a ++ a' != b ++ b' => + // exists k, k_rem. + // len( k ) = 1 ^ + // ( ( a = k ++ k_rem ^ k != "c" ) OR ( a = "c" ++ k_rem ) ) + SK_ID_DC_SPT, + SK_ID_DC_SPT_REM, + // a != "" ^ b != "" ^ len( a ) != len( b ) ^ a ++ a' != b ++ b' => + // exists k_x k_y k_z. + // ( len( k_y ) = len( a ) ^ len( k_x ) = len( b ) ^ len( k_z) > 0 + // ( a = k_x ++ k_z OR b = k_y ++ k_z ) ) + SK_ID_DEQ_X, + SK_ID_DEQ_Y, + // contains( a, b ) => + // exists k_pre, k_post. a = k_pre ++ b ++ k_post ^ + // ~contains(k_pre ++ substr( b, 0, len(b)-1 ), b) + // + // As an optimization, these skolems are reused for positive occurrences of + // contains, where they have the semantics: + // + // contains( a, b ) => + // exists k_pre, k_post. a = k_pre ++ b ++ k_post + // + // We reuse them since it is sound to consider w.l.o.g. the first occurrence + // of b in a as the witness for contains( a, b ). + SK_FIRST_CTN_PRE, + SK_FIRST_CTN_POST, + + SK_FIRST_CTN_IOPRE, + SK_FIRST_CTN_IOPOST, + + SK_FIRST_CTN_RFCPRE, + SK_FIRST_CTN_RFCPOST, + + // For integer b, + // len( a ) > b => + // exists k. a = k ++ a' ^ len( k ) = b + SK_PREFIX, + // For integer b, + // b > 0 => + // exists k. a = a' ++ k ^ len( k ) = ite( len(a)>b, len(a)-b, 0 ) + SK_SUFFIX_REM, + // --------------- integer skolems + // exists k. ( b occurs k times in a ) + SK_NUM_OCCUR, + // --------------- function skolems + // For function k: Int -> Int + // exists k. + // forall 0 <= x <= n, + // k(x) is the end index of the x^th occurrence of b in a + // where n is the number of occurrences of b in a, and k(0)=0. + SK_OCCUR_INDEX, +}; + /** * A cache of all string skolems generated by the TheoryStrings class. This * cache is used to ensure that duplicate skolems are not generated when @@ -42,81 +126,7 @@ class SkolemCache * useOpts determines if we aggressively share Skolems or return the constants * they are entailed to be equal to. */ - SkolemCache(bool useOpts = true); - /** Identifiers for skolem types - * - * The comments below document the properties of each skolem introduced by - * inference in the strings solver, where by skolem we mean the fresh - * string variable that witnesses each of "exists k". - * - * The skolems with _REV suffixes are used for the reverse version of the - * preconditions below, e.g. where we are considering a' ++ a = b' ++ b. - * - * All skolems assume a and b are strings unless otherwise stated. - */ - enum SkolemId - { - // exists k. k = a - SK_PURIFY, - // a != "" ^ b = "ccccd" ^ a ++ "d" ++ a' = b ++ b' => - // exists k. a = "cccc" ++ k - SK_ID_C_SPT, - SK_ID_C_SPT_REV, - // a != "" ^ b = "c" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' => - // exists k. a = "c" ++ k - SK_ID_VC_SPT, - SK_ID_VC_SPT_REV, - // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' => - // exists k. len( k )>0 ^ ( a ++ k = b OR a = b ++ k ) - SK_ID_V_SPT, - SK_ID_V_SPT_REV, - SK_ID_V_UNIFIED_SPT, - SK_ID_V_UNIFIED_SPT_REV, - // a != "" ^ b = "c" ^ a ++ a' != b ++ b' => - // exists k, k_rem. - // len( k ) = 1 ^ - // ( ( a = k ++ k_rem ^ k != "c" ) OR ( a = "c" ++ k_rem ) ) - SK_ID_DC_SPT, - SK_ID_DC_SPT_REM, - // a != "" ^ b != "" ^ len( a ) != len( b ) ^ a ++ a' != b ++ b' => - // exists k_x k_y k_z. - // ( len( k_y ) = len( a ) ^ len( k_x ) = len( b ) ^ len( k_z) > 0 - // ( a = k_x ++ k_z OR b = k_y ++ k_z ) ) - SK_ID_DEQ_X, - SK_ID_DEQ_Y, - // contains( a, b ) => - // exists k_pre, k_post. a = k_pre ++ b ++ k_post ^ - // ~contains(k_pre ++ substr( b, 0, len(b)-1 ), b) - // - // As an optimization, these skolems are reused for positive occurrences of - // contains, where they have the semantics: - // - // contains( a, b ) => - // exists k_pre, k_post. a = k_pre ++ b ++ k_post - // - // We reuse them since it is sound to consider w.l.o.g. the first occurrence - // of b in a as the witness for contains( a, b ). - SK_FIRST_CTN_PRE, - SK_FIRST_CTN_POST, - // For integer b, - // len( a ) > b => - // exists k. a = k ++ a' ^ len( k ) = b - SK_PREFIX, - // For integer b, - // b > 0 => - // exists k. a = a' ++ k ^ len( k ) = ite( len(a)>b, len(a)-b, 0 ) - SK_SUFFIX_REM, - // --------------- integer skolems - // exists k. ( b occurs k times in a ) - SK_NUM_OCCUR, - // --------------- function skolems - // For function k: Int -> Int - // exists k. - // forall 0 <= x <= n, - // k(x) is the end index of the x^th occurrence of b in a - // where n is the number of occurrences of b in a, and k(0)=0. - SK_OCCUR_INDEX, - }; + SkolemCache(SequencesStatistics& statistics, bool useOpts = true); /** * Returns a skolem of type string that is cached for (a,b,id) and has * name c. @@ -155,6 +165,8 @@ class SkolemCache std::tuple<SkolemId, Node, Node> normalizeStringSkolem(SkolemId id, Node a, Node b); + + SequencesStatistics& d_statistics; /** whether we are using optimizations */ bool d_useOpts; /** string type */ @@ -163,12 +175,17 @@ class SkolemCache Node d_zero; /** map from node pairs and identifiers to skolems */ std::map<Node, std::map<Node, std::map<SkolemId, Node> > > d_skolemCache; + std::map<Node, std::map<Node, std::map<SkolemId, Node> > > + d_skolemCachePreOnly; /** the set of all skolems we have generated */ std::unordered_set<Node, NodeHashFunction> d_allSkolems; /** A proof skolem cache */ ProofSkolemCache d_pskc; }; +const char* toString(SkolemId id); +std::ostream& operator<<(std::ostream& out, SkolemId id); + } // namespace strings } // namespace theory } // namespace CVC4 diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp index 631d4b8e4..276aab3a2 100644 --- a/src/theory/strings/term_registry.cpp +++ b/src/theory/strings/term_registry.cpp @@ -44,6 +44,7 @@ TermRegistry::TermRegistry(context::Context* c, d_out(out), d_statistics(statistics), d_hasStrCode(false), + d_skCache(statistics), d_functionsTerms(c), d_inputVars(u), d_preregisteredTerms(u), @@ -276,7 +277,7 @@ Node TermRegistry::getRegisterTermLemma(Node n) return Node::null(); } } - Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym"); + Node sk = d_skCache.mkSkolemCached(n, SkolemId::SK_PURIFY, "lsym"); StringsProxyVarAttribute spva; sk.setAttribute(spva, true); Node eq = Rewriter::rewrite(sk.eqNode(n)); diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 939146a3d..1c7ffb4a2 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -59,7 +59,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node s = t[0]; Node n = t[1]; Node m = t[2]; - Node skt = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst"); + Node skt = d_sc->mkSkolemCached(t, SkolemId::SK_PURIFY, "sst"); Node t12 = nm->mkNode(PLUS, n, m); t12 = Rewriter::rewrite(t12); Node lt0 = nm->mkNode(STRING_LENGTH, s); @@ -73,13 +73,13 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node emp = Word::mkEmptyWord(t.getType()); - Node sk1 = n == d_zero ? emp - : d_sc->mkSkolemCached( - s, n, SkolemCache::SK_PREFIX, "sspre"); - Node sk2 = ArithEntail::check(t12, lt0) + Node sk1 = n == d_zero ? emp - : d_sc->mkSkolemCached( - s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr"); + : d_sc->mkSkolemCached(s, n, SkolemId::SK_PREFIX, "sspre"); + Node sk2 = + ArithEntail::check(t12, lt0) + ? emp + : d_sc->mkSkolemCached(s, t12, SkolemId::SK_SUFFIX_REM, "sssufr"); Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, skt, sk2)); //length of first skolem is second argument Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n); @@ -139,9 +139,9 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, x), n)); Node io2 = - d_sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_PRE, "iopre"); + d_sc->mkSkolemCached(st, y, SkolemId::SK_FIRST_CTN_IOPRE, "iopre"); Node io4 = - d_sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_POST, "iopost"); + d_sc->mkSkolemCached(st, y, SkolemId::SK_FIRST_CTN_IOPOST, "iopost"); // ~contains( substr( x, n, len( x ) - n ), y ) Node c11 = nm->mkNode(STRING_STRCTN, st, y).negate(); @@ -198,7 +198,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { { // processing term: int.to.str( n ) Node n = t[0]; - Node itost = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "itost"); + Node itost = d_sc->mkSkolemCached(t, SkolemId::SK_PURIFY, "itost"); Node leni = nm->mkNode(STRING_LENGTH, itost); std::vector<Node> conc; @@ -362,10 +362,10 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node z = t[2]; TypeNode tn = t[0].getType(); Node rp1 = - d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_PRE, "rfcpre"); + d_sc->mkSkolemCached(x, y, SkolemId::SK_FIRST_CTN_RFCPRE, "rfcpre"); Node rp2 = - d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_POST, "rfcpost"); - Node rpw = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpw"); + d_sc->mkSkolemCached(x, y, SkolemId::SK_FIRST_CTN_RFCPOST, "rfcpost"); + Node rpw = d_sc->mkSkolemCached(t, SkolemId::SK_PURIFY, "rpw"); // y = "" Node emp = Word::mkEmptyWord(tn); @@ -421,17 +421,17 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node x = t[0]; Node y = t[1]; Node z = t[2]; - Node rpaw = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpaw"); + Node rpaw = d_sc->mkSkolemCached(t, SkolemId::SK_PURIFY, "rpaw"); Node numOcc = d_sc->mkTypedSkolemCached( - nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc"); + nm->integerType(), x, y, SkolemId::SK_NUM_OCCUR, "numOcc"); std::vector<TypeNode> argTypes; argTypes.push_back(nm->integerType()); Node us = nm->mkSkolem("Us", nm->mkFunctionType(argTypes, nm->stringType())); TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType()); - Node uf = d_sc->mkTypedSkolemCached( - ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf"); + Node uf = + d_sc->mkTypedSkolemCached(ufType, x, y, SkolemId::SK_OCCUR_INDEX, "Uf"); Node ufno = nm->mkNode(APPLY_UF, uf, numOcc); Node usno = nm->mkNode(APPLY_UF, us, numOcc); @@ -495,7 +495,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER) { Node x = t[0]; - Node r = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); + Node r = d_sc->mkSkolemCached(t, SkolemId::SK_PURIFY, "r"); Node lenx = nm->mkNode(STRING_LENGTH, x); Node lenr = nm->mkNode(STRING_LENGTH, r); @@ -541,7 +541,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { else if (t.getKind() == STRING_REV) { Node x = t[0]; - Node r = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); + Node r = d_sc->mkSkolemCached(t, SkolemId::SK_PURIFY, "r"); Node lenx = nm->mkNode(STRING_LENGTH, x); Node lenr = nm->mkNode(STRING_LENGTH, r); |