Add skolem stats and sharing togglefmcad2020-strings

author: Andres Noetzli <andres.noetzli@gmail.com> 2020-05-08 22:42:22 -0700
committer: Andres Noetzli <andres.noetzli@gmail.com> 2020-05-11 22:22:48 -0700
commit: 5a610d9d82a40ba9e78a1a948c82aa42e7b961ae (patch)
tree: fd69a4b1b52d1ef5ec8723c537f6179913d5be33
parent: ea7a9c71305bad749f4e148097585fdcc28004f8 (diff)
10 files changed, 288 insertions, 179 deletions
diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml
index 44ca2c390..509ac0c36 100644
--- a/src/options/strings_options.toml
+++ b/src/options/strings_options.toml
@@ -195,6 +195,15 @@ header = "options/strings_options.h"
   help       = "do flat form inferences"
 
 [[option]]
+  name       = "skolemSharing"
+  category   = "expert"
+  long       = "skolem-sharing"
+  type       = "bool"
+  default    = "true"
+  read_only  = true
+  help       = "use skolem sharing"
+
+[[option]]
   name       = "stringRegExpInterMode"
   category   = "expert"
   long       = "re-inter-mode=MODE"
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp
index ab528ac03..942c2c0a3 100644
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -1263,7 +1263,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
           // purify variable for this string to communicate that
           // we have inferred whether it is empty.
           SkolemCache* skc = d_termReg.getSkolemCache();
-          Node p = skc->mkSkolemCached(nc, SkolemCache::SK_PURIFY, "lsym");
+          Node p = skc->mkSkolemCached(nc, SkolemId::SK_PURIFY, "lsym");
           Node pEq = p.eqNode(emp);
           // should not be constant
           Assert(!Rewriter::rewrite(pEq).isConst());
@@ -1341,7 +1341,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
           Node sk = skc->mkSkolemCached(
               nc,
               prea,
-              isRev ? SkolemCache::SK_ID_C_SPT_REV : SkolemCache::SK_ID_C_SPT,
+              isRev ? SkolemId::SK_ID_C_SPT_REV : SkolemId::SK_ID_C_SPT,
               "c_spt");
           Trace("strings-csp")
               << "Const Split: " << prea << " is removed from " << stra
@@ -1367,7 +1367,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
       SkolemCache* skc = d_termReg.getSkolemCache();
       Node sk = skc->mkSkolemCached(
           nc,
-          isRev ? SkolemCache::SK_ID_VC_SPT_REV : SkolemCache::SK_ID_VC_SPT,
+          isRev ? SkolemId::SK_ID_VC_SPT_REV : SkolemId::SK_ID_VC_SPT,
           "c_spt");
       Trace("strings-csp") << "Const Split: " << firstChar
                            << " is removed from " << stra << " (serial) "
@@ -1442,8 +1442,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
     SkolemCache* skc = d_termReg.getSkolemCache();
     Node sk = skc->mkSkolemCached(x,
                                   y,
-                                  isRev ? SkolemCache::SK_ID_V_UNIFIED_SPT_REV
-                                        : SkolemCache::SK_ID_V_UNIFIED_SPT,
+                                  isRev ? SkolemId::SK_ID_V_UNIFIED_SPT_REV
+                                        : SkolemId::SK_ID_V_UNIFIED_SPT,
                                   "v_spt");
     iinfo.d_new_skolem[LENGTH_GEQ_ONE].push_back(sk);
     Node eq1 =
@@ -1856,11 +1856,10 @@ void CoreSolver::processDeq(Node ni, Node nj)
           // E.g. x ++ x' ++ ... != "abc" ++ y' ++ ... ^ len(x) != "" --->
           //      x = k1 ++ k2 ^ len(k1) = 1 ^ (k1 != "a" v x = "a" ++  k2)
           SkolemCache* skc = d_termReg.getSkolemCache();
-          Node sk =
-              skc->mkSkolemCached(nck, SkolemCache::SK_ID_DC_SPT, "dc_spt");
+          Node sk = skc->mkSkolemCached(nck, SkolemId::SK_ID_DC_SPT, "dc_spt");
           d_termReg.registerTermAtomic(sk, LENGTH_ONE);
           Node skr = skc->mkSkolemCached(
-              nck, SkolemCache::SK_ID_DC_SPT_REM, "dc_spt_rem");
+              nck, SkolemId::SK_ID_DC_SPT_REM, "dc_spt_rem");
           Node eq1 = nck.eqNode(nm->mkNode(kind::STRING_CONCAT, sk, skr));
           eq1 = Rewriter::rewrite(eq1);
           Node eq2 =
@@ -1904,14 +1903,10 @@ void CoreSolver::processDeq(Node ni, Node nj)
 
         std::vector<Node> conc;
         SkolemCache* skc = d_termReg.getSkolemCache();
-        Node sk1 =
-            skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_X, "x_dsplit");
-        Node sk2 =
-            skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_Y, "y_dsplit");
-        Node sk3 =
-            skc->mkSkolemCached(y, x, SkolemCache::SK_ID_V_SPT, "z_dsplit");
-        Node sk4 =
-            skc->mkSkolemCached(x, y, SkolemCache::SK_ID_V_SPT, "w_dsplit");
+        Node sk1 = skc->mkSkolemCached(x, y, SkolemId::SK_ID_DEQ_X, "x_dsplit");
+        Node sk2 = skc->mkSkolemCached(x, y, SkolemId::SK_ID_DEQ_Y, "y_dsplit");
+        Node sk3 = skc->mkSkolemCached(y, x, SkolemId::SK_ID_V_SPT, "z_dsplit");
+        Node sk4 = skc->mkSkolemCached(x, y, SkolemId::SK_ID_V_SPT, "w_dsplit");
         d_termReg.registerTermAtomic(sk3, LENGTH_GEQ_ONE);
         d_termReg.registerTermAtomic(sk4, LENGTH_GEQ_ONE);
         Node sk1Len = utils::mkNLength(sk1);
diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp
index dd4144313..a6e603d12 100644
--- a/src/theory/strings/extf_solver.cpp
+++ b/src/theory/strings/extf_solver.cpp
@@ -161,8 +161,8 @@ bool ExtfSolver::doReduction(int effort, Node n)
     Node s = n[1];
     // positive contains reduces to a equality
     SkolemCache* skc = d_termReg.getSkolemCache();
-    Node sk1 = skc->mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_PRE, "sc1");
-    Node sk2 = skc->mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_POST, "sc2");
+    Node sk1 = skc->mkSkolemCached(x, s, SkolemId::SK_FIRST_CTN_PRE, "sc1");
+    Node sk2 = skc->mkSkolemCached(x, s, SkolemId::SK_FIRST_CTN_POST, "sc2");
     Node eq = Rewriter::rewrite(x.eqNode(utils::mkNConcat(sk1, s, sk2)));
     std::vector<Node> exp_vec;
     exp_vec.push_back(n);
diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp
index d4f301e23..226a97b30 100644
--- a/src/theory/strings/regexp_elim.cpp
+++ b/src/theory/strings/regexp_elim.cpp
@@ -232,7 +232,14 @@ Node RegExpElimination::eliminateConcat(Node atom)
           prev_end = nm->mkNode(PLUS, prev_end, k);
         }
         Node curr = nm->mkNode(STRING_STRIDOF, x, sc, prev_end);
-        Node idofFind = curr.eqNode(d_neg_one).negate();
+        Node idofFind = nm->mkNode(
+            STRING_STRCTN,
+            nm->mkNode(
+                STRING_SUBSTR,
+                x,
+                prev_end,
+                nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, x), prev_end)),
+            sc);
         conj.push_back(idofFind);
         prev_end = nm->mkNode(PLUS, curr, lensc);
       }
diff --git a/src/theory/strings/sequences_stats.cpp b/src/theory/strings/sequences_stats.cpp
index c830a68bd..23091ed07 100644
--- a/src/theory/strings/sequences_stats.cpp
+++ b/src/theory/strings/sequences_stats.cpp
@@ -38,7 +38,9 @@ SequencesStatistics::SequencesStatistics()
       d_lemmasRegisterTerm("theory::strings::lemmasRegisterTerm", 0),
       d_lemmasRegisterTermAtomic("theory::strings::lemmasRegisterTermAtomic",
                                  0),
-      d_lemmasInfer("theory::strings::lemmasInfer", 0)
+      d_lemmasInfer("theory::strings::lemmasInfer", 0),
+      d_numCachedSkolemsPre("theory::strings::NumCachedSkolemsPre"),
+      d_numCachedSkolemsPost("theory::strings::NumCachedSkolemsPost")
 {
   smtStatisticsRegistry()->registerStat(&d_checkRuns);
   smtStatisticsRegistry()->registerStat(&d_strategyRuns);
@@ -56,6 +58,8 @@ SequencesStatistics::SequencesStatistics()
   smtStatisticsRegistry()->registerStat(&d_lemmasRegisterTerm);
   smtStatisticsRegistry()->registerStat(&d_lemmasRegisterTermAtomic);
   smtStatisticsRegistry()->registerStat(&d_lemmasInfer);
+  smtStatisticsRegistry()->registerStat(&d_numCachedSkolemsPre);
+  smtStatisticsRegistry()->registerStat(&d_numCachedSkolemsPost);
 }
 
 SequencesStatistics::~SequencesStatistics()
@@ -76,6 +80,8 @@ SequencesStatistics::~SequencesStatistics()
   smtStatisticsRegistry()->unregisterStat(&d_lemmasRegisterTerm);
   smtStatisticsRegistry()->unregisterStat(&d_lemmasRegisterTermAtomic);
   smtStatisticsRegistry()->unregisterStat(&d_lemmasInfer);
+  smtStatisticsRegistry()->unregisterStat(&d_numCachedSkolemsPre);
+  smtStatisticsRegistry()->unregisterStat(&d_numCachedSkolemsPost);
 }
 
 }
diff --git a/src/theory/strings/sequences_stats.h b/src/theory/strings/sequences_stats.h
index 8a1564587..94f04219f 100644
--- a/src/theory/strings/sequences_stats.h
+++ b/src/theory/strings/sequences_stats.h
@@ -20,6 +20,7 @@
 #include "expr/kind.h"
 #include "theory/strings/infer_info.h"
 #include "theory/strings/rewrites.h"
+#include "theory/strings/skolem_cache.h"
 #include "util/statistics_registry.h"
 
 namespace CVC4 {
@@ -104,6 +105,8 @@ class SequencesStatistics
   /** Number of lemmas added due to inferences */
   IntStat d_lemmasInfer;
   //--------------- end of lemmas
+  HistogramStat<SkolemId> d_numCachedSkolemsPre;
+  HistogramStat<SkolemId> d_numCachedSkolemsPost;
 };
 
 }
diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp
index 14b12104d..2cab548d2 100644
--- a/src/theory/strings/skolem_cache.cpp
+++ b/src/theory/strings/skolem_cache.cpp
@@ -14,8 +14,10 @@
 
 #include "theory/strings/skolem_cache.h"
 
+#include "options/strings_options.h"
 #include "theory/rewriter.h"
 #include "theory/strings/arith_entail.h"
+#include "theory/strings/sequences_stats.h"
 #include "theory/strings/theory_strings_utils.h"
 #include "theory/strings/word.h"
 #include "util/rational.h"
@@ -26,7 +28,8 @@ namespace CVC4 {
 namespace theory {
 namespace strings {
 
-SkolemCache::SkolemCache(bool useOpts) : d_useOpts(useOpts)
+SkolemCache::SkolemCache(SequencesStatistics& statistics, bool useOpts)
+    : d_statistics(statistics), d_useOpts(useOpts)
 {
   NodeManager* nm = NodeManager::currentNM();
   d_strType = nm->stringType();
@@ -52,11 +55,21 @@ Node SkolemCache::mkTypedSkolemCached(
   a = a.isNull() ? a : Rewriter::rewrite(a);
   b = b.isNull() ? b : Rewriter::rewrite(b);
 
-  std::tie(id, a, b) = normalizeStringSkolem(id, a, b);
+  if (d_skolemCachePreOnly[a][b].find(id) == d_skolemCachePreOnly[a][b].end())
+  {
+    d_skolemCachePreOnly[a][b][id] = Node::null();
+    d_statistics.d_numCachedSkolemsPre << id;
+  }
+
+  if (options::skolemSharing())
+  {
+    std::tie(id, a, b) = normalizeStringSkolem(id, a, b);
+  }
 
   // optimization: if we aren't asking for the purification skolem for constant
   // a, and the skolem is equivalent to a, then we just return a.
-  if (d_useOpts && idOrig != SK_PURIFY && id == SK_PURIFY && a.isConst())
+  if (d_useOpts && idOrig != SkolemId::SK_PURIFY && id == SkolemId::SK_PURIFY
+      && a.isConst())
   {
     Trace("skolem-cache") << "...optimization: return constant " << a
                           << std::endl;
@@ -68,42 +81,52 @@ Node SkolemCache::mkTypedSkolemCached(
   {
     NodeManager* nm = NodeManager::currentNM();
     Node sk;
-    switch (id)
+    if (options::skolemSharing())
     {
-      // exists k. k = a
-      case SK_PURIFY:
-        sk = d_pskc.mkPurifySkolem(a, c, "string purify skolem");
-        break;
-      // these are eliminated by normalizeStringSkolem
-      case SK_ID_V_SPT:
-      case SK_ID_V_SPT_REV:
-      case SK_ID_VC_SPT:
-      case SK_ID_VC_SPT_REV:
-      case SK_FIRST_CTN_POST:
-      case SK_ID_C_SPT:
-      case SK_ID_C_SPT_REV:
-      case SK_ID_DC_SPT:
-      case SK_ID_DC_SPT_REM:
-      case SK_ID_DEQ_X:
-      case SK_ID_DEQ_Y:
-      case SK_FIRST_CTN_PRE:
-      case SK_PREFIX:
-      case SK_SUFFIX_REM:
-        Unhandled() << "Expected to eliminate Skolem ID " << id << std::endl;
-        break;
-      case SK_NUM_OCCUR:
-      case SK_OCCUR_INDEX:
-      default:
+      switch (id)
       {
-        Notice() << "Don't know how to handle Skolem ID " << id << std::endl;
-        Node v = nm->mkBoundVar(tn);
-        Node cond = nm->mkConst(true);
-        sk = d_pskc.mkSkolem(v, cond, c, "string skolem");
+        // exists k. k = a
+        case SkolemId::SK_PURIFY:
+          sk = d_pskc.mkPurifySkolem(a, c, "string purify skolem");
+          break;
+        // these are eliminated by normalizeStringSkolem
+        case SkolemId::SK_ID_V_SPT:
+        case SkolemId::SK_ID_V_SPT_REV:
+        case SkolemId::SK_ID_VC_SPT:
+        case SkolemId::SK_ID_VC_SPT_REV:
+        case SkolemId::SK_FIRST_CTN_POST:
+        case SkolemId::SK_ID_C_SPT:
+        case SkolemId::SK_ID_C_SPT_REV:
+        case SkolemId::SK_ID_DC_SPT:
+        case SkolemId::SK_ID_DC_SPT_REM:
+        case SkolemId::SK_ID_DEQ_X:
+        case SkolemId::SK_ID_DEQ_Y:
+        case SkolemId::SK_FIRST_CTN_PRE:
+        case SkolemId::SK_PREFIX:
+        case SkolemId::SK_SUFFIX_REM:
+          Unhandled() << "Expected to eliminate Skolem ID " << id << std::endl;
+          break;
+        case SkolemId::SK_NUM_OCCUR:
+        case SkolemId::SK_OCCUR_INDEX:
+        default:
+        {
+          Notice() << "Don't know how to handle Skolem ID " << id << std::endl;
+          Node v = nm->mkBoundVar(tn);
+          Node cond = nm->mkConst(true);
+          sk = d_pskc.mkSkolem(v, cond, c, "string skolem");
+        }
+        break;
       }
-      break;
+    }
+    else
+    {
+      Node v = nm->mkBoundVar(tn);
+      Node cond = nm->mkConst(true);
+      sk = d_pskc.mkSkolem(v, cond, c, "string skolem");
     }
     d_allSkolems.insert(sk);
     d_skolemCache[a][b][id] = sk;
+    d_statistics.d_numCachedSkolemsPost << idOrig;
     return sk;
   }
   return it->second;
@@ -129,107 +152,118 @@ bool SkolemCache::isSkolem(Node n) const
   return d_allSkolems.find(n) != d_allSkolems.end();
 }
 
-std::tuple<SkolemCache::SkolemId, Node, Node>
-SkolemCache::normalizeStringSkolem(SkolemId id, Node a, Node b)
+std::tuple<SkolemId, Node, Node> SkolemCache::normalizeStringSkolem(SkolemId id,
+                                                                    Node a,
+                                                                    Node b)
 {
-
   NodeManager* nm = NodeManager::currentNM();
 
+  if (id == SkolemId::SK_FIRST_CTN_IOPRE || id == SkolemId::SK_FIRST_CTN_RFCPRE)
+  {
+    id = SkolemId::SK_FIRST_CTN_PRE;
+  }
+  else if (id == SkolemId::SK_FIRST_CTN_IOPOST
+           || id == SkolemId::SK_FIRST_CTN_RFCPOST)
+  {
+    id = SkolemId::SK_FIRST_CTN_POST;
+  }
+
   // eliminate in terms of prefix/suffix_rem
-  if (id == SK_FIRST_CTN_POST)
+  if (id == SkolemId::SK_FIRST_CTN_POST)
   {
     // SK_FIRST_CTN_POST(x, y) --->
     //   SK_SUFFIX_REM(x, (+ (str.len SK_FIRST_CTN_PRE(x, y)) (str.len y)))
-    id = SK_SUFFIX_REM;
-    Node pre = mkSkolemCached(a, b, SK_FIRST_CTN_PRE, "pre");
+    id = SkolemId::SK_SUFFIX_REM;
+    Node pre = mkSkolemCached(a, b, SkolemId::SK_FIRST_CTN_PRE, "pre");
     b = nm->mkNode(
         PLUS, nm->mkNode(STRING_LENGTH, pre), nm->mkNode(STRING_LENGTH, b));
   }
-  else if (id == SK_ID_V_SPT || id == SK_ID_C_SPT)
+  else if (id == SkolemId::SK_ID_V_SPT || id == SkolemId::SK_ID_C_SPT)
   {
     // SK_ID_*_SPT(x, y) ---> SK_SUFFIX_REM(x, (str.len y))
-    id = SK_SUFFIX_REM;
+    id = SkolemId::SK_SUFFIX_REM;
     b = nm->mkNode(STRING_LENGTH, b);
   }
-  else if (id == SK_ID_V_SPT_REV || id == SK_ID_C_SPT_REV)
+  else if (id == SkolemId::SK_ID_V_SPT_REV || id == SkolemId::SK_ID_C_SPT_REV)
   {
     // SK_ID_*_SPT_REV(x, y) ---> SK_PREFIX(x, (- (str.len x) (str.len y)))
-    id = SK_PREFIX;
+    id = SkolemId::SK_PREFIX;
     b = nm->mkNode(
         MINUS, nm->mkNode(STRING_LENGTH, a), nm->mkNode(STRING_LENGTH, b));
   }
-  else if (id == SK_ID_VC_SPT)
+  else if (id == SkolemId::SK_ID_VC_SPT)
   {
     // SK_ID_VC_SPT(x, y) ---> SK_SUFFIX_REM(x, 1)
-    id = SK_SUFFIX_REM;
+    id = SkolemId::SK_SUFFIX_REM;
     b = nm->mkConst(Rational(1));
   }
-  else if (id == SK_ID_VC_SPT_REV)
+  else if (id == SkolemId::SK_ID_VC_SPT_REV)
   {
     // SK_ID_VC_SPT_REV(x, y) ---> SK_PREFIX(x, (- (str.len x) 1))
-    id = SK_PREFIX;
+    id = SkolemId::SK_PREFIX;
     b = nm->mkNode(
         MINUS, nm->mkNode(STRING_LENGTH, a), nm->mkConst(Rational(1)));
   }
-  else if (id == SK_ID_DC_SPT)
+  else if (id == SkolemId::SK_ID_DC_SPT)
   {
     // SK_ID_DC_SPT(x, y) ---> SK_PREFIX(x, 1)
-    id = SK_PREFIX;
+    id = SkolemId::SK_PREFIX;
     b = nm->mkConst(Rational(1));
   }
-  else if (id == SK_ID_DC_SPT_REM)
+  else if (id == SkolemId::SK_ID_DC_SPT_REM)
   {
     // SK_ID_DC_SPT_REM(x, y) ---> SK_SUFFIX_REM(x, 1)
-    id = SK_SUFFIX_REM;
+    id = SkolemId::SK_SUFFIX_REM;
     b = nm->mkConst(Rational(1));
   }
-  else if (id == SK_ID_DEQ_X)
+  else if (id == SkolemId::SK_ID_DEQ_X)
   {
     // SK_ID_DEQ_X(x, y) ---> SK_PREFIX(y, (str.len x))
-    id = SK_PREFIX;
+    id = SkolemId::SK_PREFIX;
     Node aOld = a;
     a = b;
     b = nm->mkNode(STRING_LENGTH, aOld);
   }
-  else if (id == SK_ID_DEQ_Y)
+  else if (id == SkolemId::SK_ID_DEQ_Y)
   {
     // SK_ID_DEQ_Y(x, y) ---> SK_PREFIX(x, (str.len y))
-    id = SK_PREFIX;
+    id = SkolemId::SK_PREFIX;
     b = nm->mkNode(STRING_LENGTH, b);
   }
-  else if (id == SK_FIRST_CTN_PRE)
+  else if (id == SkolemId::SK_FIRST_CTN_PRE)
   {
     // SK_FIRST_CTN_PRE(x,y) ---> SK_PREFIX(x, indexof(x,y,0))
-    id = SK_PREFIX;
+    id = SkolemId::SK_PREFIX;
     b = nm->mkNode(STRING_STRIDOF, a, b, d_zero);
   }
 
-  if (id == SK_ID_V_UNIFIED_SPT || id == SK_ID_V_UNIFIED_SPT_REV)
+  if (id == SkolemId::SK_ID_V_UNIFIED_SPT
+      || id == SkolemId::SK_ID_V_UNIFIED_SPT_REV)
   {
-    bool isRev = (id == SK_ID_V_UNIFIED_SPT_REV);
+    bool isRev = (id == SkolemId::SK_ID_V_UNIFIED_SPT_REV);
     Node la = nm->mkNode(STRING_LENGTH, a);
     Node lb = nm->mkNode(STRING_LENGTH, b);
     Node ta = isRev ? utils::mkPrefix(a, nm->mkNode(MINUS, la, lb))
                     : utils::mkSuffix(a, lb);
     Node tb = isRev ? utils::mkPrefix(b, nm->mkNode(MINUS, lb, la))
                     : utils::mkSuffix(b, la);
-    id = SK_PURIFY;
+    id = SkolemId::SK_PURIFY;
     a = nm->mkNode(ITE, nm->mkNode(GEQ, la, lb), ta, tb);
     b = Node::null();
   }
 
   // now, eliminate prefix/suffix_rem in terms of purify
-  if (id == SK_PREFIX)
+  if (id == SkolemId::SK_PREFIX)
   {
     // SK_PREFIX(x,y) ---> SK_PURIFY(substr(x,0,y))
-    id = SK_PURIFY;
+    id = SkolemId::SK_PURIFY;
     a = utils::mkPrefix(a, b);
     b = Node::null();
   }
-  else if (id == SK_SUFFIX_REM)
+  else if (id == SkolemId::SK_SUFFIX_REM)
   {
     // SK_SUFFIX_REM(x,y) ---> SK_PURIFY(substr(x,y,str.len(x)-y))
-    id = SK_PURIFY;
+    id = SkolemId::SK_PURIFY;
     a = utils::mkSuffix(a, b);
     b = Node::null();
   }
@@ -242,6 +276,43 @@ SkolemCache::normalizeStringSkolem(SkolemId id, Node a, Node b)
   return std::make_tuple(id, a, b);
 }
 
+const char* toString(SkolemId id)
+{
+  switch (id)
+  {
+    case SkolemId::SK_PURIFY: return "SK_PURIFY";
+    case SkolemId::SK_ID_C_SPT: return "SK_ID_C_SPT";
+    case SkolemId::SK_ID_C_SPT_REV: return "SK_ID_C_SPT_REV";
+    case SkolemId::SK_ID_VC_SPT: return "SK_ID_VC_SPT";
+    case SkolemId::SK_ID_VC_SPT_REV: return "SK_ID_VC_SPT_REV";
+    case SkolemId::SK_ID_V_SPT: return "SK_ID_V_SPT";
+    case SkolemId::SK_ID_V_SPT_REV: return "SK_ID_V_SPT_REV";
+    case SkolemId::SK_ID_V_UNIFIED_SPT: return "SK_ID_V_UNIFIED_SPT";
+    case SkolemId::SK_ID_V_UNIFIED_SPT_REV: return "SK_ID_V_UNIFIED_SPT_REV";
+    case SkolemId::SK_ID_DC_SPT: return "SK_ID_DC_SPT";
+    case SkolemId::SK_ID_DC_SPT_REM: return "SK_ID_DC_SPT_REM";
+    case SkolemId::SK_ID_DEQ_X: return "SK_ID_DEQ_X";
+    case SkolemId::SK_ID_DEQ_Y: return "SK_ID_DEQ_Y";
+    case SkolemId::SK_FIRST_CTN_PRE: return "SK_FIRST_CTN_PRE";
+    case SkolemId::SK_FIRST_CTN_POST: return "SK_FIRST_CTN_POST";
+    case SkolemId::SK_FIRST_CTN_IOPRE: return "SK_FIRST_CTN_IOPRE";
+    case SkolemId::SK_FIRST_CTN_IOPOST: return "SK_FIRST_CTN_IOPOST";
+    case SkolemId::SK_FIRST_CTN_RFCPRE: return "SK_FIRST_CTN_RFCPRE";
+    case SkolemId::SK_FIRST_CTN_RFCPOST: return "SK_FIRST_CTN_RFCPOST";
+    case SkolemId::SK_PREFIX: return "SK_PREFIX";
+    case SkolemId::SK_SUFFIX_REM: return "SK_SUFFIX_REM";
+    case SkolemId::SK_NUM_OCCUR: return "SK_NUM_OCCUR";
+    case SkolemId::SK_OCCUR_INDEX: return "SK_OCCUR_INDEX";
+    default: return "?";
+  }
+}
+
+std::ostream& operator<<(std::ostream& out, SkolemId id)
+{
+  out << toString(id);
+  return out;
+}
+
 }  // namespace strings
 }  // namespace theory
 }  // namespace CVC4
diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h
index a88b3cfb7..4f2c05b99 100644
--- a/src/theory/strings/skolem_cache.h
+++ b/src/theory/strings/skolem_cache.h
@@ -28,6 +28,90 @@ namespace CVC4 {
 namespace theory {
 namespace strings {
 
+class SequencesStatistics;
+
+/** Identifiers for skolem types
+ *
+ * The comments below document the properties of each skolem introduced by
+ * inference in the strings solver, where by skolem we mean the fresh
+ * string variable that witnesses each of "exists k".
+ *
+ * The skolems with _REV suffixes are used for the reverse version of the
+ * preconditions below, e.g. where we are considering a' ++ a = b' ++ b.
+ *
+ * All skolems assume a and b are strings unless otherwise stated.
+ */
+enum class SkolemId
+{
+  // exists k. k = a
+  SK_PURIFY,
+  // a != "" ^ b = "ccccd" ^ a ++ "d" ++ a' = b ++ b' =>
+  //    exists k. a = "cccc" ++ k
+  SK_ID_C_SPT,
+  SK_ID_C_SPT_REV,
+  // a != "" ^ b = "c" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
+  //    exists k. a = "c" ++ k
+  SK_ID_VC_SPT,
+  SK_ID_VC_SPT_REV,
+  // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
+  //    exists k. len( k )>0 ^ ( a ++ k = b OR a = b ++ k )
+  SK_ID_V_SPT,
+  SK_ID_V_SPT_REV,
+  SK_ID_V_UNIFIED_SPT,
+  SK_ID_V_UNIFIED_SPT_REV,
+  // a != ""  ^ b = "c" ^ a ++ a' != b ++ b' =>
+  //    exists k, k_rem.
+  //         len( k ) = 1 ^
+  //         ( ( a = k ++ k_rem ^ k != "c" ) OR ( a = "c" ++ k_rem ) )
+  SK_ID_DC_SPT,
+  SK_ID_DC_SPT_REM,
+  // a != ""  ^ b != "" ^ len( a ) != len( b ) ^ a ++ a' != b ++ b' =>
+  //    exists k_x k_y k_z.
+  //         ( len( k_y ) = len( a ) ^ len( k_x ) = len( b ) ^ len( k_z) > 0
+  //           ( a = k_x ++ k_z OR b = k_y ++ k_z ) )
+  SK_ID_DEQ_X,
+  SK_ID_DEQ_Y,
+  // contains( a, b ) =>
+  //    exists k_pre, k_post. a = k_pre ++ b ++ k_post ^
+  //                          ~contains(k_pre ++ substr( b, 0, len(b)-1 ), b)
+  //
+  // As an optimization, these skolems are reused for positive occurrences of
+  // contains, where they have the semantics:
+  //
+  //   contains( a, b ) =>
+  //      exists k_pre, k_post. a = k_pre ++ b ++ k_post
+  //
+  // We reuse them since it is sound to consider w.l.o.g. the first occurrence
+  // of b in a as the witness for contains( a, b ).
+  SK_FIRST_CTN_PRE,
+  SK_FIRST_CTN_POST,
+
+  SK_FIRST_CTN_IOPRE,
+  SK_FIRST_CTN_IOPOST,
+
+  SK_FIRST_CTN_RFCPRE,
+  SK_FIRST_CTN_RFCPOST,
+
+  // For integer b,
+  // len( a ) > b =>
+  //    exists k. a = k ++ a' ^ len( k ) = b
+  SK_PREFIX,
+  // For integer b,
+  // b > 0 =>
+  //    exists k. a = a' ++ k ^ len( k ) = ite( len(a)>b, len(a)-b, 0 )
+  SK_SUFFIX_REM,
+  // --------------- integer skolems
+  // exists k. ( b occurs k times in a )
+  SK_NUM_OCCUR,
+  // --------------- function skolems
+  // For function k: Int -> Int
+  //   exists k.
+  //     forall 0 <= x <= n,
+  //       k(x) is the end index of the x^th occurrence of b in a
+  //   where n is the number of occurrences of b in a, and k(0)=0.
+  SK_OCCUR_INDEX,
+};
+
 /**
  * A cache of all string skolems generated by the TheoryStrings class. This
  * cache is used to ensure that duplicate skolems are not generated when
@@ -42,81 +126,7 @@ class SkolemCache
    * useOpts determines if we aggressively share Skolems or return the constants
    * they are entailed to be equal to.
    */
-  SkolemCache(bool useOpts = true);
-  /** Identifiers for skolem types
-   *
-   * The comments below document the properties of each skolem introduced by
-   * inference in the strings solver, where by skolem we mean the fresh
-   * string variable that witnesses each of "exists k".
-   *
-   * The skolems with _REV suffixes are used for the reverse version of the
-   * preconditions below, e.g. where we are considering a' ++ a = b' ++ b.
-   *
-   * All skolems assume a and b are strings unless otherwise stated.
-   */
-  enum SkolemId
-  {
-    // exists k. k = a
-    SK_PURIFY,
-    // a != "" ^ b = "ccccd" ^ a ++ "d" ++ a' = b ++ b' =>
-    //    exists k. a = "cccc" ++ k
-    SK_ID_C_SPT,
-    SK_ID_C_SPT_REV,
-    // a != "" ^ b = "c" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
-    //    exists k. a = "c" ++ k
-    SK_ID_VC_SPT,
-    SK_ID_VC_SPT_REV,
-    // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
-    //    exists k. len( k )>0 ^ ( a ++ k = b OR a = b ++ k )
-    SK_ID_V_SPT,
-    SK_ID_V_SPT_REV,
-    SK_ID_V_UNIFIED_SPT,
-    SK_ID_V_UNIFIED_SPT_REV,
-    // a != ""  ^ b = "c" ^ a ++ a' != b ++ b' =>
-    //    exists k, k_rem.
-    //         len( k ) = 1 ^
-    //         ( ( a = k ++ k_rem ^ k != "c" ) OR ( a = "c" ++ k_rem ) )
-    SK_ID_DC_SPT,
-    SK_ID_DC_SPT_REM,
-    // a != ""  ^ b != "" ^ len( a ) != len( b ) ^ a ++ a' != b ++ b' =>
-    //    exists k_x k_y k_z.
-    //         ( len( k_y ) = len( a ) ^ len( k_x ) = len( b ) ^ len( k_z) > 0
-    //           ( a = k_x ++ k_z OR b = k_y ++ k_z ) )
-    SK_ID_DEQ_X,
-    SK_ID_DEQ_Y,
-    // contains( a, b ) =>
-    //    exists k_pre, k_post. a = k_pre ++ b ++ k_post ^
-    //                          ~contains(k_pre ++ substr( b, 0, len(b)-1 ), b)
-    //
-    // As an optimization, these skolems are reused for positive occurrences of
-    // contains, where they have the semantics:
-    //
-    //   contains( a, b ) =>
-    //      exists k_pre, k_post. a = k_pre ++ b ++ k_post
-    //
-    // We reuse them since it is sound to consider w.l.o.g. the first occurrence
-    // of b in a as the witness for contains( a, b ).
-    SK_FIRST_CTN_PRE,
-    SK_FIRST_CTN_POST,
-    // For integer b,
-    // len( a ) > b =>
-    //    exists k. a = k ++ a' ^ len( k ) = b
-    SK_PREFIX,
-    // For integer b,
-    // b > 0 =>
-    //    exists k. a = a' ++ k ^ len( k ) = ite( len(a)>b, len(a)-b, 0 )
-    SK_SUFFIX_REM,
-    // --------------- integer skolems
-    // exists k. ( b occurs k times in a )
-    SK_NUM_OCCUR,
-    // --------------- function skolems
-    // For function k: Int -> Int
-    //   exists k.
-    //     forall 0 <= x <= n,
-    //       k(x) is the end index of the x^th occurrence of b in a
-    //   where n is the number of occurrences of b in a, and k(0)=0.
-    SK_OCCUR_INDEX,
-  };
+  SkolemCache(SequencesStatistics& statistics, bool useOpts = true);
   /**
    * Returns a skolem of type string that is cached for (a,b,id) and has
    * name c.
@@ -155,6 +165,8 @@ class SkolemCache
   std::tuple<SkolemId, Node, Node> normalizeStringSkolem(SkolemId id,
                                                          Node a,
                                                          Node b);
+
+  SequencesStatistics& d_statistics;
   /** whether we are using optimizations */
   bool d_useOpts;
   /** string type */
@@ -163,12 +175,17 @@ class SkolemCache
   Node d_zero;
   /** map from node pairs and identifiers to skolems */
   std::map<Node, std::map<Node, std::map<SkolemId, Node> > > d_skolemCache;
+  std::map<Node, std::map<Node, std::map<SkolemId, Node> > >
+      d_skolemCachePreOnly;
   /** the set of all skolems we have generated */
   std::unordered_set<Node, NodeHashFunction> d_allSkolems;
   /** A proof skolem cache */
   ProofSkolemCache d_pskc;
 };
 
+const char* toString(SkolemId id);
+std::ostream& operator<<(std::ostream& out, SkolemId id);
+
 }  // namespace strings
 }  // namespace theory
 }  // namespace CVC4
diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp
index 631d4b8e4..276aab3a2 100644
--- a/src/theory/strings/term_registry.cpp
+++ b/src/theory/strings/term_registry.cpp
@@ -44,6 +44,7 @@ TermRegistry::TermRegistry(context::Context* c,
       d_out(out),
       d_statistics(statistics),
       d_hasStrCode(false),
+      d_skCache(statistics),
       d_functionsTerms(c),
       d_inputVars(u),
       d_preregisteredTerms(u),
@@ -276,7 +277,7 @@ Node TermRegistry::getRegisterTermLemma(Node n)
       return Node::null();
     }
   }
-  Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
+  Node sk = d_skCache.mkSkolemCached(n, SkolemId::SK_PURIFY, "lsym");
   StringsProxyVarAttribute spva;
   sk.setAttribute(spva, true);
   Node eq = Rewriter::rewrite(sk.eqNode(n));
diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp
index 939146a3d..1c7ffb4a2 100644
--- a/src/theory/strings/theory_strings_preprocess.cpp
+++ b/src/theory/strings/theory_strings_preprocess.cpp
@@ -59,7 +59,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
     Node s = t[0];
     Node n = t[1];
     Node m = t[2];
-    Node skt = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst");
+    Node skt = d_sc->mkSkolemCached(t, SkolemId::SK_PURIFY, "sst");
     Node t12 = nm->mkNode(PLUS, n, m);
     t12 = Rewriter::rewrite(t12);
     Node lt0 = nm->mkNode(STRING_LENGTH, s);
@@ -73,13 +73,13 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
 
     Node emp = Word::mkEmptyWord(t.getType());
 
-    Node sk1 = n == d_zero ? emp
-                           : d_sc->mkSkolemCached(
-                                 s, n, SkolemCache::SK_PREFIX, "sspre");
-    Node sk2 = ArithEntail::check(t12, lt0)
+    Node sk1 = n == d_zero
                    ? emp
-                   : d_sc->mkSkolemCached(
-                       s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr");
+                   : d_sc->mkSkolemCached(s, n, SkolemId::SK_PREFIX, "sspre");
+    Node sk2 =
+        ArithEntail::check(t12, lt0)
+            ? emp
+            : d_sc->mkSkolemCached(s, t12, SkolemId::SK_SUFFIX_REM, "sssufr");
     Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, skt, sk2));
     //length of first skolem is second argument
     Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n);
@@ -139,9 +139,9 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
                          n,
                          nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, x), n));
     Node io2 =
-        d_sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_PRE, "iopre");
+        d_sc->mkSkolemCached(st, y, SkolemId::SK_FIRST_CTN_IOPRE, "iopre");
     Node io4 =
-        d_sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_POST, "iopost");
+        d_sc->mkSkolemCached(st, y, SkolemId::SK_FIRST_CTN_IOPOST, "iopost");
 
     // ~contains( substr( x, n, len( x ) - n ), y )
     Node c11 = nm->mkNode(STRING_STRCTN, st, y).negate();
@@ -198,7 +198,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
   {
     // processing term:  int.to.str( n )
     Node n = t[0];
-    Node itost = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "itost");
+    Node itost = d_sc->mkSkolemCached(t, SkolemId::SK_PURIFY, "itost");
     Node leni = nm->mkNode(STRING_LENGTH, itost);
 
     std::vector<Node> conc;
@@ -362,10 +362,10 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
     Node z = t[2];
     TypeNode tn = t[0].getType();
     Node rp1 =
-        d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_PRE, "rfcpre");
+        d_sc->mkSkolemCached(x, y, SkolemId::SK_FIRST_CTN_RFCPRE, "rfcpre");
     Node rp2 =
-        d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_POST, "rfcpost");
-    Node rpw = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpw");
+        d_sc->mkSkolemCached(x, y, SkolemId::SK_FIRST_CTN_RFCPOST, "rfcpost");
+    Node rpw = d_sc->mkSkolemCached(t, SkolemId::SK_PURIFY, "rpw");
 
     // y = ""
     Node emp = Word::mkEmptyWord(tn);
@@ -421,17 +421,17 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
     Node x = t[0];
     Node y = t[1];
     Node z = t[2];
-    Node rpaw = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpaw");
+    Node rpaw = d_sc->mkSkolemCached(t, SkolemId::SK_PURIFY, "rpaw");
 
     Node numOcc = d_sc->mkTypedSkolemCached(
-        nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc");
+        nm->integerType(), x, y, SkolemId::SK_NUM_OCCUR, "numOcc");
     std::vector<TypeNode> argTypes;
     argTypes.push_back(nm->integerType());
     Node us =
         nm->mkSkolem("Us", nm->mkFunctionType(argTypes, nm->stringType()));
     TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType());
-    Node uf = d_sc->mkTypedSkolemCached(
-        ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf");
+    Node uf =
+        d_sc->mkTypedSkolemCached(ufType, x, y, SkolemId::SK_OCCUR_INDEX, "Uf");
 
     Node ufno = nm->mkNode(APPLY_UF, uf, numOcc);
     Node usno = nm->mkNode(APPLY_UF, us, numOcc);
@@ -495,7 +495,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
   else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER)
   {
     Node x = t[0];
-    Node r = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r");
+    Node r = d_sc->mkSkolemCached(t, SkolemId::SK_PURIFY, "r");
 
     Node lenx = nm->mkNode(STRING_LENGTH, x);
     Node lenr = nm->mkNode(STRING_LENGTH, r);
@@ -541,7 +541,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
   else if (t.getKind() == STRING_REV)
   {
     Node x = t[0];
-    Node r = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r");
+    Node r = d_sc->mkSkolemCached(t, SkolemId::SK_PURIFY, "r");
 
     Node lenx = nm->mkNode(STRING_LENGTH, x);
     Node lenr = nm->mkNode(STRING_LENGTH, r);
author	Andres Noetzli <andres.noetzli@gmail.com>	2020-05-08 22:42:22 -0700
committer	Andres Noetzli <andres.noetzli@gmail.com>	2020-05-11 22:22:48 -0700
commit	5a610d9d82a40ba9e78a1a948c82aa42e7b961ae (patch)
tree	fd69a4b1b52d1ef5ec8723c537f6179913d5be33
parent	ea7a9c71305bad749f4e148097585fdcc28004f8 (diff)