Formalize more string skolems (#7554)

This properly formalizes all string skolems used in reduction and preprocessing. This avoids proof checking failures due to non-deterministism when checking steps for these modules. Fixes cvc5/cvc5-projects#334. Fixes cvc5/cvc5-projects#331.
author: Andrew Reynolds <andrew.j.reynolds@gmail.com> 2021-11-03 11:12:18 -0500
committer: GitHub <noreply@github.com> 2021-11-03 16:12:18 +0000
commit: d527b3f2501410770a76977efa180009e06ea172 (patch)
tree: 2740aea9ed89c510e5ccb14b89ca91822609978b /src/theory
parent: 324434a74b35d0e58bdb551c9155e9fb32844d07 (diff)
5 files changed, 69 insertions, 80 deletions
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp
index b5e15a129..0b7294f2c 100644
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -2463,8 +2463,7 @@ void CoreSolver::processDeqExtensionality(Node n1, Node n2)
   NodeManager* nm = NodeManager::currentNM();
   SkolemCache* sc = d_termReg.getSkolemCache();
   TypeNode intType = nm->integerType();
-  Node k = sc->mkTypedSkolemCached(
-      intType, n1, n2, SkolemCache::SK_DEQ_DIFF, "diff");
+  Node k = sc->mkSkolemFun(SkolemFunId::STRINGS_DEQ_DIFF, intType, n1, n2);
   Node deq = eq.negate();
   Node ss1, ss2;
   if (n1.getType().isString())
diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp
index 0d2e9cacb..60e6b6a01 100644
--- a/src/theory/strings/skolem_cache.cpp
+++ b/src/theory/strings/skolem_cache.cpp
@@ -119,7 +119,6 @@ Node SkolemCache::mkTypedSkolemCached(
     case SK_ID_V_SPT_REV:
     case SK_ID_VC_SPT:
     case SK_ID_VC_SPT_REV:
-    case SK_FIRST_CTN_POST:
     case SK_ID_C_SPT:
     case SK_ID_C_SPT_REV:
     case SK_ID_DC_SPT:
@@ -127,12 +126,11 @@ Node SkolemCache::mkTypedSkolemCached(
     case SK_ID_DEQ_X:
     case SK_ID_DEQ_Y:
     case SK_FIRST_CTN_PRE:
+    case SK_FIRST_CTN_POST:
     case SK_PREFIX:
     case SK_SUFFIX_REM:
       Unhandled() << "Expected to eliminate Skolem ID " << id << std::endl;
       break;
-    case SK_NUM_OCCUR:
-    case SK_OCCUR_INDEX:
     default:
     {
       Notice() << "Don't know how to handle Skolem ID " << id << std::endl;
@@ -318,6 +316,25 @@ Node SkolemCache::mkLengthVar(Node t)
   return bvm->mkBoundVar<LengthVarAttribute>(t, intType);
 }
 
+Node SkolemCache::mkSkolemFun(SkolemFunId id, TypeNode tn, Node a, Node b)
+{
+  std::vector<Node> cacheVals;
+  for (size_t i = 0; i < 2; i++)
+  {
+    Node n = i == 0 ? a : b;
+    if (!n.isNull())
+    {
+      n = d_rr != nullptr ? d_rr->rewrite(n) : n;
+      cacheVals.push_back(n);
+    }
+  }
+  NodeManager* nm = NodeManager::currentNM();
+  SkolemManager* sm = nm->getSkolemManager();
+  Node k = sm->mkSkolemFunction(id, tn, cacheVals);
+  d_allSkolems.insert(k);
+  return k;
+}
+
 }  // namespace strings
 }  // namespace theory
 }  // namespace cvc5
diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h
index 2b714781b..ff74e5c0d 100644
--- a/src/theory/strings/skolem_cache.h
+++ b/src/theory/strings/skolem_cache.h
@@ -58,6 +58,10 @@ class SkolemCache
    * preconditions below, e.g. where we are considering a' ++ a = b' ++ b.
    *
    * All skolems assume a and b are strings unless otherwise stated.
+   *
+   * Notice that these identifiers are each syntax sugar for constructing a
+   * purification skolem. It is required for the purposes of proof checking
+   * that this only results in calls to SkolemManager::mkPurifySkolem.
    */
   enum SkolemId
   {
@@ -107,21 +111,6 @@ class SkolemCache
     // of b in a as the witness for contains( a, b ).
     SK_FIRST_CTN_PRE,
     SK_FIRST_CTN_POST,
-    // For sequence a and regular expression b,
-    // in_re(a, re.++(_*, b, _*)) =>
-    //    exists k_pre, k_match, k_post.
-    //       a = k_pre ++ k_match ++ k_post ^
-    //       len(k_pre) = indexof_re(x, y, 0) ^
-    //       (forall l. 0 < l < len(k_match) =>
-    //         ~in_re(substr(k_match, 0, l), r)) ^
-    //       in_re(k_match, b)
-    //
-    // k_pre is the prefix before the first, shortest match of b in a. k_match
-    // is the substring of a matched by b. It is either empty or there is no
-    // shorter string that matches b.
-    SK_FIRST_MATCH_PRE,
-    SK_FIRST_MATCH,
-    SK_FIRST_MATCH_POST,
     // For integer b,
     // len( a ) > b =>
     //    exists k. a = k ++ a' ^ len( k ) = b
@@ -129,33 +118,7 @@ class SkolemCache
     // For integer b,
     // b > 0 =>
     //    exists k. a = a' ++ k ^ len( k ) = ite( len(a)>b, len(a)-b, 0 )
-    SK_SUFFIX_REM,
-    // --------------- integer skolems
-    // exists k. ( b occurs k times in a )
-    SK_NUM_OCCUR,
-    // --------------- function skolems
-    // For function k: Int -> Int
-    //   exists k.
-    //     forall 0 <= x <= n,
-    //       k(x) is the end index of the x^th occurrence of b in a
-    //   where n is the number of occurrences of b in a, and k(0)=0.
-    SK_OCCUR_INDEX,
-    // For function k: Int -> Int
-    //   exists k.
-    //     forall 0 <= x < n,
-    //       k(x) is the length of the x^th occurrence of b in a (excluding
-    //       matches of empty strings)
-    //   where b is a regular expression, n is the number of occurrences of b
-    //   in a, and k(0)=0.
-    SK_OCCUR_LEN,
-    // For function k: ((Seq U) x Int) -> U
-    // exists k.
-    // forall s, n.
-    //  k(s, n) is some undefined value of sort U
-    SK_NTH,
-    // Diff index for disequalities
-    // a != b => substr(a,k,1) != substr(b,k,1)
-    SK_DEQ_DIFF
+    SK_SUFFIX_REM
   };
   /**
    * Returns a skolem of type string that is cached for (a,b,id) and has
@@ -201,6 +164,19 @@ class SkolemCache
    * that could be matched by r.
    */
   static Node mkLengthVar(Node t);
+  /**
+   * Make skolem function, possibly normalizing based on the rewriter of this
+   * class. This method should be used whenever it is not possible to define
+   * a Skolem identifier that amounts to purification of a term.
+   *
+   * Notice that this method is not static or constant since it tracks the
+   * Skolem we construct (in d_allSkolems), which is used for finite model
+   * finding.
+   */
+  Node mkSkolemFun(SkolemFunId id,
+                   TypeNode tn,
+                   Node a = Node::null(),
+                   Node b = Node::null());
 
  private:
   /**
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index 9a793e14f..e0f0a8dac 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -998,21 +998,20 @@ TrustNode TheoryStrings::ppRewrite(TNode atom, std::vector<SkolemLemma>& lems)
   }
   if (atom.getKind() == STRING_FROM_CODE)
   {
-    // str.from_code(t) --->
-    //   witness k. ite(0 <= t < |A|, t = str.to_code(k), k = "")
+    // str.from_code(t) ---> ite(0 <= t < |A|, t = str.to_code(k), k = "")
     NodeManager* nm = NodeManager::currentNM();
+    SkolemCache* sc = d_termReg.getSkolemCache();
+    Node k = sc->mkSkolemCached(atom, SkolemCache::SK_PURIFY, "kFromCode");
     Node t = atom[0];
     Node card = nm->mkConst(Rational(d_termReg.getAlphabetCardinality()));
     Node cond =
         nm->mkNode(AND, nm->mkNode(LEQ, d_zero, t), nm->mkNode(LT, t, card));
-    Node v = nm->mkBoundVar(nm->stringType());
     Node emp = Word::mkEmptyWord(atom.getType());
     Node pred = nm->mkNode(
-        ITE, cond, t.eqNode(nm->mkNode(STRING_TO_CODE, v)), v.eqNode(emp));
-    SkolemManager* sm = nm->getSkolemManager();
-    Node ret = sm->mkSkolem(v, pred, "kFromCode");
-    lems.push_back(SkolemLemma(ret, nullptr));
-    return TrustNode::mkTrustRewrite(atom, ret, nullptr);
+        ITE, cond, t.eqNode(nm->mkNode(STRING_TO_CODE, k)), k.eqNode(emp));
+    TrustNode tnk = TrustNode::mkTrustLemma(pred);
+    lems.push_back(SkolemLemma(tnk, k));
+    return TrustNode::mkTrustRewrite(atom, k, nullptr);
   }
   TrustNode ret;
   Node atomRet = atom;
diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp
index 550a9af8b..31f43d565 100644
--- a/src/theory/strings/theory_strings_preprocess.cpp
+++ b/src/theory/strings/theory_strings_preprocess.cpp
@@ -53,7 +53,6 @@ Node StringsPreprocess::reduce(Node t,
       << "StringsPreprocess::reduce: " << t << std::endl;
   Node retNode = t;
   NodeManager* nm = NodeManager::currentNM();
-  SkolemManager* sm = nm->getSkolemManager();
   Node zero = nm->mkConst(Rational(0));
   Node one = nm->mkConst(Rational(1));
   Node negOne = nm->mkConst(Rational(-1));
@@ -343,8 +342,8 @@ Node StringsPreprocess::reduce(Node t,
     std::vector<Node> conc;
     std::vector< TypeNode > argTypes;
     argTypes.push_back(nm->integerType());
-    Node u =
-        sm->mkDummySkolem("U", nm->mkFunctionType(argTypes, nm->integerType()));
+    TypeNode itosResType = nm->mkFunctionType(argTypes, nm->integerType());
+    Node u = sc->mkSkolemFun(SkolemFunId::STRINGS_ITOS_RESULT, itosResType, t);
 
     Node lem = nm->mkNode(GEQ, leni, one);
     conc.push_back(lem);
@@ -423,7 +422,8 @@ Node StringsPreprocess::reduce(Node t,
 
     Node emp = Word::mkEmptyWord(s.getType());
     Node sEmpty = s.eqNode(emp);
-    Node k = sm->mkDummySkolem("k", nm->integerType());
+    Node k = sc->mkSkolemFun(
+        SkolemFunId::STRINGS_STOI_NON_DIGIT, nm->integerType(), t);
     Node kc1 = nm->mkNode(GEQ, k, zero);
     Node kc2 = nm->mkNode(LT, k, lens);
     Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0")));
@@ -439,8 +439,9 @@ Node StringsPreprocess::reduce(Node t,
     std::vector<Node> conc2;
     std::vector< TypeNode > argTypes;
     argTypes.push_back(nm->integerType());
+    TypeNode stoiResultType = nm->mkFunctionType(argTypes, nm->integerType());
     Node u =
-        sm->mkDummySkolem("U", nm->mkFunctionType(argTypes, nm->integerType()));
+        sc->mkSkolemFun(SkolemFunId::STRINGS_STOI_RESULT, stoiResultType, t);
 
     lem = stoit.eqNode(nm->mkNode(APPLY_UF, u, lens));
     conc2.push_back(lem);
@@ -605,15 +606,15 @@ Node StringsPreprocess::reduce(Node t,
     Node z = t[2];
     Node rpaw = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpaw");
 
-    Node numOcc = sc->mkTypedSkolemCached(
-        nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc");
+    Node numOcc = sc->mkSkolemFun(
+        SkolemFunId::STRINGS_NUM_OCCUR, nm->integerType(), x, y);
     std::vector<TypeNode> argTypes;
     argTypes.push_back(nm->integerType());
-    Node us =
-        sm->mkDummySkolem("Us", nm->mkFunctionType(argTypes, nm->stringType()));
+    TypeNode raResultType = nm->mkFunctionType(argTypes, t.getType());
+    Node us = sc->mkSkolemFun(
+        SkolemFunId::STRINGS_REPLACE_ALL_RESULT, raResultType, t);
     TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType());
-    Node uf = sc->mkTypedSkolemCached(
-        ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf");
+    Node uf = sc->mkSkolemFun(SkolemFunId::STRINGS_OCCUR_INDEX, ufType, x, y);
 
     Node ufno = nm->mkNode(APPLY_UF, uf, numOcc);
     Node usno = nm->mkNode(APPLY_UF, us, numOcc);
@@ -691,12 +692,10 @@ Node StringsPreprocess::reduce(Node t,
     // k = z ++ x
     Node res1 = k.eqNode(nm->mkNode(STRING_CONCAT, z, x));
 
-    Node k1 =
-        sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_PRE, "rre_pre");
-    Node k2 =
-        sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH, "rre_match");
-    Node k3 =
-        sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_POST, "rre_post");
+    TypeNode ktype = t.getType();
+    Node k1 = sc->mkSkolemFun(SkolemFunId::SK_FIRST_MATCH_PRE, ktype, x, y);
+    Node k2 = sc->mkSkolemFun(SkolemFunId::SK_FIRST_MATCH, ktype, x, y);
+    Node k3 = sc->mkSkolemFun(SkolemFunId::SK_FIRST_MATCH_POST, ktype, x, y);
     Node k2Len = nm->mkNode(STRING_LENGTH, k2);
     // x = k1 ++ k2 ++ k3
     Node split = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3));
@@ -740,17 +739,16 @@ Node StringsPreprocess::reduce(Node t,
     Node z = t[2];
     Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k");
 
-    Node numOcc = sc->mkTypedSkolemCached(
-        nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc");
+    Node numOcc = sc->mkSkolemFun(
+        SkolemFunId::STRINGS_NUM_OCCUR, nm->integerType(), x, y);
     std::vector<TypeNode> argTypes;
     argTypes.push_back(nm->integerType());
-    Node us =
-        sm->mkDummySkolem("Us", nm->mkFunctionType(argTypes, t.getType()));
+    TypeNode raResultType = nm->mkFunctionType(argTypes, t.getType());
+    Node us = sc->mkSkolemFun(
+        SkolemFunId::STRINGS_REPLACE_ALL_RESULT, raResultType, t);
     TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType());
-    Node uf = sc->mkTypedSkolemCached(
-        ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf");
-    Node ul =
-        sc->mkTypedSkolemCached(ufType, x, y, SkolemCache::SK_OCCUR_LEN, "Ul");
+    Node uf = sc->mkSkolemFun(SkolemFunId::STRINGS_OCCUR_INDEX, ufType, x, y);
+    Node ul = sc->mkSkolemFun(SkolemFunId::STRINGS_OCCUR_LEN, ufType, x, y);
 
     Node emp = Word::mkEmptyWord(t.getType());
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>	2021-11-03 11:12:18 -0500
committer	GitHub <noreply@github.com>	2021-11-03 16:12:18 +0000
commit	d527b3f2501410770a76977efa180009e06ea172 (patch)
tree	2740aea9ed89c510e5ccb14b89ca91822609978b /src/theory
parent	324434a74b35d0e58bdb551c9155e9fb32844d07 (diff)