Better formalization of regular expression unfolding skolems (#6602)

This replaces our previous formalization of RE unfolding skolems with a more explicit one that is amenable to external proof conversion. It adds a few associated utility methods to SkolemManager required for LFSC proof conversion for RE_UNFOLD_POS. It also changes the order of equalities in the RE_UNFOLD_POS rule, which simplifies LFSC proof checking.
author: Andrew Reynolds <andrew.j.reynolds@gmail.com> 2021-05-24 09:39:56 -0500
committer: GitHub <noreply@github.com> 2021-05-24 09:39:56 -0500
commit: b00667a21984dc3f6b960a1a2d22845c7096bc35 (patch)
tree: 8a6dc1bbe37bcc1617f672403800999dd1cc203a /src
parent: 0e9fed387aaf5d0cb1844ab20551fbebfcb8893d (diff)
5 files changed, 59 insertions, 74 deletions
diff --git a/src/expr/skolem_manager.cpp b/src/expr/skolem_manager.cpp
index 4004bf6fe..8e1c5e54c 100644
--- a/src/expr/skolem_manager.cpp
+++ b/src/expr/skolem_manager.cpp
@@ -53,6 +53,7 @@ const char* toString(SkolemFunId id)
     case SkolemFunId::SELECTOR_WRONG: return "SELECTOR_WRONG";
     case SkolemFunId::SHARED_SELECTOR: return "SHARED_SELECTOR";
     case SkolemFunId::SEQ_NTH_OOB: return "SEQ_NTH_OOB";
+    case SkolemFunId::RE_UNFOLD_POS_COMPONENT: return "RE_UNFOLD_POS_COMPONENT";
     default: return "?";
   }
 }
@@ -206,11 +207,37 @@ Node SkolemManager::mkSkolemFunction(SkolemFunId id,
     ss << "SKOLEM_FUN_" << id;
     Node k = nm->mkSkolem(ss.str(), tn, "an internal skolem function", flags);
     d_skolemFuns[key] = k;
+    d_skolemFunMap[k] = key;
     return k;
   }
   return it->second;
 }
 
+Node SkolemManager::mkSkolemFunction(SkolemFunId id,
+                                     TypeNode tn,
+                                     const std::vector<Node>& cacheVals,
+                                     int flags)
+{
+  Assert(cacheVals.size() > 1);
+  Node cacheVal = NodeManager::currentNM()->mkNode(SEXPR, cacheVals);
+  return mkSkolemFunction(id, tn, cacheVal, flags);
+}
+
+bool SkolemManager::isSkolemFunction(Node k,
+                                     SkolemFunId& id,
+                                     Node& cacheVal) const
+{
+  std::map<Node, std::tuple<SkolemFunId, TypeNode, Node>>::const_iterator it =
+      d_skolemFunMap.find(k);
+  if (it == d_skolemFunMap.end())
+  {
+    return false;
+  }
+  id = std::get<0>(it->second);
+  cacheVal = std::get<2>(it->second);
+  return true;
+}
+
 Node SkolemManager::mkDummySkolem(const std::string& prefix,
                                   const TypeNode& type,
                                   const std::string& comment,
diff --git a/src/expr/skolem_manager.h b/src/expr/skolem_manager.h
index d62153941..a7d35d155 100644
--- a/src/expr/skolem_manager.h
+++ b/src/expr/skolem_manager.h
@@ -29,6 +29,7 @@ class ProofGenerator;
 /** Skolem function identifier */
 enum class SkolemFunId
 {
+  NONE,
   /** an uninterpreted function f s.t. f(x) = x / 0.0 (real division) */
   DIV_BY_ZERO,
   /** an uninterpreted function f s.t. f(x) = x / 0 (integer division) */
@@ -43,6 +44,13 @@ enum class SkolemFunId
   SHARED_SELECTOR,
   /** an application of seq.nth that is out of bounds */
   SEQ_NTH_OOB,
+  /**
+   * Regular expression unfold component: if (str.in_re t R), where R is
+   * (re.++ r0 ... rn), then the RE_UNFOLD_POS_COMPONENT{t,R,i} is a string
+   * skolem ki such that t = (str.++ k0 ... kn) and (str.in_re k0 r0) for
+   * i = 0, ..., n.
+   */
+  RE_UNFOLD_POS_COMPONENT,
 };
 /** Converts a skolem function name to a string. */
 const char* toString(SkolemFunId id);
@@ -235,6 +243,16 @@ class SkolemManager
                         TypeNode tn,
                         Node cacheVal = Node::null(),
                         int flags = NodeManager::SKOLEM_DEFAULT);
+  /** Same as above, with multiple cache values */
+  Node mkSkolemFunction(SkolemFunId id,
+                        TypeNode tn,
+                        const std::vector<Node>& cacheVals,
+                        int flags = NodeManager::SKOLEM_DEFAULT);
+  /**
+   * Is k a skolem function? Returns true if k was generated by the above call.
+   * Updates the arguments to the values used when constructing it.
+   */
+  bool isSkolemFunction(Node k, SkolemFunId& id, Node& cacheVal) const;
   /**
    * Create a skolem constant with the given name, type, and comment. This
    * should only be used if the definition of the skolem does not matter.
@@ -281,10 +299,10 @@ class SkolemManager
   static Node getOriginalForm(Node n);
 
  private:
-  /**
-   * Cached of skolem functions for mkSkolemFunction above.
-   */
+  /** Cache of skolem functions for mkSkolemFunction above. */
   std::map<std::tuple<SkolemFunId, TypeNode, Node>, Node> d_skolemFuns;
+  /** Backwards mapping of above */
+  std::map<Node, std::tuple<SkolemFunId, TypeNode, Node>> d_skolemFunMap;
   /**
    * Mapping from witness terms to proof generators.
    */
diff --git a/src/theory/strings/infer_proof_cons.cpp b/src/theory/strings/infer_proof_cons.cpp
index 8ee29f712..29a4187ec 100644
--- a/src/theory/strings/infer_proof_cons.cpp
+++ b/src/theory/strings/infer_proof_cons.cpp
@@ -712,11 +712,11 @@ void InferProofCons::convert(InferenceId infer,
           }
           else if (eunf.getKind() == AND)
           {
-            // equality is the last conjunct
+            // equality is the first conjunct
             std::vector<Node> childrenAE;
             childrenAE.push_back(eunf);
             std::vector<Node> argsAE;
-            argsAE.push_back(nm->mkConst(Rational(eunf.getNumChildren() - 1)));
+            argsAE.push_back(nm->mkConst(Rational(0)));
             Node eunfAE = psb.tryStep(PfRule::AND_ELIM, childrenAE, argsAE);
             Trace("strings-ipc-prefix")
                 << "--- and elim to " << eunfAE << std::endl;
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
index bf4c20f85..96351cda9 100644
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -1017,16 +1017,7 @@ Node RegExpOpr::reduceRegExpPos(Node mem,
   {
     std::vector<Node> nvec;
     std::vector<Node> cc;
-    // get the (valid) existential for this membership
-    Node eform = getExistsForRegExpConcatMem(mem);
     SkolemManager* sm = nm->getSkolemManager();
-    // Notice that this rule does not introduce witness terms, instead it
-    // uses skolems in the conclusion of the proof rule directly. Thus,
-    // the existential eform does not need to be explicitly justified by a
-    // proof here, since it is only being used as an intermediate formula in
-    // this inference. Hence we do not pass a proof generator to mkSkolemize.
-    sm->mkSkolemize(eform, newSkolems, "rc", "regexp concat skolem");
-    Assert(newSkolems.size() == r.getNumChildren());
     // Look up skolems for each of the components. If sc has optimizations
     // enabled, this will return arguments of str.to_re.
     for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i)
@@ -1034,17 +1025,22 @@ Node RegExpOpr::reduceRegExpPos(Node mem,
       if (r[i].getKind() == STRING_TO_REGEXP)
       {
         // optimization, just take the body
-        newSkolems[i] = r[i][0];
+        newSkolems.push_back(r[i][0]);
       }
       else
       {
+        Node ivalue = nm->mkConst(Rational(i));
+        Node sk = sm->mkSkolemFunction(SkolemFunId::RE_UNFOLD_POS_COMPONENT,
+                                       s.getType(),
+                                       {mem[0], mem[1], ivalue});
+        newSkolems.push_back(sk);
         nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i]));
       }
     }
-    // (str.in_re x (re.++ R1 .... Rn)) =>
-    // (and (str.in_re k1 R1) ... (str.in_re kn Rn) (= x (str.++ k1 ... kn)))
+    // (str.in_re x (re.++ R0 .... Rn)) =>
+    // (and (= x (str.++ k0 ... kn)) (str.in_re k0 R0) ... (str.in_re kn Rn) )
     Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems));
-    nvec.push_back(lem);
+    nvec.insert(nvec.begin(), lem);
     conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec);
   }
   else if (k == REGEXP_STAR)
@@ -1574,50 +1570,6 @@ bool RegExpOpr::regExpIncludes(Node r1, Node r2)
   return result;
 }
 
-/**
- * Associating formulas with their "exists form", or an existentially
- * quantified formula that is equivalent to it. This is currently used
- * for regular expression memberships in the method below.
- */
-struct ExistsFormAttributeId
-{
-};
-typedef expr::Attribute<ExistsFormAttributeId, Node> ExistsFormAttribute;
-
-Node RegExpOpr::getExistsForRegExpConcatMem(Node mem)
-{
-  // get or make the exists form of the membership
-  ExistsFormAttribute efa;
-  if (mem.hasAttribute(efa))
-  {
-    // already computed
-    return mem.getAttribute(efa);
-  }
-  Assert(mem.getKind() == STRING_IN_REGEXP);
-  Node x = mem[0];
-  Node r = mem[1];
-  Assert(r.getKind() == REGEXP_CONCAT);
-  NodeManager* nm = NodeManager::currentNM();
-  TypeNode xtn = x.getType();
-  std::vector<Node> vars;
-  std::vector<Node> mems;
-  for (const Node& rc : r)
-  {
-    Node v = nm->mkBoundVar(xtn);
-    vars.push_back(v);
-    mems.push_back(nm->mkNode(STRING_IN_REGEXP, v, rc));
-  }
-  Node sconcat = nm->mkNode(STRING_CONCAT, vars);
-  Node eq = x.eqNode(sconcat);
-  mems.insert(mems.begin(), eq);
-  Node bvl = nm->mkNode(BOUND_VAR_LIST, vars);
-  Node ebody = nm->mkNode(AND, mems);
-  Node eform = nm->mkNode(EXISTS, bvl, ebody);
-  mem.setAttribute(efa, eform);
-  Trace("regexp-opr") << "Exists form " << mem << " : " << eform << std::endl;
-  return eform;
-}
-
 }  // namespace strings
 }  // namespace theory
 }  // namespace cvc5
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h
index a20e9a0a9..04b5a999d 100644
--- a/src/theory/strings/regexp_operation.h
+++ b/src/theory/strings/regexp_operation.h
@@ -190,18 +190,6 @@ class RegExpOpr {
   bool regExpIncludes(Node r1, Node r2);
 
  private:
-  /**
-   * Given a regular expression membership of the form:
-   *   (str.in_re x (re.++ R1 ... Rn))
-   * This returns the valid existentially quantified formula:
-   *   (exists ((x1 String) ... (xn String))
-   *      (=> (str.in_re x (re.++ R1 ... Rn))
-   *      (and (= x (str.++ x1 ... xn))
-   *           (str.in_re x1 R1) ... (str.in_re xn Rn))))
-   * Moreover, this formula is cached per regular expression membership via
-   * an attribute, meaning it is always the same for a given membership mem.
-   */
-  static Node getExistsForRegExpConcatMem(Node mem);
   /** pointer to the skolem cache used by this class */
   SkolemCache* d_sc;
 };
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>	2021-05-24 09:39:56 -0500
committer	GitHub <noreply@github.com>	2021-05-24 09:39:56 -0500
commit	b00667a21984dc3f6b960a1a2d22845c7096bc35 (patch)
tree	8a6dc1bbe37bcc1617f672403800999dd1cc203a /src
parent	0e9fed387aaf5d0cb1844ab20551fbebfcb8893d (diff)