9 files changed, 44 insertions, 95 deletions
diff --git a/src/theory/strings/infer_proof_cons.cpp b/src/theory/strings/infer_proof_cons.cpp
index 8ee29f712..d214babae 100644
--- a/src/theory/strings/infer_proof_cons.cpp
+++ b/src/theory/strings/infer_proof_cons.cpp
@@ -15,10 +15,10 @@
 
 #include "theory/strings/infer_proof_cons.h"
 
-#include "expr/proof_node_manager.h"
 #include "expr/skolem_manager.h"
 #include "options/smt_options.h"
 #include "options/strings_options.h"
+#include "proof/proof_node_manager.h"
 #include "theory/builtin/proof_checker.h"
 #include "theory/rewriter.h"
 #include "theory/strings/regexp_operation.h"
@@ -712,11 +712,11 @@ void InferProofCons::convert(InferenceId infer,
           }
           else if (eunf.getKind() == AND)
           {
-            // equality is the last conjunct
+            // equality is the first conjunct
             std::vector<Node> childrenAE;
             childrenAE.push_back(eunf);
             std::vector<Node> argsAE;
-            argsAE.push_back(nm->mkConst(Rational(eunf.getNumChildren() - 1)));
+            argsAE.push_back(nm->mkConst(Rational(0)));
             Node eunfAE = psb.tryStep(PfRule::AND_ELIM, childrenAE, argsAE);
             Trace("strings-ipc-prefix")
                 << "--- and elim to " << eunfAE << std::endl;
diff --git a/src/theory/strings/infer_proof_cons.h b/src/theory/strings/infer_proof_cons.h
index af905cbad..bba03dd28 100644
--- a/src/theory/strings/infer_proof_cons.h
+++ b/src/theory/strings/infer_proof_cons.h
@@ -21,12 +21,12 @@
 #include <vector>
 
 #include "expr/node.h"
-#include "expr/proof_checker.h"
-#include "expr/proof_rule.h"
+#include "proof/proof_checker.h"
+#include "proof/proof_rule.h"
+#include "proof/theory_proof_step_buffer.h"
 #include "theory/builtin/proof_checker.h"
 #include "theory/strings/infer_info.h"
 #include "theory/strings/sequences_stats.h"
-#include "theory/theory_proof_step_buffer.h"
 #include "theory/uf/proof_equality_engine.h"
 
 namespace cvc5 {
diff --git a/src/theory/strings/inference_manager.h b/src/theory/strings/inference_manager.h
index cf9c64bb1..da44100f9 100644
--- a/src/theory/strings/inference_manager.h
+++ b/src/theory/strings/inference_manager.h
@@ -24,7 +24,7 @@
 #include "context/cdhashset.h"
 #include "context/context.h"
 #include "expr/node.h"
-#include "expr/proof_node_manager.h"
+#include "proof/proof_node_manager.h"
 #include "theory/ext_theory.h"
 #include "theory/inference_manager_buffered.h"
 #include "theory/output_channel.h"
diff --git a/src/theory/strings/proof_checker.h b/src/theory/strings/proof_checker.h
index d3ede53ec..56afaed84 100644
--- a/src/theory/strings/proof_checker.h
+++ b/src/theory/strings/proof_checker.h
@@ -19,8 +19,8 @@
 #define CVC5__THEORY__STRINGS__PROOF_CHECKER_H
 
 #include "expr/node.h"
-#include "expr/proof_checker.h"
-#include "expr/proof_node.h"
+#include "proof/proof_checker.h"
+#include "proof/proof_node.h"
 
 namespace cvc5 {
 namespace theory {
diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp
index 4b033b7bd..f635cdc39 100644
--- a/src/theory/strings/regexp_elim.cpp
+++ b/src/theory/strings/regexp_elim.cpp
@@ -15,8 +15,8 @@
 
 #include "theory/strings/regexp_elim.h"
 
-#include "expr/proof_node_manager.h"
 #include "options/strings_options.h"
+#include "proof/proof_node_manager.h"
 #include "theory/rewriter.h"
 #include "theory/strings/regexp_entail.h"
 #include "theory/strings/theory_strings_utils.h"
@@ -88,10 +88,10 @@ Node RegExpElimination::eliminateConcat(Node atom, bool isAgg)
   // have a fixed length.
   // The intuition why this is a "non-aggressive" rewrite is that membership
   // into fixed length regular expressions are easy to handle.
-  bool hasFixedLength = true;
   // the index of _* in re
   unsigned pivotIndex = 0;
   bool hasPivotIndex = false;
+  bool hasFixedLength = true;
   std::vector<Node> childLengths;
   std::vector<Node> childLengthsPostPivot;
   for (unsigned i = 0, size = children.size(); i < size; i++)
@@ -105,27 +105,32 @@ Node RegExpElimination::eliminateConcat(Node atom, bool isAgg)
       {
         hasPivotIndex = true;
         pivotIndex = i;
-        // set to zero for the sum below
+        // zero is used in sum below and is used for concat-fixed-len
         fl = zero;
       }
       else
       {
         hasFixedLength = false;
-        break;
       }
     }
-    childLengths.push_back(fl);
-    if (hasPivotIndex)
+    if (!fl.isNull())
     {
-      childLengthsPostPivot.push_back(fl);
+      childLengths.push_back(fl);
+      if (hasPivotIndex)
+      {
+        childLengthsPostPivot.push_back(fl);
+      }
     }
   }
+  Node lenSum = childLengths.size() > 1
+                    ? nm->mkNode(PLUS, childLengths)
+                    : (childLengths.empty() ? zero : childLengths[0]);
+  // if we have a fixed length
   if (hasFixedLength)
   {
     Assert(re.getNumChildren() == children.size());
-    Node sum = nm->mkNode(PLUS, childLengths);
     std::vector<Node> conc;
-    conc.push_back(nm->mkNode(hasPivotIndex ? GEQ : EQUAL, lenx, sum));
+    conc.push_back(nm->mkNode(hasPivotIndex ? GEQ : EQUAL, lenx, lenSum));
     Node currEnd = zero;
     for (unsigned i = 0, size = childLengths.size(); i < size; i++)
     {
@@ -326,7 +331,7 @@ Node RegExpElimination::eliminateConcat(Node atom, bool isAgg)
         Node fit = nm->mkNode(LEQ, nm->mkNode(PLUS, prev_end, cEnd), lenx);
         conj.push_back(fit);
       }
-      Node res = conj.size() == 1 ? conj[0] : nm->mkNode(AND, conj);
+      Node res = nm->mkAnd(conj);
       // process the non-greedy find variables
       if (!non_greedy_find_vars.empty())
       {
@@ -342,19 +347,23 @@ Node RegExpElimination::eliminateConcat(Node atom, bool isAgg)
         Node bvl = nm->mkNode(BOUND_VAR_LIST, non_greedy_find_vars);
         res = nm->mkNode(EXISTS, bvl, body);
       }
+      // must also give a minimum length requirement
+      res = nm->mkNode(AND, res, nm->mkNode(GEQ, lenx, lenSum));
       // Examples of this elimination:
       //   x in (re.++ "A" (re.* _) "B" (re.* _)) --->
       //     substr(x,0,1)="A" ^ indexof(x,"B",1)!=-1
       //   x in (re.++ (re.* _) "A" _ _ _ (re.* _) "B" _ _ (re.* _)) --->
       //     indexof(x,"A",0)!=-1 ^
       //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 ) != -1 ^
-      //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 )+1+2 <= len(x)
+      //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 )+1+2 <= len(x) ^
+      //     len(x) >= 7
 
       // An example of a non-greedy find:
       //   x in re.++( re.*( _ ), "A", _, "B", re.*( _ ) ) --->
-      //     exists k. 0 <= k < len( x ) ^
+      //     (exists k. 0 <= k < len( x ) ^
       //               indexof( x, "A", k ) != -1 ^
-      //               substr( x, indexof( x, "A", k )+2, 1 ) = "B"
+      //               substr( x, indexof( x, "A", k )+2, 1 ) = "B") ^
+      //     len(x) >= 3
       return returnElim(atom, res, "concat-with-gaps");
     }
   }
diff --git a/src/theory/strings/regexp_elim.h b/src/theory/strings/regexp_elim.h
index 5387548de..6187a7137 100644
--- a/src/theory/strings/regexp_elim.h
+++ b/src/theory/strings/regexp_elim.h
@@ -18,8 +18,8 @@
 #define CVC5__THEORY__STRINGS__REGEXP_ELIM_H
 
 #include "expr/node.h"
-#include "theory/eager_proof_generator.h"
-#include "theory/trust_node.h"
+#include "proof/eager_proof_generator.h"
+#include "proof/trust_node.h"
 
 namespace cvc5 {
 namespace theory {
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
index bf4c20f85..96351cda9 100644
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -1017,16 +1017,7 @@ Node RegExpOpr::reduceRegExpPos(Node mem,
   {
     std::vector<Node> nvec;
     std::vector<Node> cc;
-    // get the (valid) existential for this membership
-    Node eform = getExistsForRegExpConcatMem(mem);
     SkolemManager* sm = nm->getSkolemManager();
-    // Notice that this rule does not introduce witness terms, instead it
-    // uses skolems in the conclusion of the proof rule directly. Thus,
-    // the existential eform does not need to be explicitly justified by a
-    // proof here, since it is only being used as an intermediate formula in
-    // this inference. Hence we do not pass a proof generator to mkSkolemize.
-    sm->mkSkolemize(eform, newSkolems, "rc", "regexp concat skolem");
-    Assert(newSkolems.size() == r.getNumChildren());
     // Look up skolems for each of the components. If sc has optimizations
     // enabled, this will return arguments of str.to_re.
     for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i)
@@ -1034,17 +1025,22 @@ Node RegExpOpr::reduceRegExpPos(Node mem,
       if (r[i].getKind() == STRING_TO_REGEXP)
       {
         // optimization, just take the body
-        newSkolems[i] = r[i][0];
+        newSkolems.push_back(r[i][0]);
       }
       else
       {
+        Node ivalue = nm->mkConst(Rational(i));
+        Node sk = sm->mkSkolemFunction(SkolemFunId::RE_UNFOLD_POS_COMPONENT,
+                                       s.getType(),
+                                       {mem[0], mem[1], ivalue});
+        newSkolems.push_back(sk);
         nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i]));
       }
     }
-    // (str.in_re x (re.++ R1 .... Rn)) =>
-    // (and (str.in_re k1 R1) ... (str.in_re kn Rn) (= x (str.++ k1 ... kn)))
+    // (str.in_re x (re.++ R0 .... Rn)) =>
+    // (and (= x (str.++ k0 ... kn)) (str.in_re k0 R0) ... (str.in_re kn Rn) )
     Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems));
-    nvec.push_back(lem);
+    nvec.insert(nvec.begin(), lem);
     conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec);
   }
   else if (k == REGEXP_STAR)
@@ -1574,50 +1570,6 @@ bool RegExpOpr::regExpIncludes(Node r1, Node r2)
   return result;
 }
 
-/**
- * Associating formulas with their "exists form", or an existentially
- * quantified formula that is equivalent to it. This is currently used
- * for regular expression memberships in the method below.
- */
-struct ExistsFormAttributeId
-{
-};
-typedef expr::Attribute<ExistsFormAttributeId, Node> ExistsFormAttribute;
-
-Node RegExpOpr::getExistsForRegExpConcatMem(Node mem)
-{
-  // get or make the exists form of the membership
-  ExistsFormAttribute efa;
-  if (mem.hasAttribute(efa))
-  {
-    // already computed
-    return mem.getAttribute(efa);
-  }
-  Assert(mem.getKind() == STRING_IN_REGEXP);
-  Node x = mem[0];
-  Node r = mem[1];
-  Assert(r.getKind() == REGEXP_CONCAT);
-  NodeManager* nm = NodeManager::currentNM();
-  TypeNode xtn = x.getType();
-  std::vector<Node> vars;
-  std::vector<Node> mems;
-  for (const Node& rc : r)
-  {
-    Node v = nm->mkBoundVar(xtn);
-    vars.push_back(v);
-    mems.push_back(nm->mkNode(STRING_IN_REGEXP, v, rc));
-  }
-  Node sconcat = nm->mkNode(STRING_CONCAT, vars);
-  Node eq = x.eqNode(sconcat);
-  mems.insert(mems.begin(), eq);
-  Node bvl = nm->mkNode(BOUND_VAR_LIST, vars);
-  Node ebody = nm->mkNode(AND, mems);
-  Node eform = nm->mkNode(EXISTS, bvl, ebody);
-  mem.setAttribute(efa, eform);
-  Trace("regexp-opr") << "Exists form " << mem << " : " << eform << std::endl;
-  return eform;
-}
-
 }  // namespace strings
 }  // namespace theory
 }  // namespace cvc5
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h
index a20e9a0a9..04b5a999d 100644
--- a/src/theory/strings/regexp_operation.h
+++ b/src/theory/strings/regexp_operation.h
@@ -190,18 +190,6 @@ class RegExpOpr {
   bool regExpIncludes(Node r1, Node r2);
 
  private:
-  /**
-   * Given a regular expression membership of the form:
-   *   (str.in_re x (re.++ R1 ... Rn))
-   * This returns the valid existentially quantified formula:
-   *   (exists ((x1 String) ... (xn String))
-   *      (=> (str.in_re x (re.++ R1 ... Rn))
-   *      (and (= x (str.++ x1 ... xn))
-   *           (str.in_re x1 R1) ... (str.in_re xn Rn))))
-   * Moreover, this formula is cached per regular expression membership via
-   * an attribute, meaning it is always the same for a given membership mem.
-   */
-  static Node getExistsForRegExpConcatMem(Node mem);
   /** pointer to the skolem cache used by this class */
   SkolemCache* d_sc;
 };
diff --git a/src/theory/strings/term_registry.h b/src/theory/strings/term_registry.h
index 7c399759b..7d660e019 100644
--- a/src/theory/strings/term_registry.h
+++ b/src/theory/strings/term_registry.h
@@ -20,8 +20,8 @@
 
 #include "context/cdhashset.h"
 #include "context/cdlist.h"
-#include "expr/proof_node_manager.h"
-#include "theory/eager_proof_generator.h"
+#include "proof/eager_proof_generator.h"
+#include "proof/proof_node_manager.h"
 #include "theory/output_channel.h"
 #include "theory/strings/infer_info.h"
 #include "theory/strings/sequences_stats.h"