(proof-new) Refactor theory preprocessing (#5835)

This simplifies our algorithm for theory preprocessing. The motivation is two-fold: (1) Proofs are currently incorrect in rare cases due to incorrectly tracking pre vs post rewrites. (2) We can avoid traversing Boolean connectives with term formula removal. Note that this PR relies on the observation that term formula removal leaves Boolean structure unchanged and can apply locally to theory atoms. Furthermore, notice that our handling of lemmas generated by term formula removal is now more uniform. In the second algorithm, term formula removal is not applied to fixed point anymore. In other words, we do not remove skolems from lemmas generated by term formula removal recursively. Instead, since lemmas are theory-preprocessed recursively, we use the fixed point that runs outside the above algorithm instead of relying on the inner fixed point in term formula removal. This PR resolves several open issues with proof production on proof-new. I will performance test this change on SMT-LIB.
author: Andrew Reynolds <andrew.j.reynolds@gmail.com> 2021-02-02 14:48:59 -0600
committer: GitHub <noreply@github.com> 2021-02-02 14:48:59 -0600
commit: 3393c0c828b44f88c92e52a2b49d8a572b2a9b93 (patch)
tree: 4067ab99a209911548ccaa5b0413a1e016c27845 /src/theory/theory_preprocessor.cpp
parent: 478638868ec11c18882b9036850cafe4ff36f4bb (diff)
1 files changed, 139 insertions, 107 deletions
diff --git a/src/theory/theory_preprocessor.cpp b/src/theory/theory_preprocessor.cpp
index 3b5e9cee4..0719d4130 100644
--- a/src/theory/theory_preprocessor.cpp
+++ b/src/theory/theory_preprocessor.cpp
@@ -31,6 +31,7 @@ TheoryPreprocessor::TheoryPreprocessor(TheoryEngine& engine,
     : d_engine(engine),
       d_logicInfo(engine.getLogicInfo()),
       d_ppCache(userContext),
+      d_rtfCache(userContext),
       d_tfr(userContext, pnm),
       d_tpg(pnm ? new TConvProofGenerator(
                       pnm,
@@ -40,6 +41,19 @@ TheoryPreprocessor::TheoryPreprocessor(TheoryEngine& engine,
                       "TheoryPreprocessor::preprocess_rewrite",
                       &d_iqtc)
                 : nullptr),
+      d_tpgRtf(pnm ? new TConvProofGenerator(pnm,
+                                             userContext,
+                                             TConvPolicy::FIXPOINT,
+                                             TConvCachePolicy::NEVER,
+                                             "TheoryPreprocessor::rtf",
+                                             &d_iqtc)
+                   : nullptr),
+      d_tpgRew(pnm ? new TConvProofGenerator(pnm,
+                                             userContext,
+                                             TConvPolicy::ONCE,
+                                             TConvCachePolicy::NEVER,
+                                             "TheoryPreprocessor::pprew")
+                   : nullptr),
       d_tspg(nullptr),
       d_lp(pnm ? new LazyCDProof(pnm,
                                  nullptr,
@@ -51,14 +65,12 @@ TheoryPreprocessor::TheoryPreprocessor(TheoryEngine& engine,
   {
     // Make the main term conversion sequence generator, which tracks up to
     // three conversions made in succession:
-    // (1) theory preprocessing+rewriting
-    // (2) term formula removal
-    // (3) rewriting
-    // Steps (1) and (3) use a common term conversion generator.
+    // (1) rewriting
+    // (2) (theory preprocessing+rewriting until fixed point)+term formula
+    // removal+rewriting.
     std::vector<ProofGenerator*> ts;
-    ts.push_back(d_tpg.get());
-    ts.push_back(d_tfr.getTConvProofGenerator());
-    ts.push_back(d_tpg.get());
+    ts.push_back(d_tpgRew.get());
+    ts.push_back(d_tpgRtf.get());
     d_tspg.reset(new TConvSeqProofGenerator(
         pnm, ts, userContext, "TheoryPreprocessor::sequence"));
   }
@@ -83,46 +95,33 @@ TrustNode TheoryPreprocessor::preprocessInternal(
 
   Trace("tpp-debug") << "TheoryPreprocessor::preprocess: start " << node
                      << std::endl;
-  // run theory preprocessing
-  TrustNode tpp = theoryPreprocess(node);
-  Node ppNode = tpp.getNode();
 
-  // Remove the ITEs, fixed point
-  TrustNode ttfr = d_tfr.run(ppNode, newLemmas, newSkolems, true);
-  Node rtfNode = ttfr.isNull() ? ppNode : ttfr.getNode();
+  // We must rewrite before preprocessing, because some terms when rewritten
+  // may introduce new terms that are not top-level and require preprocessing.
+  // An example of this is (forall ((x Int)) (and (tail L) (P x))) which
+  // rewrites to (and (tail L) (forall ((x Int)) (P x))). The subterm (tail L)
+  // must be preprocessed as a child here.
+  Node irNode = rewriteWithProof(node, d_tpgRew.get(), true);
 
-  if (Debug.isOn("lemma-ites"))
-  {
-    Debug("lemma-ites") << "removed ITEs from lemma: " << rtfNode << endl;
-    Debug("lemma-ites") << " + now have the following " << newLemmas.size()
-                        << " lemma(s):" << endl;
-    for (size_t i = 0, lsize = newLemmas.size(); i <= lsize; ++i)
-    {
-      Debug("lemma-ites") << " + " << newLemmas[i].getNode() << endl;
-    }
-    Debug("lemma-ites") << endl;
-  }
-  // Rewrite the main node, we rewrite and store the proof step, if necessary,
-  // in d_tpg, which maintains the fact that d_tpg can prove the rewrite.
-  Node retNode = rewriteWithProof(rtfNode);
+  // run theory preprocessing
+  TrustNode tpp = theoryPreprocess(irNode, newLemmas, newSkolems);
+  Node ppNode = tpp.getNode();
 
   if (Trace.isOn("tpp-debug"))
   {
-    if (node != ppNode)
-    {
-      Trace("tpp-debug") << "after preprocessing : " << ppNode << std::endl;
-    }
-    if (rtfNode != ppNode)
+    if (node != irNode)
     {
-      Trace("tpp-debug") << "after rtf : " << rtfNode << std::endl;
+      Trace("tpp-debug") << "after initial rewriting : " << irNode << std::endl;
     }
-    if (retNode != rtfNode)
+    if (irNode != ppNode)
     {
-      Trace("tpp-debug") << "after rewriting : " << retNode << std::endl;
+      Trace("tpp-debug")
+          << "after preprocessing + rewriting and term formula removal : "
+          << ppNode << std::endl;
     }
     Trace("tpp-debug") << "TheoryPreprocessor::preprocess: finish" << std::endl;
   }
-  if (node == retNode)
+  if (node == ppNode)
   {
     Trace("tpp-debug") << "...TheoryPreprocessor::preprocess returned no change"
                        << std::endl;
@@ -136,19 +135,17 @@ TrustNode TheoryPreprocessor::preprocessInternal(
   {
     std::vector<Node> cterms;
     cterms.push_back(node);
+    cterms.push_back(irNode);
     cterms.push_back(ppNode);
-    cterms.push_back(rtfNode);
-    cterms.push_back(retNode);
     // We have that:
-    // node -> ppNode via preprocessing + rewriting
-    // ppNode -> rtfNode via term formula removal
-    // rtfNode -> retNode via rewriting
+    // node -> irNode via rewriting
+    // irNode -> ppNode via theory-preprocessing + rewriting + tf removal
     tret = d_tspg->mkTrustRewriteSequence(cterms);
     tret.debugCheckClosed("tpp-debug", "TheoryPreprocessor::lemma_ret");
   }
   else
   {
-    tret = TrustNode::mkTrustRewrite(node, retNode, nullptr);
+    tret = TrustNode::mkTrustRewrite(node, ppNode, nullptr);
   }
 
   // now, rewrite the lemmas
@@ -270,7 +267,10 @@ struct preprocess_stack_element
   preprocess_stack_element(TNode n) : node(n), children_added(false) {}
 };
 
-TrustNode TheoryPreprocessor::theoryPreprocess(TNode assertion)
+TrustNode TheoryPreprocessor::theoryPreprocess(
+    TNode assertion,
+    std::vector<TrustNode>& newLemmas,
+    std::vector<Node>& newSkolems)
 {
   Trace("theory::preprocess")
       << "TheoryPreprocessor::theoryPreprocess(" << assertion << ")" << endl;
@@ -287,36 +287,54 @@ TrustNode TheoryPreprocessor::theoryPreprocess(TNode assertion)
     TNode current = stackHead.node;
 
     Trace("theory::preprocess-debug")
-        << "TheoryPreprocessor::theoryPreprocess(" << assertion
-        << "): processing " << current << endl;
+        << "TheoryPreprocessor::theoryPreprocess processing " << current
+        << endl;
 
     // If node already in the cache we're done, pop from the stack
-    NodeMap::iterator find = d_ppCache.find(current);
-    if (find != d_ppCache.end())
+    if (d_rtfCache.find(current) != d_rtfCache.end())
     {
       toVisit.pop_back();
       continue;
     }
 
-    if (!d_logicInfo.isTheoryEnabled(Theory::theoryOf(current))
-        && Theory::theoryOf(current) != THEORY_SAT_SOLVER)
+    TheoryId tid = Theory::theoryOf(current);
+
+    if (!d_logicInfo.isTheoryEnabled(tid) && tid != THEORY_SAT_SOLVER)
     {
       stringstream ss;
       ss << "The logic was specified as " << d_logicInfo.getLogicString()
-         << ", which doesn't include " << Theory::theoryOf(current)
+         << ", which doesn't include " << tid
          << ", but got a preprocessing-time fact for that theory." << endl
          << "The fact:" << endl
          << current;
       throw LogicException(ss.str());
     }
-
     // If this is an atom, we preprocess its terms with the theory ppRewriter
-    if (Theory::theoryOf(current) != THEORY_BOOL)
+    if (tid != THEORY_BOOL)
     {
       Node ppRewritten = ppTheoryRewrite(current);
-      d_ppCache[current] = ppRewritten;
-      Assert(Rewriter::rewrite(d_ppCache[current].get())
-             == d_ppCache[current].get());
+      Assert(Rewriter::rewrite(ppRewritten) == ppRewritten);
+      if (isProofEnabled() && ppRewritten != current)
+      {
+        TrustNode trn =
+            TrustNode::mkTrustRewrite(current, ppRewritten, d_tpg.get());
+        registerTrustedRewrite(trn, d_tpgRtf.get(), true);
+      }
+
+      // Term formula removal without fixed point. We do not need to do fixed
+      // point since newLemmas are theory-preprocessed until fixed point in
+      // preprocessInternal (at top-level, when procLemmas=true).
+      TrustNode ttfr = d_tfr.run(ppRewritten, newLemmas, newSkolems, false);
+      Node rtfNode = ppRewritten;
+      if (!ttfr.isNull())
+      {
+        rtfNode = ttfr.getNode();
+        registerTrustedRewrite(ttfr, d_tpgRtf.get(), true);
+      }
+      // Finish the conversion by rewriting. This is registered as a
+      // post-rewrite, since it is the last step applied for theory atoms.
+      Node retNode = rewriteWithProof(rtfNode, d_tpgRtf.get(), false);
+      d_rtfCache[current] = retNode;
       continue;
     }
 
@@ -331,17 +349,18 @@ TrustNode TheoryPreprocessor::theoryPreprocess(TNode assertion)
       }
       for (unsigned i = 0; i < current.getNumChildren(); ++i)
       {
-        Assert(d_ppCache.find(current[i]) != d_ppCache.end());
-        builder << d_ppCache[current[i]].get();
+        Assert(d_rtfCache.find(current[i]) != d_rtfCache.end());
+        builder << d_rtfCache[current[i]].get();
       }
       // Mark the substitution and continue
       Node result = builder;
-      // always rewrite here, since current may not be in rewritten form
-      result = rewriteWithProof(result);
+      // always rewrite here, since current may not be in rewritten form after
+      // reconstruction
+      result = rewriteWithProof(result, d_tpgRtf.get(), false);
       Trace("theory::preprocess-debug")
-          << "TheoryPreprocessor::theoryPreprocess(" << assertion
-          << "): setting " << current << " -> " << result << endl;
-      d_ppCache[current] = result;
+          << "TheoryPreprocessor::theoryPreprocess setting " << current
+          << " -> " << result << endl;
+      d_rtfCache[current] = result;
       toVisit.pop_back();
     }
     else
@@ -356,8 +375,7 @@ TrustNode TheoryPreprocessor::theoryPreprocess(TNode assertion)
              ++child_it)
         {
           TNode childNode = *child_it;
-          NodeMap::iterator childFind = d_ppCache.find(childNode);
-          if (childFind == d_ppCache.end())
+          if (d_rtfCache.find(childNode) == d_rtfCache.end())
           {
             toVisit.push_back(childNode);
           }
@@ -367,15 +385,16 @@ TrustNode TheoryPreprocessor::theoryPreprocess(TNode assertion)
       {
         // No children, so we're done
         Trace("theory::preprocess-debug")
-            << "SubstitutionMap::internalSubstitute(" << assertion
-            << "): setting " << current << " -> " << current << endl;
-        d_ppCache[current] = current;
+            << "SubstitutionMap::internalSubstitute setting " << current
+            << " -> " << current << endl;
+        d_rtfCache[current] = current;
         toVisit.pop_back();
       }
     }
   }
+  Assert(d_rtfCache.find(assertion) != d_rtfCache.end());
   // Return the substituted version
-  Node res = d_ppCache[assertion];
+  Node res = d_rtfCache[assertion];
   return TrustNode::mkTrustRewrite(assertion, res, d_tpg.get());
 }
 
@@ -391,27 +410,24 @@ Node TheoryPreprocessor::ppTheoryRewrite(TNode term)
   {
     return preprocessWithProof(term);
   }
+  // should be in rewritten form here
+  Assert(term == Rewriter::rewrite(term));
   Trace("theory-pp") << "ppTheoryRewrite { " << term << endl;
-  // We must rewrite before preprocessing, because some terms when rewritten
-  // may introduce new terms that are not top-level and require preprocessing.
-  // An example of this is (forall ((x Int)) (and (tail L) (P x))) which
-  // rewrites to (and (tail L) (forall ((x Int)) (P x))). The subterm (tail L)
-  // must be preprocessed as a child here.
-  Node newTerm = rewriteWithProof(term);
   // do not rewrite inside quantifiers
-  if (newTerm.getNumChildren() > 0 && !newTerm.isClosure())
+  Node newTerm = term;
+  if (!term.isClosure())
   {
-    NodeBuilder<> newNode(newTerm.getKind());
-    if (newTerm.getMetaKind() == kind::metakind::PARAMETERIZED)
+    NodeBuilder<> newNode(term.getKind());
+    if (term.getMetaKind() == kind::metakind::PARAMETERIZED)
     {
-      newNode << newTerm.getOperator();
+      newNode << term.getOperator();
     }
-    for (const Node& nt : newTerm)
+    for (const Node& nt : term)
     {
       newNode << ppTheoryRewrite(nt);
     }
     newTerm = Node(newNode);
-    newTerm = rewriteWithProof(newTerm);
+    newTerm = rewriteWithProof(newTerm, d_tpg.get(), false);
   }
   newTerm = preprocessWithProof(newTerm);
   d_ppCache[term] = newTerm;
@@ -419,9 +435,10 @@ Node TheoryPreprocessor::ppTheoryRewrite(TNode term)
   return newTerm;
 }
 
-Node TheoryPreprocessor::rewriteWithProof(Node term)
+Node TheoryPreprocessor::rewriteWithProof(Node term,
+                                          TConvProofGenerator* pg,
+                                          bool isPre)
 {
-  // FIXME (project #37): should properly distinguish pre vs post rewrite
   Node termr = Rewriter::rewrite(term);
   // store rewrite step if tracking proofs and it rewrites
   if (isProofEnabled())
@@ -432,7 +449,7 @@ Node TheoryPreprocessor::rewriteWithProof(Node term)
       Trace("tpp-debug") << "TheoryPreprocessor: addRewriteStep (rewriting) "
                          << term << " -> " << termr << std::endl;
       // always use term context hash 0 (default)
-      d_tpg->addRewriteStep(term, termr, PfRule::REWRITE, {}, {term}, false);
+      pg->addRewriteStep(term, termr, PfRule::REWRITE, {}, {term}, isPre);
     }
   }
   return termr;
@@ -471,37 +488,52 @@ Node TheoryPreprocessor::preprocessWithProof(Node term)
   }
   Node termr = trn.getNode();
   Assert(term != termr);
-  // FIXME (project #37): should properly distinguish pre vs post rewrite
   if (isProofEnabled())
   {
-    if (trn.getGenerator() != nullptr)
-    {
-      Trace("tpp-debug") << "TheoryPreprocessor: addRewriteStep (generator) "
-                         << term << " -> " << termr << std::endl;
-      trn.debugCheckClosed("tpp-debug",
-                           "TheoryPreprocessor::preprocessWithProof");
-      // always use term context hash 0 (default)
-      d_tpg->addRewriteStep(
-          term, termr, trn.getGenerator(), false, PfRule::ASSUME, true);
-    }
-    else
-    {
-      Trace("tpp-debug") << "TheoryPreprocessor: addRewriteStep (trusted) "
-                         << term << " -> " << termr << std::endl;
-      // small step trust
-      d_tpg->addRewriteStep(term,
-                            termr,
-                            PfRule::THEORY_PREPROCESS,
-                            {},
-                            {term.eqNode(termr)},
-                            false);
-    }
+    registerTrustedRewrite(trn, d_tpg.get(), false);
   }
-  termr = rewriteWithProof(termr);
+  // Rewrite again here, which notice is a *pre* rewrite.
+  termr = rewriteWithProof(termr, d_tpg.get(), true);
   return ppTheoryRewrite(termr);
 }
 
 bool TheoryPreprocessor::isProofEnabled() const { return d_tpg != nullptr; }
 
+void TheoryPreprocessor::registerTrustedRewrite(TrustNode trn,
+                                                TConvProofGenerator* pg,
+                                                bool isPre)
+{
+  if (!isProofEnabled() || trn.isNull())
+  {
+    return;
+  }
+  Assert(trn.getKind() == TrustNodeKind::REWRITE);
+  Node eq = trn.getProven();
+  Node term = eq[0];
+  Node termr = eq[1];
+  if (trn.getGenerator() != nullptr)
+  {
+    Trace("tpp-debug") << "TheoryPreprocessor: addRewriteStep (generator) "
+                       << term << " -> " << termr << std::endl;
+    trn.debugCheckClosed("tpp-debug",
+                         "TheoryPreprocessor::preprocessWithProof");
+    // always use term context hash 0 (default)
+    pg->addRewriteStep(
+        term, termr, trn.getGenerator(), isPre, PfRule::ASSUME, true);
+  }
+  else
+  {
+    Trace("tpp-debug") << "TheoryPreprocessor: addRewriteStep (trusted) "
+                       << term << " -> " << termr << std::endl;
+    // small step trust
+    pg->addRewriteStep(term,
+                       termr,
+                       PfRule::THEORY_PREPROCESS,
+                       {},
+                       {term.eqNode(termr)},
+                       isPre);
+  }
+}
+
 }  // namespace theory
 }  // namespace CVC4
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>	2021-02-02 14:48:59 -0600
committer	GitHub <noreply@github.com>	2021-02-02 14:48:59 -0600
commit	3393c0c828b44f88c92e52a2b49d8a572b2a9b93 (patch)
tree	4067ab99a209911548ccaa5b0413a1e016c27845 /src/theory/theory_preprocessor.cpp
parent	478638868ec11c18882b9036850cafe4ff36f4bb (diff)