From f93a68fdf2b62a40dd74bdb04aafb60ea7f1a69a Mon Sep 17 00:00:00 2001 From: Alex Ozdemir Date: Thu, 28 Feb 2019 21:54:08 -0800 Subject: ErProof class with LFSC output (#2812) * ErProof class with LFSC output * Created a TraceCheckProof class * parsable from text * Created an ErProof class * constructible from a TraceCheckProof * writable as LFSC * A bunch of unit tests * Reponded to Mathias's first set of comments. Credits to Mathias for many of the fixes! * Responed to Andres's first set, fixed tests I accidentally deleted a "!" last time, causing stuff to fail. * Use Configuration::isAssertionBuild * Clarified comment * Responded to Andres's 2nd review * Gaurding against a memory error. * Renaming a file. * Aggressively unlinking temporary files. --- src/proof/er/er_proof.cpp | 383 ++++++++++++++++++++++++++++++++++++++++++++++ src/proof/er/er_proof.h | 208 +++++++++++++++++++++++++ 2 files changed, 591 insertions(+) create mode 100644 src/proof/er/er_proof.cpp create mode 100644 src/proof/er/er_proof.h (limited to 'src/proof') diff --git a/src/proof/er/er_proof.cpp b/src/proof/er/er_proof.cpp new file mode 100644 index 000000000..452b64b11 --- /dev/null +++ b/src/proof/er/er_proof.cpp @@ -0,0 +1,383 @@ +/********************* */ +/*! \file er_proof.cpp + ** \verbatim + ** Top contributors (to current version): + ** Alex Ozdemir + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief ER Proof Format + ** + ** Declares C++ types that represent an ER/TRACECHECK proof. + ** Defines serialization for these types. + ** + ** You can find details about the way ER is encoded in the TRACECHECK + ** format at these locations: + ** https://github.com/benjaminkiesl/drat2er + ** http://www.cs.utexas.edu/users/marijn/publications/ijcar18.pdf + **/ + +#include "proof/er/er_proof.h" + +#include +#include +#include +#include +#include +#include + +#include "base/cvc4_assert.h" +#include "base/map_util.h" +#include "proof/dimacs_printer.h" +#include "proof/lfsc_proof_printer.h" +#include "proof/proof_manager.h" + +#if CVC4_USE_DRAT2ER +#include "drat2er.h" +#include "drat2er_options.h" +#endif + +namespace CVC4 { +namespace proof { +namespace er { + +TraceCheckProof TraceCheckProof::fromText(std::istream& in) +{ + TraceCheckProof pf; + TraceCheckIdx idx = 0; + int64_t token = 0; + + // For each line of the proof, start with the idx + // If there is no idx, then you're done! + in >> idx; + for (; !in.eof(); in >> idx) + { + Assert(in.good()); + + // Then parse the clause (it's 0-terminated) + std::vector clause; + in >> token; + for (; token != 0; in >> token) + { + clause.emplace_back(std::abs(token) - 1, token < 0); + } + + // Then parse the chain of literals (it's also 0-terminated) + std::vector chain; + in >> token; + for (; token != 0; in >> token) + { + Assert(token > 0); + chain.push_back(token); + } + + // Add the line to the proof + pf.d_lines.emplace_back(idx, std::move(clause), std::move(chain)); + } + return pf; +} + +ErProof ErProof::fromBinaryDratProof(const ClauseUseRecord& usedClauses, + const std::string& dratBinary) +{ + std::ostringstream cmd; + char formulaFilename[] = "/tmp/cvc4-dimacs-XXXXXX"; + char dratFilename[] = "/tmp/cvc4-drat-XXXXXX"; + char tracecheckFilename[] = "/tmp/cvc4-tracecheck-er-XXXXXX"; + + int r; + r = mkstemp(formulaFilename); + AlwaysAssert(r > 0); + close(r); + r = mkstemp(dratFilename); + AlwaysAssert(r > 0); + close(r); + r = mkstemp(tracecheckFilename); + AlwaysAssert(r > 0); + close(r); + + // Write the formula + std::ofstream formStream(formulaFilename); + printDimacs(formStream, usedClauses); + unlink(formulaFilename); + + // Write the (binary) DRAT proof + std::ofstream dratStream(dratFilename); + dratStream << dratBinary; + unlink(dratFilename); + + // Invoke drat2er +#if CVC4_USE_DRAT2ER + drat2er::TransformDRATToExtendedResolution(formulaFilename, + dratFilename, + tracecheckFilename, + false, + drat2er::options::QUIET); + +#else + Unimplemented( + "ER proof production requires drat2er.\n" + "Run contrib/get-drat2er, reconfigure with --drat2er, and rebuild"); +#endif + + // Parse the resulting TRACECHECK proof into an ER proof. + std::ifstream tracecheckStream(tracecheckFilename); + ErProof proof(usedClauses, TraceCheckProof::fromText(tracecheckStream)); + unlink(tracecheckFilename); + + formStream.close(); + dratStream.close(); + tracecheckStream.close(); + + + + return proof; +} + +ErProof::ErProof(const ClauseUseRecord& usedClauses, + TraceCheckProof&& tracecheck) + : d_inputClauseIds(), d_definitions(), d_tracecheck(tracecheck) +{ + // Step zero, save input clause Ids for future printing + std::transform(usedClauses.begin(), + usedClauses.end(), + std::back_inserter(d_inputClauseIds), + [](const std::pair& pair) { + return pair.first; + }); + + // Step one, verify the formula starts the proof + if (Configuration::isAssertionBuild()) + { + for (size_t i = 0, n = usedClauses.size(); i < n; ++i) + { + Assert(d_tracecheck.d_lines[i].d_idx = i + 1); + Assert(d_tracecheck.d_lines[i].d_chain.size() == 0); + Assert(d_tracecheck.d_lines[i].d_clause.size() + == usedClauses[i].second.size()); + for (size_t j = 0, m = usedClauses[i].second.size(); j < m; ++j) + { + Assert(usedClauses[i].second[j] == d_tracecheck.d_lines[i].d_clause[j]); + } + } + } + + // Step two, identify definitions. They correspond to lines that follow the + // input lines, are in bounds, and have no justifying chain. + for (size_t i = usedClauses.size(), n = d_tracecheck.d_lines.size(); + i < n && d_tracecheck.d_lines[i].d_chain.size() == 0;) + { + prop::SatClause c = d_tracecheck.d_lines[i].d_clause; + Assert(c.size() > 0); + Assert(!c[0].isNegated()); + + // Get the new variable of the definition -- the first variable of the + // first clause + prop::SatVariable newVar = c[0].getSatVariable(); + + // The rest of the literals in the clause of the 'other literals' of the def + std::vector otherLiterals{++c.begin(), c.end()}; + + size_t nLinesForThisDef = 2 + otherLiterals.size(); + // Look at the negation of the second literal in the second clause to get + // the old literal + AlwaysAssert(d_tracecheck.d_lines.size() > i + 1, + "Malformed definition in TRACECHECK proof from drat2er"); + d_definitions.emplace_back(newVar, + ~d_tracecheck.d_lines[i + 1].d_clause[1], + std::move(otherLiterals)); + + // Advance over the lines for this definition + i += nLinesForThisDef; + } +} + +void ErProof::outputAsLfsc(std::ostream& os) const +{ + // How many parens to close? + size_t parenCount = 0; + std::unordered_set newVariables; + + // Print Definitions + for (const ErDefinition& def : d_definitions) + { + os << "\n (decl_rat_elimination_def (" + << (def.d_oldLiteral.isNegated() ? "neg " : "pos ") + << ProofManager::getVarName(def.d_oldLiteral.getSatVariable(), "bb") + << ") "; + LFSCProofPrinter::printSatClause(def.d_otherLiterals, os, "bb"); + os << " (\\ er.v" << def.d_newVariable << " (\\ er.def" + << def.d_newVariable; + newVariables.insert(def.d_newVariable); + } + parenCount += 3 * d_definitions.size(); + + // Clausify Definitions + TraceCheckIdx firstDefClause = d_inputClauseIds.size() + 1; + for (const ErDefinition& def : d_definitions) + { + os << "\n (clausify_rat_elimination_def _ _ _ " + << "er.def " << def.d_newVariable << " _ _ (\\ er.c" << firstDefClause + << " (\\ er.c" << (firstDefClause + 1) << " (\\ er.cnf" + << def.d_newVariable; + + firstDefClause += 2 + def.d_otherLiterals.size(); + } + parenCount += 4 * d_definitions.size(); + + // Unroll proofs of CNFs to proofs of clauses + firstDefClause = d_inputClauseIds.size() + 1; + for (const ErDefinition& def : d_definitions) + { + for (size_t i = 0, n = def.d_otherLiterals.size(); i < n; ++i) + { + os << "\n (cnfc_unroll _ _ "; + os << "er.cnf" << def.d_newVariable; + if (i != 0) + { + os << ".u" << i; + } + os << " (\\ er.c" << (firstDefClause + 2 + i); + os << " (\\ er.cnf" << def.d_newVariable << ".u" << (i + 1); + } + parenCount += 3 * def.d_otherLiterals.size(); + + firstDefClause += 2 + def.d_otherLiterals.size(); + } + + // NB: At this point `firstDefClause` points to the first clause resulting + // from a resolution chain + + // Now, elaborate each resolution chain + for (size_t cId = firstDefClause, nLines = d_tracecheck.d_lines.size(); + cId <= nLines; + ++cId) + { + const std::vector& chain = + d_tracecheck.d_lines[cId - 1].d_chain; + const std::vector pivots = computePivotsForChain(chain); + Assert(chain.size() > 0); + Assert(chain.size() == pivots.size() + 1); + + os << "\n (satlem_simplify _ _ _ "; + parenCount += 1; + + // Print resolution openings (reverse order) + for (int64_t i = pivots.size() - 1; i >= 0; --i) + { + prop::SatLiteral pivot = pivots[i]; + os << "(" << (pivot.isNegated() ? 'Q' : 'R') << " _ _ "; + } + + // Print resolution start + writeIdForClauseProof(os, chain[0]); + os << " "; + + // Print resolution closings (forward order) + for (size_t i = 0, n = pivots.size(); i < n; ++i) + { + prop::SatVariable pivotVar = pivots[i].getSatVariable(); + TraceCheckIdx clauseId = chain[i + 1]; + writeIdForClauseProof(os, clauseId); + os << " "; + if (ContainsKey(newVariables, pivotVar)) + { + // This is a defined variable + os << "er.v" << pivotVar; + } + else + { + os << ProofManager::getVarName(pivotVar, "bb"); + } + os << ") "; + } + os << "(\\ er.c" << cId; + parenCount += 1; + } + + // Write proof of bottom + Assert(d_tracecheck.d_lines.back().d_clause.size() == 0, + "The TRACECHECK proof from drat2er did not prove bottom."); + os << "\n er.c" << d_tracecheck.d_lines.back().d_idx + << " ; (holds cln)\n"; + + // Finally, close the parentheses! + std::fill_n(std::ostream_iterator(os), parenCount, ')'); +} + +namespace { +/** + * Resolves two clauses + * + * @param dest one of the inputs, and the output too. **This is an input and + * output** + * @param src the other input + * + * @return the unique literal that was resolved on, with the polarization that + * it originally had in `dest`. + * + * For example, if dest = (1 3 -4 5) and src = (1 -3 5), then 3 is returned and + * after the call dest = (1 -4 5). + */ +prop::SatLiteral resolveModify( + std::unordered_set& dest, + const prop::SatClause& src) +{ + bool foundPivot = false; + prop::SatLiteral pivot(0, false); + + for (prop::SatLiteral lit : src) + { + auto negationLocation = dest.find(~lit); + if (negationLocation != dest.end()) + { + Assert(!foundPivot); + foundPivot = true; + dest.erase(negationLocation); + pivot = ~lit; + } + dest.insert(lit); + } + + Assert(foundPivot); + return pivot; +} +} // namespace + +std::vector ErProof::computePivotsForChain( + const std::vector& chain) const +{ + std::vector pivots; + + const prop::SatClause& first = d_tracecheck.d_lines[chain[0] - 1].d_clause; + std::unordered_set + runningClause{first.begin(), first.end()}; + + for (auto idx = ++chain.cbegin(), end = chain.cend(); idx != end; ++idx) + { + pivots.push_back( + resolveModify(runningClause, d_tracecheck.d_lines[*idx - 1].d_clause)); + } + return pivots; +} + +void ErProof::writeIdForClauseProof(std::ostream& o, TraceCheckIdx i) const +{ + if (i <= d_inputClauseIds.size()) + { + // This clause is an input clause! Ask the ProofManager for its name + o << ProofManager::getInputClauseName(d_inputClauseIds[i - 1], "bb"); + } + else + { + // This clause was introduced by a definition or resolution chain + o << "er.c" << i; + } +} + +} // namespace er +} // namespace proof +} // namespace CVC4 diff --git a/src/proof/er/er_proof.h b/src/proof/er/er_proof.h new file mode 100644 index 000000000..0a0038738 --- /dev/null +++ b/src/proof/er/er_proof.h @@ -0,0 +1,208 @@ +/********************* */ +/*! \file er_proof.h + ** \verbatim + ** Top contributors (to current version): + ** Alex Ozdemir + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief ER Proof Format + ** + ** Declares C++ types that represent an ER/TRACECHECK proof. + ** Defines serialization for these types. + ** + ** You can find details about the way ER is encoded in the TRACECHECK + ** format at these locations: + ** https://github.com/benjaminkiesl/drat2er + ** http://www.cs.utexas.edu/users/marijn/publications/ijcar18.pdf + ** + **/ + +#include "cvc4_private.h" + +#ifndef __CVC4__PROOF__ER__ER_PROOF_H +#define __CVC4__PROOF__ER__ER_PROOF_H + +#include +#include + +#include "proof/clause_id.h" +#include "prop/sat_solver_types.h" + +namespace CVC4 { +namespace proof { +namespace er { + +using ClauseUseRecord = std::vector>; + +/** + * A definition of the form: + * newVar <-> p v (~x_1 ^ ~x_2 ^ ... ^ ~x_n) + */ +struct ErDefinition +{ + ErDefinition(prop::SatVariable newVariable, + prop::SatLiteral oldLiteral, + std::vector&& otherLiterals) + : d_newVariable(newVariable), + d_oldLiteral(oldLiteral), + d_otherLiterals(otherLiterals) + { + } + + // newVar + prop::SatVariable d_newVariable; + // p + prop::SatLiteral d_oldLiteral; + // A list of the x_i's + std::vector d_otherLiterals; +}; + +// For representing a clause's index within a TRACECHECK proof. +using TraceCheckIdx = size_t; + +/** + * A single line in a TRACECHECK proof. + * + * Consists of the index of a new clause, the literals of that clause, and the + * indices for preceding clauses which can be combined in a resolution chain to + * produce this new clause. + */ +struct TraceCheckLine +{ + TraceCheckLine(TraceCheckIdx idx, + std::vector&& clause, + std::vector&& chain) + : d_idx(idx), d_clause(clause), d_chain(chain) + { + } + + // The index of the new clause + TraceCheckIdx d_idx; + // The new clause + std::vector d_clause; + /** + * Indices of clauses which must be resolved to produce this new clause. + * While the TRACECHECK format does not specify the order, we require them to + * be in resolution-order. + */ + std::vector d_chain; +}; + +/** + * A TRACECHECK proof -- just a list of lines + */ +struct TraceCheckProof +{ + static TraceCheckProof fromText(std::istream& in); + TraceCheckProof() : d_lines() {} + + // The lines of this proof. + std::vector d_lines; +}; + +/** + * An extended resolution proof. + * It supports resolution, along with extensions of the form + * + * newVar <-> p v (~x_1 ^ ~x_2 ^ ... ^ ~x_n) + */ +class ErProof +{ + public: + /** + * Construct an ER proof from a DRAT proof, using drat2er + * + * @param usedClauses The CNF formula that we're deriving bottom from. + * @param dratBinary The DRAT proof from the SAT solver, as a binary stream. + */ + static ErProof fromBinaryDratProof(const ClauseUseRecord& usedClauses, + const std::string& dratBinary); + + /** + * Construct an ER proof from a TRACECHECK ER proof + * + * This basically just identifies groups of lines which correspond to + * definitions, and extracts them. + * + * @param usedClauses The CNF formula that we're deriving bottom from. + * @param tracecheck The TRACECHECK proof, as a stream. + */ + ErProof(const ClauseUseRecord& usedClauses, TraceCheckProof&& tracecheck); + + /** + * Write the ER proof as an LFSC value of type (holds cln). + * The format is from the LFSC signature er.plf + * + * Reads the current `ProofManager` to determine what the variables should be + * named. + * + * @param os the stream to write to + */ + void outputAsLfsc(std::ostream& os) const; + + const std::vector& getInputClauseIds() const + { + return d_inputClauseIds; + } + + const std::vector& getDefinitions() const + { + return d_definitions; + } + + const TraceCheckProof& getTraceCheckProof() const { return d_tracecheck; } + + private: + /** + * Creates an empty ErProof. + */ + ErProof() : d_inputClauseIds(), d_definitions(), d_tracecheck() {} + + /** + * Computes the pivots on the basis of which an in-order resolution chain is + * resolved. + * + * c0 c1 + * \ / Clauses c_i being resolved in a chain around + * v1 c2 pivots v_i. + * \ / + * v2 c3 + * \ / + * v3 c4 + * \ / + * v4 + * + * + * @param chain the chain, of N clause indices + * + * @return a list of N - 1 variables, the list ( v_i ) from i = 1 to N - 1 + */ + std::vector computePivotsForChain( + const std::vector& chain) const; + + /** + * Write the LFSC identifier for the proof of a clause + * + * @param o where to write to + * @param i the TRACECHECK index for the clause whose proof identifier to + * print + */ + void writeIdForClauseProof(std::ostream& o, TraceCheckIdx i) const; + + // A list of the Ids for the input clauses, in order. + std::vector d_inputClauseIds; + // A list of new variable definitions, in order. + std::vector d_definitions; + // The underlying TRACECHECK proof. + TraceCheckProof d_tracecheck; +}; + +} // namespace er +} // namespace proof +} // namespace CVC4 + +#endif // __CVC4__PROOF__ER__ER_PROOF_H -- cgit v1.2.3