From c9b7c3d6fcd49b6d75a85e1316e9918374d1ebbe Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Sun, 22 Mar 2020 21:05:37 -0700 Subject: Collect statistics about normal form inferences (#4127) This commit adds code to count the number of inferences made of each inference type for normal form inferences. It extends the Inference enum in `infer_info.h` and adds two new `sendInference()` methods in the `InferenceManager` to send and count inferences that have a corresonding entry in the `Inference` enum. --- src/theory/strings/core_solver.cpp | 29 +++++++++++----------- src/theory/strings/infer_info.cpp | 22 ++++++++++------- src/theory/strings/infer_info.h | 40 ++++++++++++++++++++++-------- src/theory/strings/inference_manager.cpp | 32 ++++++++++++++++++++---- src/theory/strings/inference_manager.h | 31 ++++++++++++++++++++++- src/theory/strings/sequences_stats.cpp | 37 ++++++++++++++++++++++++++++ src/theory/strings/sequences_stats.h | 42 ++++++++++++++++++++++++++++++++ src/theory/strings/theory_strings.cpp | 22 +---------------- src/theory/strings/theory_strings.h | 28 +++++++++------------ 9 files changed, 206 insertions(+), 77 deletions(-) create mode 100644 src/theory/strings/sequences_stats.cpp create mode 100644 src/theory/strings/sequences_stats.h (limited to 'src/theory') diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index 447af00e8..556ae28c3 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -974,7 +974,7 @@ void CoreSolver::processNEqc(std::vector& normal_forms) bool set_use_index = false; Trace("strings-solve") << "Possible inferences (" << pinfer.size() << ") : " << std::endl; - unsigned min_id = 9; + Inference min_id = Inference::NONE; unsigned max_index = 0; for (unsigned i = 0, size = pinfer.size(); i < size; i++) { @@ -1031,7 +1031,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // can infer that this string must be empty Node eq = nfkv[index_k].eqNode(d_emptyString); Assert(!d_state.areEqual(d_emptyString, nfkv[index_k])); - d_im.sendInference(curr_exp, eq, "N_EndpointEmp"); + d_im.sendInference(curr_exp, eq, Inference::N_ENDPOINT_EMP); index_k++; } break; @@ -1079,7 +1079,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, Node leneq = xLenTerm.eqNode(yLenTerm); NormalForm::getExplanationForPrefixEq(nfi, nfj, index, index, lenExp); lenExp.push_back(leneq); - d_im.sendInference(lenExp, eq, "N_Unify"); + d_im.sendInference(lenExp, eq, Inference::N_UNIFY); break; } else if ((x.getKind() != CONST_STRING && index == nfiv.size() - rproc - 1) @@ -1116,7 +1116,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, { std::vector antec; NormalForm::getExplanationForPrefixEq(nfi, nfj, -1, -1, antec); - d_im.sendInference(antec, eqn[0].eqNode(eqn[1]), "N_EndpointEq", true); + d_im.sendInference( + antec, eqn[0].eqNode(eqn[1]), Inference::N_ENDPOINT_EQ, true); } else { @@ -1176,7 +1177,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // E.g. "abc" ++ ... = "bc" ++ ... ---> conflict std::vector antec; NormalForm::getExplanationForPrefixEq(nfi, nfj, index, index, antec); - d_im.sendInference(antec, d_false, "N_Const", true); + d_im.sendInference(antec, d_false, Inference::N_CONST, true); break; } } @@ -1205,7 +1206,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, lenEq = Rewriter::rewrite(lenEq); info.d_conc = nm->mkNode(OR, lenEq, lenEq.negate()); info.d_pending_phase[lenEq] = true; - info.d_id = INFER_LEN_SPLIT; + info.d_id = Inference::LEN_SPLIT; pinfer.push_back(info); break; } @@ -1276,12 +1277,12 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // inferred info.d_conc = nm->mkNode( AND, p.eqNode(nc), !eq.getConst() ? pEq.negate() : pEq); - info.d_id = INFER_INFER_EMP; + info.d_id = Inference::INFER_EMP; } else { info.d_conc = nm->mkNode(OR, eq, eq.negate()); - info.d_id = INFER_LEN_SPLIT_EMP; + info.d_id = Inference::LEN_SPLIT_EMP; } pinfer.push_back(info); break; @@ -1353,7 +1354,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, info.d_conc = nc.eqNode(isRev ? utils::mkNConcat(sk, prea) : utils::mkNConcat(prea, sk)); info.d_new_skolem[LENGTH_SPLIT].push_back(sk); - info.d_id = INFER_SSPLIT_CST_PROP; + info.d_id = Inference::SSPLIT_CST_PROP; pinfer.push_back(info); break; } @@ -1379,7 +1380,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, info.d_conc = nc.eqNode(isRev ? utils::mkNConcat(sk, firstChar) : utils::mkNConcat(firstChar, sk)); info.d_new_skolem[LENGTH_SPLIT].push_back(sk); - info.d_id = INFER_SSPLIT_CST; + info.d_id = Inference::SSPLIT_CST; pinfer.push_back(info); break; } @@ -1455,14 +1456,14 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, { info.d_antn.push_back(lentTestExp); info.d_conc = lentTestSuccess == 0 ? eq1 : eq2; - info.d_id = INFER_SSPLIT_VAR_PROP; + info.d_id = Inference::SSPLIT_VAR_PROP; } else { Node ldeq = nm->mkNode(EQUAL, xLenTerm, yLenTerm).negate(); info.d_ant.push_back(ldeq); info.d_conc = nm->mkNode(OR, eq1, eq2); - info.d_id = INFER_SSPLIT_VAR; + info.d_id = Inference::SSPLIT_VAR; } pinfer.push_back(info); break; @@ -1582,7 +1583,7 @@ CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi, { // try to make t equal to empty to avoid loop info.d_conc = nm->mkNode(kind::OR, split_eq, split_eq.negate()); - info.d_id = INFER_LEN_SPLIT_EMP; + info.d_id = Inference::LEN_SPLIT_EMP; return ProcessLoopResult::INFERENCE; } else @@ -1703,7 +1704,7 @@ CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi, // we will be done info.d_conc = conc; - info.d_id = INFER_FLOOP; + info.d_id = Inference::FLOOP; info.d_nf_pair[0] = nfi.d_base; info.d_nf_pair[1] = nfj.d_base; return ProcessLoopResult::INFERENCE; diff --git a/src/theory/strings/infer_info.cpp b/src/theory/strings/infer_info.cpp index e15ee984d..aa7fe8974 100644 --- a/src/theory/strings/infer_info.cpp +++ b/src/theory/strings/infer_info.cpp @@ -24,20 +24,24 @@ std::ostream& operator<<(std::ostream& out, Inference i) { switch (i) { - case INFER_INFER_EMP: out << "Infer-Emp"; break; - case INFER_SSPLIT_CST_PROP: out << "S-Split(CST-P)-prop"; break; - case INFER_SSPLIT_VAR_PROP: out << "S-Split(VAR)-prop"; break; - case INFER_LEN_SPLIT: out << "Len-Split(Len)"; break; - case INFER_LEN_SPLIT_EMP: out << "Len-Split(Emp)"; break; - case INFER_SSPLIT_CST: out << "S-Split(CST-P)"; break; - case INFER_SSPLIT_VAR: out << "S-Split(VAR)"; break; - case INFER_FLOOP: out << "F-Loop"; break; + case Inference::N_ENDPOINT_EMP: out << "N_EndpointEmp"; break; + case Inference::N_UNIFY: out << "N_Unify"; break; + case Inference::N_ENDPOINT_EQ: out << "N_EndpointEq"; break; + case Inference::N_CONST: out << "N_Const"; break; + case Inference::INFER_EMP: out << "Infer-Emp"; break; + case Inference::SSPLIT_CST_PROP: out << "S-Split(CST-P)-prop"; break; + case Inference::SSPLIT_VAR_PROP: out << "S-Split(VAR)-prop"; break; + case Inference::LEN_SPLIT: out << "Len-Split(Len)"; break; + case Inference::LEN_SPLIT_EMP: out << "Len-Split(Emp)"; break; + case Inference::SSPLIT_CST: out << "S-Split(CST-P)"; break; + case Inference::SSPLIT_VAR: out << "S-Split(VAR)"; break; + case Inference::FLOOP: out << "F-Loop"; break; default: out << "?"; break; } return out; } -InferInfo::InferInfo() : d_id(INFER_NONE), d_index(0), d_rev(false) {} +InferInfo::InferInfo() : d_id(Inference::NONE), d_index(0), d_rev(false) {} } // namespace strings } // namespace theory diff --git a/src/theory/strings/infer_info.h b/src/theory/strings/infer_info.h index b98b4fbf2..9d2f71b53 100644 --- a/src/theory/strings/infer_info.h +++ b/src/theory/strings/infer_info.h @@ -29,50 +29,70 @@ namespace strings { * * These are variants of the inference rules in Figures 3-5 of Liang et al. * "A DPLL(T) Solver for a Theory of Strings and Regular Expressions", CAV 2014. + * + * Note: The order in this enum matters in certain cases (e.g. inferences + * related to normal forms), inferences that come first are generally + * preferred. */ -enum Inference +enum class Inference : uint32_t { - INFER_NONE = 0, + // Given two normal forms, infers that the remainder one of them has to be + // empty. For example: + // If x1 ++ x2 = y1 and x1 = y1, then x2 = "" + N_ENDPOINT_EMP, + // Given two normal forms, infers that two components have to be the same if + // they have the same length. For example: + // If x1 ++ x2 = x3 ++ x4 and len(x1) = len(x3) then x1 = x3 + N_UNIFY, + // Given two normal forms, infers that the endpoints have to be the same. For + // example: + // If x1 ++ x2 = x3 ++ x4 ++ x5 and x1 = x3 then x2 = x4 ++ x5 + N_ENDPOINT_EQ, + // Given two normal forms with constant endpoints, infers a conflict if the + // endpoints do not agree. For example: + // If "abc" ++ ... = "bc" ++ ... then conflict + N_CONST, // infer empty, for example: // (~) x = "" // This is inferred when we encounter an x such that x = "" rewrites to a // constant. This inference is used for instance when we otherwise would have // split on the emptiness of x but the rewriter tells us the emptiness of x // can be inferred. - INFER_INFER_EMP = 1, + INFER_EMP, // string split constant propagation, for example: // x = y, x = "abc", y = y1 ++ "b" ++ y2 // implies y1 = "a" ++ y1' - INFER_SSPLIT_CST_PROP, + SSPLIT_CST_PROP, // string split variable propagation, for example: // x = y, x = x1 ++ x2, y = y1 ++ y2, len( x1 ) >= len( y1 ) // implies x1 = y1 ++ x1' // This is inspired by Zheng et al CAV 2015. - INFER_SSPLIT_VAR_PROP, + SSPLIT_VAR_PROP, // length split, for example: // len( x1 ) = len( y1 ) V len( x1 ) != len( y1 ) // This is inferred when e.g. x = y, x = x1 ++ x2, y = y1 ++ y2. - INFER_LEN_SPLIT, + LEN_SPLIT, // length split empty, for example: // z = "" V z != "" // This is inferred when, e.g. x = y, x = z ++ x1, y = y1 ++ z - INFER_LEN_SPLIT_EMP, + LEN_SPLIT_EMP, // string split constant // x = y, x = "c" ++ x2, y = y1 ++ y2, y1 != "" // implies y1 = "c" ++ y1' // This is a special case of F-Split in Figure 5 of Liang et al CAV 2014. - INFER_SSPLIT_CST, + SSPLIT_CST, // string split variable, for example: // x = y, x = x1 ++ x2, y = y1 ++ y2 // implies x1 = y1 ++ x1' V y1 = x1 ++ y1' // This is rule F-Split in Figure 5 of Liang et al CAV 2014. - INFER_SSPLIT_VAR, + SSPLIT_VAR, // flat form loop, for example: // x = y, x = x1 ++ z, y = z ++ y2 // implies z = u2 ++ u1, u in ( u1 ++ u2 )*, x1 = u2 ++ u, y2 = u ++ u1 // for fresh u, u1, u2. // This is the rule F-Loop from Figure 5 of Liang et al CAV 2014. - INFER_FLOOP, + FLOOP, + NONE, }; std::ostream& operator<<(std::ostream& out, Inference i); diff --git a/src/theory/strings/inference_manager.cpp b/src/theory/strings/inference_manager.cpp index 389c4e7bf..eebad2274 100644 --- a/src/theory/strings/inference_manager.cpp +++ b/src/theory/strings/inference_manager.cpp @@ -34,11 +34,13 @@ InferenceManager::InferenceManager(TheoryStrings& p, context::UserContext* u, SolverState& s, SkolemCache& skc, - OutputChannel& out) + OutputChannel& out, + SequencesStatistics& statistics) : d_parent(p), d_state(s), d_skCache(skc), d_out(out), + d_statistics(statistics), d_keep(c), d_proxyVar(u), d_proxyVarToLength(u), @@ -186,13 +188,33 @@ void InferenceManager::sendInference(const std::vector& exp, sendInference(exp, exp_n, eq, c, asLemma); } -void InferenceManager::sendInference(const InferInfo& i) +void InferenceManager::sendInference(const std::vector& exp, + const std::vector& exp_n, + Node eq, + Inference infer, + bool asLemma) +{ + d_statistics.d_inferences << infer; + std::stringstream ss; + ss << infer; + sendInference(exp, exp_n, eq, ss.str().c_str(), asLemma); +} + +void InferenceManager::sendInference(const std::vector& exp, + Node eq, + Inference infer, + bool asLemma) { - std::stringstream ssi; - ssi << i.d_id; - sendInference(i.d_ant, i.d_antn, i.d_conc, ssi.str().c_str(), true); + d_statistics.d_inferences << infer; + std::stringstream ss; + ss << infer; + sendInference(exp, eq, ss.str().c_str(), asLemma); } +void InferenceManager::sendInference(const InferInfo& i) +{ + sendInference(i.d_ant, i.d_antn, i.d_conc, i.d_id, true); +} void InferenceManager::sendLemma(Node ant, Node conc, const char* c) { if (conc.isNull() || conc == d_false) diff --git a/src/theory/strings/inference_manager.h b/src/theory/strings/inference_manager.h index 50cfdb6fb..c9d483c73 100644 --- a/src/theory/strings/inference_manager.h +++ b/src/theory/strings/inference_manager.h @@ -25,6 +25,7 @@ #include "expr/node.h" #include "theory/output_channel.h" #include "theory/strings/infer_info.h" +#include "theory/strings/sequences_stats.h" #include "theory/strings/skolem_cache.h" #include "theory/strings/solver_state.h" #include "theory/uf/equality_engine.h" @@ -76,7 +77,8 @@ class InferenceManager context::UserContext* u, SolverState& s, SkolemCache& skc, - OutputChannel& out); + OutputChannel& out, + SequencesStatistics& statistics); ~InferenceManager() {} /** send internal inferences @@ -141,6 +143,28 @@ class InferenceManager Node eq, const char* c, bool asLemma = false); + + /** + * The same as `sendInference()` above but with an `Inference` instead of a + * string. This variant updates the statistics about the number of inferences + * made of each type. + */ + void sendInference(const std::vector& exp, + const std::vector& exp_n, + Node eq, + Inference infer, + bool asLemma = false); + + /** + * The same as `sendInference()` above but with an `Inference` instead of a + * string. This variant updates the statistics about the number of inferences + * made of each type. + */ + void sendInference(const std::vector& exp, + Node eq, + Inference infer, + bool asLemma = false); + /** Send inference * * Makes the appropriate call to send inference based on the infer info @@ -327,6 +351,10 @@ class InferenceManager * This is a reference to the output channel of the theory of strings. */ OutputChannel& d_out; + + /** Reference to the statistics for the theory of strings/sequences. */ + SequencesStatistics& d_statistics; + /** Common constants */ Node d_emptyString; Node d_true; @@ -366,6 +394,7 @@ class InferenceManager NodeNodeMap d_proxyVarToLength; /** List of terms that we have register length for */ NodeSet d_lengthLemmaTermsCache; + /** infer substitution proxy vars * * This method attempts to (partially) convert the formula n into a diff --git a/src/theory/strings/sequences_stats.cpp b/src/theory/strings/sequences_stats.cpp new file mode 100644 index 000000000..0f1e93599 --- /dev/null +++ b/src/theory/strings/sequences_stats.cpp @@ -0,0 +1,37 @@ +/********************* */ +/*! \file sequences_stats.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Statistics for the theory of strings/sequences + **/ + + +#include "theory/strings/sequences_stats.h" + +#include "smt/smt_statistics_registry.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +SequencesStatistics::SequencesStatistics() + : d_inferences("theory::strings::inferences") +{ + smtStatisticsRegistry()->registerStat(&d_inferences); +} + +SequencesStatistics::~SequencesStatistics() +{ + smtStatisticsRegistry()->unregisterStat(&d_inferences); +} + +} +} +} diff --git a/src/theory/strings/sequences_stats.h b/src/theory/strings/sequences_stats.h new file mode 100644 index 000000000..b55178f4c --- /dev/null +++ b/src/theory/strings/sequences_stats.h @@ -0,0 +1,42 @@ +/********************* */ +/*! \file sequences_stats.h + ** \verbatim + ** Top contributors (to current version): + ** Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Statistics for the theory of strings/sequences + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__SEQUENCES_STATS_H +#define CVC4__THEORY__STRINGS__SEQUENCES_STATS_H + +#include "theory/strings/infer_info.h" +#include "util/statistics_registry.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +class SequencesStatistics +{ + public: + SequencesStatistics(); + ~SequencesStatistics(); + + /** Counts the number of inferences made of each type of inference */ + HistogramStat d_inferences; +}; + + +} +} +} + +#endif /* CVC4__THEORY__STRINGS__SEQUENCES_STATS_H */ diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index a26669fbf..1006076d5 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -71,7 +71,7 @@ TheoryStrings::TheoryStrings(context::Context* c, d_notify(*this), d_equalityEngine(d_notify, c, "theory::strings", true), d_state(c, d_equalityEngine, d_valuation), - d_im(*this, c, u, d_state, d_sk_cache, out), + d_im(*this, c, u, d_state, d_sk_cache, out, d_statistics), d_pregistered_terms_cache(u), d_registered_terms_cache(u), d_functionsTerms(c), @@ -1155,26 +1155,6 @@ Node TheoryStrings::ppRewrite(TNode atom) { return atom; } -// Stats -TheoryStrings::Statistics::Statistics() - : d_splits("theory::strings::NumOfSplitOnDemands", 0), - d_eq_splits("theory::strings::NumOfEqSplits", 0), - d_deq_splits("theory::strings::NumOfDiseqSplits", 0), - d_loop_lemmas("theory::strings::NumOfLoops", 0) -{ - smtStatisticsRegistry()->registerStat(&d_splits); - smtStatisticsRegistry()->registerStat(&d_eq_splits); - smtStatisticsRegistry()->registerStat(&d_deq_splits); - smtStatisticsRegistry()->registerStat(&d_loop_lemmas); -} - -TheoryStrings::Statistics::~Statistics(){ - smtStatisticsRegistry()->unregisterStat(&d_splits); - smtStatisticsRegistry()->unregisterStat(&d_eq_splits); - smtStatisticsRegistry()->unregisterStat(&d_deq_splits); - smtStatisticsRegistry()->unregisterStat(&d_loop_lemmas); -} - /** run the given inference step */ void TheoryStrings::runInferStep(InferStep s, int effort) { diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 84c9e60c6..76c8f0469 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -19,6 +19,9 @@ #ifndef CVC4__THEORY__STRINGS__THEORY_STRINGS_H #define CVC4__THEORY__STRINGS__THEORY_STRINGS_H +#include +#include + #include "context/cdhashset.h" #include "context/cdlist.h" #include "expr/attribute.h" @@ -32,15 +35,13 @@ #include "theory/strings/regexp_elim.h" #include "theory/strings/regexp_operation.h" #include "theory/strings/regexp_solver.h" +#include "theory/strings/sequences_stats.h" #include "theory/strings/skolem_cache.h" #include "theory/strings/solver_state.h" #include "theory/strings/strings_fmf.h" #include "theory/theory.h" #include "theory/uf/equality_engine.h" -#include -#include - namespace CVC4 { namespace theory { namespace strings { @@ -222,6 +223,13 @@ class TheoryStrings : public Theory { uint32_t d_cardSize; /** The notify class */ NotifyClass d_notify; + + /** + * Statistics for the theory of strings/sequences. All statistics for these + * theories is collected in this object. + */ + SequencesStatistics d_statistics; + /** Equaltity engine */ eq::EqualityEngine d_equalityEngine; /** The solver state object */ @@ -368,19 +376,6 @@ private: // ppRewrite Node ppRewrite(TNode atom) override; - public: - /** statistics class */ - class Statistics { - public: - IntStat d_splits; - IntStat d_eq_splits; - IntStat d_deq_splits; - IntStat d_loop_lemmas; - Statistics(); - ~Statistics(); - };/* class TheoryStrings::Statistics */ - Statistics d_statistics; - private: //-----------------------inference steps /** check register terms pre-normal forms @@ -443,7 +438,6 @@ private: */ void runStrategy(unsigned sbegin, unsigned send); //-----------------------end representation of the strategy - };/* class TheoryStrings */ }/* CVC4::theory::strings namespace */ -- cgit v1.2.3