More aggressive skolem caching for strings, document and clean preprocessor (#2478)

author: Andrew Reynolds <andrew.j.reynolds@gmail.com> 2018-09-17 15:40:28 -0500
committer: GitHub <noreply@github.com> 2018-09-17 15:40:28 -0500
commit: d2b692cb2c054199d75a05f0f700e54fcb4f6c3c (patch)
tree: ce69ae1d1d547d48cf771896410208061949049e /src
parent: c90b5b15ca93e64683c2bbf85def8d7afb4edab8 (diff)
6 files changed, 126 insertions, 111 deletions
diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml
index c48583bb2..77056e279 100644
--- a/src/options/strings_options.toml
+++ b/src/options/strings_options.toml
@@ -183,15 +183,6 @@ header = "options/strings_options.h"
   help       = "use model guessing to avoid string extended function reductions"
 
 [[option]]
-  name       = "stringUfReduct"
-  category   = "regular"
-  long       = "strings-uf-reduct"
-  type       = "bool"
-  default    = "false"
-  read_only  = true
-  help       = "use uninterpreted functions when applying extended function reductions"
-
-[[option]]
   name       = "stringBinaryCsp"
   category   = "regular"
   long       = "strings-binary-csp"
diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp
index db673dafe..4a78a11ff 100644
--- a/src/theory/strings/skolem_cache.cpp
+++ b/src/theory/strings/skolem_cache.cpp
@@ -32,6 +32,11 @@ Node SkolemCache::mkSkolemCached(Node a, Node b, SkolemId id, const char* c)
   return it->second;
 }
 
+Node SkolemCache::mkSkolemCached(Node a, SkolemId id, const char* c)
+{
+  return mkSkolemCached(a, Node::null(), id, c);
+}
+
 Node SkolemCache::mkSkolem(const char* c)
 {
   NodeManager* nm = NodeManager::currentNM();
diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h
index 2984ccfe4..c9b9fbe5a 100644
--- a/src/theory/strings/skolem_cache.h
+++ b/src/theory/strings/skolem_cache.h
@@ -42,9 +42,13 @@ class SkolemCache
    *
    * The skolems with _REV suffixes are used for the reverse version of the
    * preconditions below, e.g. where we are considering a' ++ a = b' ++ b.
+   *
+   * All skolems assume a and b are strings unless otherwise stated.
    */
   enum SkolemId
   {
+    // exists k. k = a
+    SK_PURIFY,
     // a != "" ^ b = "ccccd" ^ a ++ "d" ++ a' = b ++ b' =>
     //    exists k. a = "cccc" + k
     SK_ID_C_SPT,
@@ -78,12 +82,30 @@ class SkolemCache
     SK_ID_DEQ_X,
     SK_ID_DEQ_Y,
     SK_ID_DEQ_Z,
+    // contains( a, b ) =>
+    //    exists k_pre, k_post. a = k_pre ++ b ++ k_post ^
+    //                          ~contains(k_pre ++ substr( b, 0, len(b)-1 ), b)
+    SK_FIRST_CTN_PRE,
+    SK_FIRST_CTN_POST,
+    // For integer b,
+    // len( a ) > b =>
+    //    exists k. a = k ++ a' ^ len( k ) = b
+    SK_PREFIX,
+    // For integer b,
+    // b > 0 =>
+    //    exists k. a = a' ++ k ^ len( k ) = ite( len(a)>b, len(a)-b, 0 )
+    SK_SUFFIX_REM,
   };
   /**
    * Returns a skolem of type string that is cached for (a,b,id) and has
    * name c.
    */
   Node mkSkolemCached(Node a, Node b, SkolemId id, const char* c);
+  /**
+   * Returns a skolem of type string that is cached for (a,[null],id) and has
+   * name c.
+   */
+  Node mkSkolemCached(Node a, SkolemId id, const char* c);
   /** Returns a (uncached) skolem of type string with name c */
   Node mkSkolem(const char* c);
   /** Returns true if n is a skolem allocated by this class */
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index 2e1d6c2c7..f007ae1df 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -112,7 +112,7 @@ TheoryStrings::TheoryStrings(context::Context* c,
       d_pregistered_terms_cache(u),
       d_registered_terms_cache(u),
       d_length_lemma_terms_cache(u),
-      d_preproc(u),
+      d_preproc(&d_sk_cache, u),
       d_preproc_cache(u),
       d_extf_infer_cache(c),
       d_extf_infer_cache_u(u),
@@ -3649,7 +3649,7 @@ void TheoryStrings::registerTerm( Node n, int effort ) {
         return;
       }
     }
-    Node sk = d_sk_cache.mkSkolem("lsym");
+    Node sk = d_sk_cache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
     StringsProxyVarAttribute spva;
     sk.setAttribute(spva, true);
     Node eq = Rewriter::rewrite(sk.eqNode(n));
diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp
index dfab0dd83..2d5bef519 100644
--- a/src/theory/strings/theory_strings_preprocess.cpp
+++ b/src/theory/strings/theory_strings_preprocess.cpp
@@ -30,7 +30,9 @@ namespace CVC4 {
 namespace theory {
 namespace strings {
 
-StringsPreprocess::StringsPreprocess( context::UserContext* u ){
+StringsPreprocess::StringsPreprocess(SkolemCache *sc, context::UserContext *u)
+    : d_sc(sc)
+{
   //Constants
   d_zero = NodeManager::currentNM()->mkConst(Rational(0));
   d_one = NodeManager::currentNM()->mkConst(Rational(1));
@@ -41,36 +43,6 @@ StringsPreprocess::~StringsPreprocess(){
 
 }
 
-Node StringsPreprocess::getUfForNode( Kind k, Node n, unsigned id ) {
-  std::map< unsigned, Node >::iterator it = d_uf[k].find( id );
-  if( it==d_uf[k].end() ){
-    std::vector< TypeNode > types;
-    for( unsigned i=0; i<n.getNumChildren(); i++ ){
-      types.push_back( n[i].getType() );
-    }
-    TypeNode typ = NodeManager::currentNM()->mkFunctionType( types, n.getType() );
-    Node f = NodeManager::currentNM()->mkSkolem( "sop", typ, "op created for string op" );
-    d_uf[k][id] = f;
-    return f;
-  }else{
-    return it->second;
-  }
-}
-
-//pro: congruence possible, con: introduces UF/requires theory combination
-//  currently hurts performance
-//TODO: for all skolems below
-Node StringsPreprocess::getUfAppForNode( Kind k, Node n, unsigned id ) {
-  std::vector< Node > children;
-  children.push_back( getUfForNode( k, n, id ) );
-  for( unsigned i=0; i<n.getNumChildren(); i++ ){
-    children.push_back( n[i] );
-  }
-  return NodeManager::currentNM()->mkNode( kind::APPLY_UF, children );
-}
-
-//returns an n such that t can be replaced by n, under the assumption of lemmas in new_nodes
-
 Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
   unsigned prev_new_nodes = new_nodes.size();
   Trace("strings-preprocess-debug") << "StringsPreprocess::simplify: " << t << std::endl;
@@ -78,48 +50,54 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
   NodeManager *nm = NodeManager::currentNM();
 
   if( t.getKind() == kind::STRING_SUBSTR ) {
-    Node skt;
-    if( options::stringUfReduct() ){
-      skt = getUfAppForNode( kind::STRING_SUBSTR, t );
-    }else{
-      skt = NodeManager::currentNM()->mkSkolem( "sst", NodeManager::currentNM()->stringType(), "created for substr" );
-    }
-    Node t12 = NodeManager::currentNM()->mkNode( kind::PLUS, t[1], t[2] );
-    Node lt0 = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[0] );
+    // processing term:  substr( s, n, m )
+    Node s = t[0];
+    Node n = t[1];
+    Node m = t[2];
+    Node skt = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst");
+    Node t12 = nm->mkNode(PLUS, n, m);
+    t12 = Rewriter::rewrite(t12);
+    Node lt0 = nm->mkNode(STRING_LENGTH, s);
     //start point is greater than or equal zero
-    Node c1 = NodeManager::currentNM()->mkNode( kind::GEQ, t[1], d_zero );
+    Node c1 = nm->mkNode(GEQ, n, d_zero);
     //start point is less than end of string
-    Node c2 = NodeManager::currentNM()->mkNode( kind::GT, lt0, t[1] );
+    Node c2 = nm->mkNode(GT, lt0, n);
     //length is positive
-    Node c3 = NodeManager::currentNM()->mkNode( kind::GT, t[2], d_zero );
-    Node cond = NodeManager::currentNM()->mkNode( kind::AND, c1, c2, c3 );
-  
-    Node sk1 = NodeManager::currentNM()->mkSkolem( "ss1", NodeManager::currentNM()->stringType(), "created for substr" );
-    Node sk2 = NodeManager::currentNM()->mkSkolem( "ss2", NodeManager::currentNM()->stringType(), "created for substr" );
-    Node b11 = t[0].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, skt, sk2 ) );
+    Node c3 = nm->mkNode(GT, m, d_zero);
+    Node cond = nm->mkNode(AND, c1, c2, c3);
+
+    Node sk1 = d_sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre");
+    Node sk2 =
+        d_sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr");
+    Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, skt, sk2));
     //length of first skolem is second argument
-    Node b12 = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ).eqNode( t[1] );
+    Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n);
     //length of second skolem is abs difference between end point and end of string
-    Node b13 = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk2 ).eqNode(
-                 NodeManager::currentNM()->mkNode( kind::ITE, NodeManager::currentNM()->mkNode( kind::GEQ, lt0, t12 ),
-                    NodeManager::currentNM()->mkNode( kind::MINUS, lt0, t12 ), d_zero ) );
+    Node b13 = nm->mkNode(STRING_LENGTH, sk2)
+                   .eqNode(nm->mkNode(ITE,
+                                      nm->mkNode(GEQ, lt0, t12),
+                                      nm->mkNode(MINUS, lt0, t12),
+                                      d_zero));
+
+    Node b1 = nm->mkNode(AND, b11, b12, b13);
+    Node b2 = skt.eqNode(d_empty_str);
+    Node lemma = nm->mkNode(ITE, cond, b1, b2);
 
-    Node b1 = NodeManager::currentNM()->mkNode( kind::AND, b11, b12, b13 );
-    Node b2 = skt.eqNode( NodeManager::currentNM()->mkConst( ::CVC4::String("") ) );
-    Node lemma = NodeManager::currentNM()->mkNode( kind::ITE, cond, b1, b2 );
+    // assert:
+    // IF    n >=0 AND n < len( s ) AND m > 0
+    // THEN: s = sk1 ++ skt ++ sk2 AND
+    //       len( sk1 ) = n AND
+    //       len( sk2 ) = ite( len( s ) >= n+m, len( s )-(n+m), 0 )
+    // ELSE: skt = ""
     new_nodes.push_back( lemma );
+
+    // Thus, substr( s, n, m ) = skt
     retNode = skt;
   }
   else if (t.getKind() == kind::STRING_STRIDOF)
   {
     // processing term:  indexof( x, y, n )
-
-    Node skk;
-    if( options::stringUfReduct() ){
-      skk = getUfAppForNode( kind::STRING_STRIDOF, t );
-    }else{
-      skk = nm->mkSkolem("iok", nm->integerType(), "created for indexof");
-    }
+    Node skk = nm->mkSkolem("iok", nm->integerType(), "created for indexof");
 
     Node negone = nm->mkConst(::CVC4::Rational(-1));
     Node krange = nm->mkNode(kind::GEQ, skk, negone);
@@ -196,12 +174,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
     //        NodeManager::currentNM()->mkNode(kind::GEQ, t[0], d_zero),
     //        t[0], NodeManager::currentNM()->mkNode(kind::UMINUS, t[0])));
     Node num = t[0];
-    Node pret;
-    if( options::stringUfReduct() ){
-      pret = NodeManager::currentNM()->mkNode(kind::STRING_ITOS, num);
-    }else{
-      pret = NodeManager::currentNM()->mkSkolem( "itost", NodeManager::currentNM()->stringType(), "created for itos" );
-    }
+    Node pret = nm->mkSkolem("itost", nm->stringType(), "created for itos");
     Node lenp = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, pret);
 
     Node nonneg = NodeManager::currentNM()->mkNode(kind::GEQ, t[0], d_zero);
@@ -298,14 +271,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
     retNode = pret;
   } else if( t.getKind() == kind::STRING_STOI ) {
     Node str = t[0];
-    Node pret;
-    if( options::stringUfReduct() ){
-      pret = getUfAppForNode( kind::STRING_STOI, t );
-    }else{
-      pret = NodeManager::currentNM()->mkSkolem( "stoit", NodeManager::currentNM()->integerType(), "created for stoi" );
-    }
-    //Node pret = NodeManager::currentNM()->mkNode(kind::STRING_STOI, str);
-    //Node pret = getUfAppForNode( kind::STRING_STOI, t );
+    Node pret = nm->mkSkolem("stoit", nm->integerType(), "created for stoi");
     Node lenp = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, str);
 
     Node negone = NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) );
@@ -410,9 +376,11 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
     Node y = t[1];
     Node z = t[2];
     TypeNode tn = t[0].getType();
-    Node rp1 = nm->mkSkolem("rp1", tn, "created for replace");
-    Node rp2 = nm->mkSkolem("rp2", tn, "created for replace");
-    Node rpw = nm->mkSkolem("rpw", tn, "created for replace");
+    Node rp1 =
+        d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_PRE, "rfcpre");
+    Node rp2 =
+        d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_POST, "rfcpost");
+    Node rpw = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpw");
 
     // y = ""
     Node cond1 = y.eqNode(nm->mkConst(CVC4::String("")));
diff --git a/src/theory/strings/theory_strings_preprocess.h b/src/theory/strings/theory_strings_preprocess.h
index 78e92cd51..c670a5483 100644
--- a/src/theory/strings/theory_strings_preprocess.h
+++ b/src/theory/strings/theory_strings_preprocess.h
@@ -20,36 +20,65 @@
 #define __CVC4__THEORY__STRINGS__PREPROCESS_H
 
 #include <vector>
-#include "util/hash.h"
-#include "theory/theory.h"
-#include "theory/rewriter.h"
 #include "context/cdhashmap.h"
+#include "theory/rewriter.h"
+#include "theory/strings/skolem_cache.h"
+#include "theory/theory.h"
+#include "util/hash.h"
 
 namespace CVC4 {
 namespace theory {
 namespace strings {
 
+/** Strings preprocessor
+ *
+ * This class is responsible for extended function reductions. It is used as
+ * a preprocessor when --no-strings-lazy-pp is enabled. By default, it is
+ * used for reducing extended functions on-demand during the "extended function
+ * reductions" inference schema of TheoryStrings.
+ */
 class StringsPreprocess {
-  //Constants
-  Node d_zero;
-  Node d_one;
-  Node d_empty_str;
-  //mapping from kinds to UF
-  std::map< Kind, std::map< unsigned, Node > > d_uf;
-  //get UF for node
-  Node getUfForNode( Kind k, Node n, unsigned id = 0 );
-  Node getUfAppForNode( Kind k, Node n, unsigned id = 0 );
-  //recursive simplify
-  Node simplifyRec( Node t, std::vector< Node > &new_nodes, std::map< Node, Node >& visited );
 public:
-  StringsPreprocess( context::UserContext* u );
-  ~StringsPreprocess();
-  //returns a node that is equivalent to t under assumptions in new_nodes
-  Node simplify( Node t, std::vector< Node > &new_nodes );
-  //process assertion: guarentees to remove all extf
-  Node processAssertion( Node n, std::vector< Node > &new_nodes );
-  //proces assertions: guarentees to remove all extf, rewrite in place
-  void processAssertions( std::vector< Node > &vec_node );
+ StringsPreprocess(SkolemCache *sc, context::UserContext *u);
+ ~StringsPreprocess();
+ /**
+  * Returns a node t' such that
+  *   (exists k) new_nodes => t = t'
+  * is valid, where k are the free skolems introduced when constructing
+  * new_nodes.
+  */
+ Node simplify(Node t, std::vector<Node> &new_nodes);
+ /**
+  * Applies simplifyRec on t until a fixed point is reached, and returns
+  * the resulting term t', which is such that
+  *   (exists k) new_nodes => t = t'
+  * is valid, where k are the free skolems introduced when constructing
+  * new_nodes.
+  */
+ Node processAssertion(Node t, std::vector<Node> &new_nodes);
+ /**
+  * Replaces all formulas t in vec_node with an equivalent formula t' that
+  * contains no free instances of extended functions (that is, extended
+  * functions may only appear beneath quantifiers). This applies simplifyRec
+  * on each assertion in vec_node until a fixed point is reached.
+  */
+ void processAssertions(std::vector<Node> &vec_node);
+
+private:
+ /** commonly used constants */
+ Node d_zero;
+ Node d_one;
+ Node d_empty_str;
+ /** pointer to the skolem cache used by this class */
+ SkolemCache *d_sc;
+ /**
+  * Applies simplify to all top-level extended function subterms of t. New
+  * assertions created in this reduction are added to new_nodes. The argument
+  * visited stores a cache of previous results.
+  */
+ Node simplifyRec(Node t,
+                  std::vector<Node> &new_nodes,
+                  std::map<Node, Node> &visited);
 };
 
 }/* CVC4::theory::strings namespace */
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>	2018-09-17 15:40:28 -0500
committer	GitHub <noreply@github.com>	2018-09-17 15:40:28 -0500
commit	d2b692cb2c054199d75a05f0f700e54fcb4f6c3c (patch)
tree	ce69ae1d1d547d48cf771896410208061949049e /src
parent	c90b5b15ca93e64683c2bbf85def8d7afb4edab8 (diff)