From 22780596b561dff9b0eb5b0620252280a678944e Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Fri, 19 Jun 2020 16:48:10 -0500
Subject: Convert more uses of strings to words (#4584)

Towards theory of sequences.

This PR also adds support for sequences in default sygus grammars.

Also removes an interface for mkEmptyWord which doesn't have an equivalent for sequences.
---
 src/expr/type_node.cpp                             |  6 +-
 .../quantifiers/sygus/sygus_grammar_cons.cpp       | 18 +++++-
 src/theory/strings/regexp_operation.cpp            |  2 +-
 src/theory/strings/strings_entail.cpp              | 75 ++++++++++------------
 src/theory/strings/word.cpp                        | 35 +++++-----
 src/theory/strings/word.h                          |  3 -
 test/unit/theory/theory_strings_word_white.h       |  3 +-
 7 files changed, 73 insertions(+), 69 deletions(-)
diff --git a/src/expr/type_node.cpp b/src/expr/type_node.cpp
index edf88846f..ff4de9dc8 100644
--- a/src/expr/type_node.cpp
+++ b/src/expr/type_node.cpp
@@ -304,11 +304,7 @@ Node TypeNode::mkGroundValue() const
   return *te;
 }
 
-bool TypeNode::isStringLike() const
-{
-  // TODO (cvc4-projects #23): sequence here
-  return isString();
-}
+bool TypeNode::isStringLike() const { return isString() || isSequence(); }
 
 bool TypeNode::isSubtypeOf(TypeNode t) const {
   if(*this == t) {
diff --git a/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp b/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp
index 8df43087f..074b023a2 100644
--- a/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp
+++ b/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp
@@ -459,6 +459,10 @@ void CegGrammarConstructor::collectSygusGrammarTypesFor(
         // theory of strings shares the integer type
         TypeNode intType = NodeManager::currentNM()->integerType();
         collectSygusGrammarTypesFor(intType,types);
+        if (range.isSequence())
+        {
+          collectSygusGrammarTypesFor(range.getSequenceElementType(), types);
+        }
       }
       else if (range.isFunction())
       {
@@ -805,7 +809,7 @@ void CegGrammarConstructor::mkSygusDefaultGrammar(
         sdts[i].addConstructor(kind, cargsBinary);
       }
     }
-    else if (types[i].isString())
+    else if (types[i].isStringLike())
     {
       // concatenation
       std::vector<TypeNode> cargsBinary;
@@ -823,6 +827,18 @@ void CegGrammarConstructor::mkSygusDefaultGrammar(
       std::vector<TypeNode> cargsLen;
       cargsLen.push_back(unres_t);
       sdts[i_intType].addConstructor(STRING_LENGTH, cargsLen);
+      if (types[i].isSequence())
+      {
+        TypeNode etype = types[i].getSequenceElementType();
+        // retrieve element unresolved type
+        Assert(type_to_unres.find(etype) != type_to_unres.end());
+        TypeNode unresElemType = type_to_unres[etype];
+
+        Trace("sygus-grammar-def") << "...add for seq.unit" << std::endl;
+        std::vector<TypeNode> cargsSeqUnit;
+        cargsSeqUnit.push_back(unresElemType);
+        sdts[i].addConstructor(SEQ_UNIT, cargsSeqUnit);
+      }
     }
     else if (types[i].isArray())
     {
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
index f43babad2..a91210a7b 100644
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -40,7 +40,7 @@ RegExpOpr::RegExpOpr()
                                                std::vector<Node>{})),
       d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma))
 {
-  d_emptyString = Word::mkEmptyWord(CONST_STRING);
+  d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType());
 
   d_emptySingleton =
       NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString);
diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp
index e166845c7..9f502e1f6 100644
--- a/src/theory/strings/strings_entail.cpp
+++ b/src/theory/strings/strings_entail.cpp
@@ -37,8 +37,6 @@ bool StringsEntail::canConstantContainConcat(Node c,
                                              int& lastc)
 {
   Assert(c.isConst());
-  CVC4::String t = c.getConst<String>();
-  const std::vector<unsigned>& tvec = t.getVec();
   Assert(n.getKind() == STRING_CONCAT);
   // must find constant components in order
   size_t pos = 0;
@@ -50,19 +48,20 @@ bool StringsEntail::canConstantContainConcat(Node c,
     {
       firstc = firstc == -1 ? i : firstc;
       lastc = i;
-      CVC4::String s = n[i].getConst<String>();
-      size_t new_pos = t.find(s, pos);
+      size_t new_pos = Word::find(c, n[i], pos);
       if (new_pos == std::string::npos)
       {
         return false;
       }
       else
       {
-        pos = new_pos + s.size();
+        pos = new_pos + Word::getLength(n[i]);
       }
     }
     else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
     {
+      Assert(c.getType().isString());
+      const std::vector<unsigned>& tvec = c.getConst<String>().getVec();
       // find the first occurrence of a digit starting at pos
       while (pos < tvec.size() && !String::isDigit(tvec[pos]))
       {
@@ -116,7 +115,8 @@ bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
 {
   Assert(dir == 1 || dir == -1);
   Assert(nr.empty());
-  Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0));
+  NodeManager* nm = NodeManager::currentNM();
+  Node zero = nm->mkConst(CVC4::Rational(0));
   bool ret = false;
   bool success;
   unsigned sindex = 0;
@@ -139,18 +139,16 @@ bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
           if (lbr.sgn() > 0)
           {
             Assert(ArithEntail::check(curr, true));
-            CVC4::String s = n1[sindex_use].getConst<String>();
-            Node ncl =
-                NodeManager::currentNM()->mkConst(CVC4::Rational(s.size()));
-            Node next_s =
-                NodeManager::currentNM()->mkNode(MINUS, lowerBound, ncl);
+            Node s = n1[sindex_use];
+            size_t slen = Word::getLength(s);
+            Node ncl = nm->mkConst(CVC4::Rational(slen));
+            Node next_s = nm->mkNode(MINUS, lowerBound, ncl);
             next_s = Rewriter::rewrite(next_s);
             Assert(next_s.isConst());
             // we can remove the entire constant
             if (next_s.getConst<Rational>().sgn() >= 0)
             {
-              curr = Rewriter::rewrite(
-                  NodeManager::currentNM()->mkNode(MINUS, curr, ncl));
+              curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl));
               success = true;
               sindex++;
             }
@@ -160,25 +158,20 @@ bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
               // lower bound minus the length of a concrete string is negative,
               // hence lowerBound cannot be larger than long max
               Assert(lbr < Rational(String::maxSize()));
-              curr = Rewriter::rewrite(
-                  NodeManager::currentNM()->mkNode(MINUS, curr, lowerBound));
+              curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound));
               uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
-              Assert(lbsize < s.size());
+              Assert(lbsize < slen);
               if (dir == 1)
               {
                 // strip partially from the front
-                nr.push_back(
-                    NodeManager::currentNM()->mkConst(s.prefix(lbsize)));
-                n1[sindex_use] = NodeManager::currentNM()->mkConst(
-                    s.suffix(s.size() - lbsize));
+                nr.push_back(Word::prefix(s, lbsize));
+                n1[sindex_use] = Word::suffix(s, slen - lbsize);
               }
               else
               {
                 // strip partially from the back
-                nr.push_back(
-                    NodeManager::currentNM()->mkConst(s.suffix(lbsize)));
-                n1[sindex_use] = NodeManager::currentNM()->mkConst(
-                    s.prefix(s.size() - lbsize));
+                nr.push_back(Word::suffix(s, lbsize));
+                n1[sindex_use] = Word::prefix(s, slen - lbsize);
               }
               ret = true;
             }
@@ -496,8 +489,6 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
 {
   Assert(nb.empty());
   Assert(ne.empty());
-
-  NodeManager* nm = NodeManager::currentNM();
   bool changed = false;
   // for ( forwards, backwards ) direction
   for (unsigned r = 0; r < 2; r++)
@@ -509,7 +500,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
       bool removeComponent = false;
       Node n1cmp = n1[index0];
 
-      if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0)
+      if (n1cmp.isConst() && Word::isEmpty(n1cmp))
       {
         return false;
       }
@@ -522,14 +513,15 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
           << ", dir = " << dir << std::endl;
       if (n1cmp.isConst())
       {
-        CVC4::String s = n1cmp.getConst<String>();
+        Node s = n1cmp;
+        size_t slen = Word::getLength(s);
         // overlap is an overapproximation of the number of characters
         // n2[index1] can match in s
-        unsigned overlap = s.size();
+        unsigned overlap = Word::getLength(s);
         if (n2[index1].isConst())
         {
-          CVC4::String t = n2[index1].getConst<String>();
-          std::size_t ret = r == 0 ? s.find(t) : s.rfind(t);
+          Node t = n2[index1];
+          std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t);
           if (ret == std::string::npos)
           {
             if (n1.size() == 1)
@@ -545,7 +537,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
               // This is used to partially strip off the endpoint
               // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
               // str.contains( str.++( "c", x ), str.++( "cd", y ) )
-              overlap = r == 0 ? s.overlap(t) : t.overlap(s);
+              overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
             }
             else
             {
@@ -553,19 +545,20 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
               // if there is no overlap
               //   e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
               //        --> str.contains( x, "a" )
-              removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0);
+              removeComponent =
+                  ((r == 0 ? Word::overlap(s, t) : Word::overlap(t, s)) == 0);
             }
           }
           else if (sss.empty())  // only if not substr
           {
-            Assert(ret < s.size());
+            Assert(ret < slen);
             // can strip off up to the find position, e.g.
             // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
             // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
             // and
             // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
             // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
-            overlap = s.size() - ret;
+            overlap = slen - ret;
           }
         }
         else
@@ -573,7 +566,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
           // inconclusive
         }
         // process the overlap
-        if (overlap < s.size())
+        if (overlap < slen)
         {
           changed = true;
           if (overlap == 0)
@@ -586,13 +579,13 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
             // component
             if (r == 0)
             {
-              nb.push_back(nm->mkConst(s.prefix(s.size() - overlap)));
-              n1[index0] = nm->mkConst(s.suffix(overlap));
+              nb.push_back(Word::prefix(s, slen - overlap));
+              n1[index0] = Word::suffix(s, overlap);
             }
             else
             {
-              ne.push_back(nm->mkConst(s.suffix(s.size() - overlap)));
-              n1[index0] = nm->mkConst(s.prefix(overlap));
+              ne.push_back(Word::suffix(s, slen - overlap));
+              n1[index0] = Word::prefix(s, overlap);
             }
           }
         }
@@ -601,8 +594,8 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
       {
         if (n2[index1].isConst())
         {
+          Assert(n2[index1].getType().isString());
           CVC4::String t = n2[index1].getConst<String>();
-
           if (n1.size() == 1)
           {
             // if n1.size()==1, then if n2[index1] is not a number, we can drop
diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp
index 484b0df2d..35b315e35 100644
--- a/src/theory/strings/word.cpp
+++ b/src/theory/strings/word.cpp
@@ -27,7 +27,8 @@ Node Word::mkEmptyWord(TypeNode tn)
 {
   if (tn.isString())
   {
-    return mkEmptyWord(CONST_STRING);
+    std::vector<unsigned> vec;
+    return NodeManager::currentNM()->mkConst(String(vec));
   }
   else if (tn.isSequence())
   {
@@ -39,17 +40,6 @@ Node Word::mkEmptyWord(TypeNode tn)
   return Node::null();
 }
 
-Node Word::mkEmptyWord(Kind k)
-{
-  if (k == CONST_STRING)
-  {
-    std::vector<unsigned> vec;
-    return NodeManager::currentNM()->mkConst(String(vec));
-  }
-  Unimplemented();
-  return Node::null();
-}
-
 Node Word::mkWordFlatten(const std::vector<Node>& xs)
 {
   Assert(!xs.empty());
@@ -81,7 +71,8 @@ Node Word::mkWordFlatten(const std::vector<Node>& xs)
         seq.push_back(c.toExpr());
       }
     }
-    return NodeManager::currentNM()->mkConst(ExprSequence(tn.toType(), seq));
+    return NodeManager::currentNM()->mkConst(
+        ExprSequence(tn.getSequenceElementType().toType(), seq));
   }
   Unimplemented();
   return Node::null();
@@ -98,17 +89,17 @@ size_t Word::getLength(TNode x)
   {
     return x.getConst<ExprSequence>().getSequence().size();
   }
-  Unimplemented();
+  Unimplemented() << "Word::getLength on " << x;
   return 0;
 }
 
 std::vector<Node> Word::getChars(TNode x)
 {
   Kind k = x.getKind();
+  std::vector<Node> ret;
+  NodeManager* nm = NodeManager::currentNM();
   if (k == CONST_STRING)
   {
-    std::vector<Node> ret;
-    NodeManager* nm = NodeManager::currentNM();
     std::vector<unsigned> ccVec;
     const std::vector<unsigned>& cvec = x.getConst<String>().getVec();
     for (unsigned chVal : cvec)
@@ -120,8 +111,18 @@ std::vector<Node> Word::getChars(TNode x)
     }
     return ret;
   }
+  else if (k == CONST_SEQUENCE)
+  {
+    Type t = x.getConst<ExprSequence>().getType();
+    const Sequence& sx = x.getConst<ExprSequence>().getSequence();
+    const std::vector<Node>& vec = sx.getVec();
+    for (const Node& v : vec)
+    {
+      ret.push_back(nm->mkConst(ExprSequence(t, {v.toExpr()})));
+    }
+    return ret;
+  }
   Unimplemented();
-  std::vector<Node> ret;
   return ret;
 }
 
diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h
index 4ebe43e2f..3b15b763a 100644
--- a/src/theory/strings/word.h
+++ b/src/theory/strings/word.h
@@ -33,9 +33,6 @@ class Word
   /** make empty constant of type tn */
   static Node mkEmptyWord(TypeNode tn);
 
-  /** make empty constant of kind k */
-  static Node mkEmptyWord(Kind k);
-
   /** make word from constants in (non-empty) vector vec */
   static Node mkWordFlatten(const std::vector<Node>& xs);
 
diff --git a/test/unit/theory/theory_strings_word_white.h b/test/unit/theory/theory_strings_word_white.h
index bc6279cb8..2e29d50b8 100644
--- a/test/unit/theory/theory_strings_word_white.h
+++ b/test/unit/theory/theory_strings_word_white.h
@@ -63,7 +63,8 @@ class TheoryStringsWordWhite : public CxxTest::TestSuite
     Node cac = d_nm->mkConst(String("cac"));
     Node abca = d_nm->mkConst(String("abca"));
 
-    TS_ASSERT(Word::mkEmptyWord(kind::CONST_STRING) == empty);
+    TypeNode stringType = d_nm->stringType();
+    TS_ASSERT(Word::mkEmptyWord(stringType) == empty);
 
     std::vector<Node> vec;
     vec.push_back(abc);
-- 
cgit v1.2.3