summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2020-06-19 16:48:10 -0500
committerGitHub <noreply@github.com>2020-06-19 16:48:10 -0500
commit22780596b561dff9b0eb5b0620252280a678944e (patch)
tree796c7a3a270f0509f36762ba88e938c644d9dfd4
parente0633c091c37b79f9e3a2517cf95113c788db083 (diff)
Convert more uses of strings to words (#4584)
Towards theory of sequences. This PR also adds support for sequences in default sygus grammars. Also removes an interface for mkEmptyWord which doesn't have an equivalent for sequences.
-rw-r--r--src/expr/type_node.cpp6
-rw-r--r--src/theory/quantifiers/sygus/sygus_grammar_cons.cpp18
-rw-r--r--src/theory/strings/regexp_operation.cpp2
-rw-r--r--src/theory/strings/strings_entail.cpp75
-rw-r--r--src/theory/strings/word.cpp35
-rw-r--r--src/theory/strings/word.h3
-rw-r--r--test/unit/theory/theory_strings_word_white.h3
7 files changed, 73 insertions, 69 deletions
diff --git a/src/expr/type_node.cpp b/src/expr/type_node.cpp
index edf88846f..ff4de9dc8 100644
--- a/src/expr/type_node.cpp
+++ b/src/expr/type_node.cpp
@@ -304,11 +304,7 @@ Node TypeNode::mkGroundValue() const
return *te;
}
-bool TypeNode::isStringLike() const
-{
- // TODO (cvc4-projects #23): sequence here
- return isString();
-}
+bool TypeNode::isStringLike() const { return isString() || isSequence(); }
bool TypeNode::isSubtypeOf(TypeNode t) const {
if(*this == t) {
diff --git a/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp b/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp
index 8df43087f..074b023a2 100644
--- a/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp
+++ b/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp
@@ -459,6 +459,10 @@ void CegGrammarConstructor::collectSygusGrammarTypesFor(
// theory of strings shares the integer type
TypeNode intType = NodeManager::currentNM()->integerType();
collectSygusGrammarTypesFor(intType,types);
+ if (range.isSequence())
+ {
+ collectSygusGrammarTypesFor(range.getSequenceElementType(), types);
+ }
}
else if (range.isFunction())
{
@@ -805,7 +809,7 @@ void CegGrammarConstructor::mkSygusDefaultGrammar(
sdts[i].addConstructor(kind, cargsBinary);
}
}
- else if (types[i].isString())
+ else if (types[i].isStringLike())
{
// concatenation
std::vector<TypeNode> cargsBinary;
@@ -823,6 +827,18 @@ void CegGrammarConstructor::mkSygusDefaultGrammar(
std::vector<TypeNode> cargsLen;
cargsLen.push_back(unres_t);
sdts[i_intType].addConstructor(STRING_LENGTH, cargsLen);
+ if (types[i].isSequence())
+ {
+ TypeNode etype = types[i].getSequenceElementType();
+ // retrieve element unresolved type
+ Assert(type_to_unres.find(etype) != type_to_unres.end());
+ TypeNode unresElemType = type_to_unres[etype];
+
+ Trace("sygus-grammar-def") << "...add for seq.unit" << std::endl;
+ std::vector<TypeNode> cargsSeqUnit;
+ cargsSeqUnit.push_back(unresElemType);
+ sdts[i].addConstructor(SEQ_UNIT, cargsSeqUnit);
+ }
}
else if (types[i].isArray())
{
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
index f43babad2..a91210a7b 100644
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -40,7 +40,7 @@ RegExpOpr::RegExpOpr()
std::vector<Node>{})),
d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma))
{
- d_emptyString = Word::mkEmptyWord(CONST_STRING);
+ d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType());
d_emptySingleton =
NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString);
diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp
index e166845c7..9f502e1f6 100644
--- a/src/theory/strings/strings_entail.cpp
+++ b/src/theory/strings/strings_entail.cpp
@@ -37,8 +37,6 @@ bool StringsEntail::canConstantContainConcat(Node c,
int& lastc)
{
Assert(c.isConst());
- CVC4::String t = c.getConst<String>();
- const std::vector<unsigned>& tvec = t.getVec();
Assert(n.getKind() == STRING_CONCAT);
// must find constant components in order
size_t pos = 0;
@@ -50,19 +48,20 @@ bool StringsEntail::canConstantContainConcat(Node c,
{
firstc = firstc == -1 ? i : firstc;
lastc = i;
- CVC4::String s = n[i].getConst<String>();
- size_t new_pos = t.find(s, pos);
+ size_t new_pos = Word::find(c, n[i], pos);
if (new_pos == std::string::npos)
{
return false;
}
else
{
- pos = new_pos + s.size();
+ pos = new_pos + Word::getLength(n[i]);
}
}
else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
{
+ Assert(c.getType().isString());
+ const std::vector<unsigned>& tvec = c.getConst<String>().getVec();
// find the first occurrence of a digit starting at pos
while (pos < tvec.size() && !String::isDigit(tvec[pos]))
{
@@ -116,7 +115,8 @@ bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
{
Assert(dir == 1 || dir == -1);
Assert(nr.empty());
- Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0));
+ NodeManager* nm = NodeManager::currentNM();
+ Node zero = nm->mkConst(CVC4::Rational(0));
bool ret = false;
bool success;
unsigned sindex = 0;
@@ -139,18 +139,16 @@ bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
if (lbr.sgn() > 0)
{
Assert(ArithEntail::check(curr, true));
- CVC4::String s = n1[sindex_use].getConst<String>();
- Node ncl =
- NodeManager::currentNM()->mkConst(CVC4::Rational(s.size()));
- Node next_s =
- NodeManager::currentNM()->mkNode(MINUS, lowerBound, ncl);
+ Node s = n1[sindex_use];
+ size_t slen = Word::getLength(s);
+ Node ncl = nm->mkConst(CVC4::Rational(slen));
+ Node next_s = nm->mkNode(MINUS, lowerBound, ncl);
next_s = Rewriter::rewrite(next_s);
Assert(next_s.isConst());
// we can remove the entire constant
if (next_s.getConst<Rational>().sgn() >= 0)
{
- curr = Rewriter::rewrite(
- NodeManager::currentNM()->mkNode(MINUS, curr, ncl));
+ curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl));
success = true;
sindex++;
}
@@ -160,25 +158,20 @@ bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
// lower bound minus the length of a concrete string is negative,
// hence lowerBound cannot be larger than long max
Assert(lbr < Rational(String::maxSize()));
- curr = Rewriter::rewrite(
- NodeManager::currentNM()->mkNode(MINUS, curr, lowerBound));
+ curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound));
uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
- Assert(lbsize < s.size());
+ Assert(lbsize < slen);
if (dir == 1)
{
// strip partially from the front
- nr.push_back(
- NodeManager::currentNM()->mkConst(s.prefix(lbsize)));
- n1[sindex_use] = NodeManager::currentNM()->mkConst(
- s.suffix(s.size() - lbsize));
+ nr.push_back(Word::prefix(s, lbsize));
+ n1[sindex_use] = Word::suffix(s, slen - lbsize);
}
else
{
// strip partially from the back
- nr.push_back(
- NodeManager::currentNM()->mkConst(s.suffix(lbsize)));
- n1[sindex_use] = NodeManager::currentNM()->mkConst(
- s.prefix(s.size() - lbsize));
+ nr.push_back(Word::suffix(s, lbsize));
+ n1[sindex_use] = Word::prefix(s, slen - lbsize);
}
ret = true;
}
@@ -496,8 +489,6 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
{
Assert(nb.empty());
Assert(ne.empty());
-
- NodeManager* nm = NodeManager::currentNM();
bool changed = false;
// for ( forwards, backwards ) direction
for (unsigned r = 0; r < 2; r++)
@@ -509,7 +500,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
bool removeComponent = false;
Node n1cmp = n1[index0];
- if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0)
+ if (n1cmp.isConst() && Word::isEmpty(n1cmp))
{
return false;
}
@@ -522,14 +513,15 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
<< ", dir = " << dir << std::endl;
if (n1cmp.isConst())
{
- CVC4::String s = n1cmp.getConst<String>();
+ Node s = n1cmp;
+ size_t slen = Word::getLength(s);
// overlap is an overapproximation of the number of characters
// n2[index1] can match in s
- unsigned overlap = s.size();
+ unsigned overlap = Word::getLength(s);
if (n2[index1].isConst())
{
- CVC4::String t = n2[index1].getConst<String>();
- std::size_t ret = r == 0 ? s.find(t) : s.rfind(t);
+ Node t = n2[index1];
+ std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t);
if (ret == std::string::npos)
{
if (n1.size() == 1)
@@ -545,7 +537,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
// This is used to partially strip off the endpoint
// e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
// str.contains( str.++( "c", x ), str.++( "cd", y ) )
- overlap = r == 0 ? s.overlap(t) : t.overlap(s);
+ overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
}
else
{
@@ -553,19 +545,20 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
// if there is no overlap
// e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
// --> str.contains( x, "a" )
- removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0);
+ removeComponent =
+ ((r == 0 ? Word::overlap(s, t) : Word::overlap(t, s)) == 0);
}
}
else if (sss.empty()) // only if not substr
{
- Assert(ret < s.size());
+ Assert(ret < slen);
// can strip off up to the find position, e.g.
// str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
// str.contains( str.++( "bc", x ), str.++( "b", y ) ),
// and
// str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
// str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
- overlap = s.size() - ret;
+ overlap = slen - ret;
}
}
else
@@ -573,7 +566,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
// inconclusive
}
// process the overlap
- if (overlap < s.size())
+ if (overlap < slen)
{
changed = true;
if (overlap == 0)
@@ -586,13 +579,13 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
// component
if (r == 0)
{
- nb.push_back(nm->mkConst(s.prefix(s.size() - overlap)));
- n1[index0] = nm->mkConst(s.suffix(overlap));
+ nb.push_back(Word::prefix(s, slen - overlap));
+ n1[index0] = Word::suffix(s, overlap);
}
else
{
- ne.push_back(nm->mkConst(s.suffix(s.size() - overlap)));
- n1[index0] = nm->mkConst(s.prefix(overlap));
+ ne.push_back(Word::suffix(s, slen - overlap));
+ n1[index0] = Word::prefix(s, overlap);
}
}
}
@@ -601,8 +594,8 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
{
if (n2[index1].isConst())
{
+ Assert(n2[index1].getType().isString());
CVC4::String t = n2[index1].getConst<String>();
-
if (n1.size() == 1)
{
// if n1.size()==1, then if n2[index1] is not a number, we can drop
diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp
index 484b0df2d..35b315e35 100644
--- a/src/theory/strings/word.cpp
+++ b/src/theory/strings/word.cpp
@@ -27,7 +27,8 @@ Node Word::mkEmptyWord(TypeNode tn)
{
if (tn.isString())
{
- return mkEmptyWord(CONST_STRING);
+ std::vector<unsigned> vec;
+ return NodeManager::currentNM()->mkConst(String(vec));
}
else if (tn.isSequence())
{
@@ -39,17 +40,6 @@ Node Word::mkEmptyWord(TypeNode tn)
return Node::null();
}
-Node Word::mkEmptyWord(Kind k)
-{
- if (k == CONST_STRING)
- {
- std::vector<unsigned> vec;
- return NodeManager::currentNM()->mkConst(String(vec));
- }
- Unimplemented();
- return Node::null();
-}
-
Node Word::mkWordFlatten(const std::vector<Node>& xs)
{
Assert(!xs.empty());
@@ -81,7 +71,8 @@ Node Word::mkWordFlatten(const std::vector<Node>& xs)
seq.push_back(c.toExpr());
}
}
- return NodeManager::currentNM()->mkConst(ExprSequence(tn.toType(), seq));
+ return NodeManager::currentNM()->mkConst(
+ ExprSequence(tn.getSequenceElementType().toType(), seq));
}
Unimplemented();
return Node::null();
@@ -98,17 +89,17 @@ size_t Word::getLength(TNode x)
{
return x.getConst<ExprSequence>().getSequence().size();
}
- Unimplemented();
+ Unimplemented() << "Word::getLength on " << x;
return 0;
}
std::vector<Node> Word::getChars(TNode x)
{
Kind k = x.getKind();
+ std::vector<Node> ret;
+ NodeManager* nm = NodeManager::currentNM();
if (k == CONST_STRING)
{
- std::vector<Node> ret;
- NodeManager* nm = NodeManager::currentNM();
std::vector<unsigned> ccVec;
const std::vector<unsigned>& cvec = x.getConst<String>().getVec();
for (unsigned chVal : cvec)
@@ -120,8 +111,18 @@ std::vector<Node> Word::getChars(TNode x)
}
return ret;
}
+ else if (k == CONST_SEQUENCE)
+ {
+ Type t = x.getConst<ExprSequence>().getType();
+ const Sequence& sx = x.getConst<ExprSequence>().getSequence();
+ const std::vector<Node>& vec = sx.getVec();
+ for (const Node& v : vec)
+ {
+ ret.push_back(nm->mkConst(ExprSequence(t, {v.toExpr()})));
+ }
+ return ret;
+ }
Unimplemented();
- std::vector<Node> ret;
return ret;
}
diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h
index 4ebe43e2f..3b15b763a 100644
--- a/src/theory/strings/word.h
+++ b/src/theory/strings/word.h
@@ -33,9 +33,6 @@ class Word
/** make empty constant of type tn */
static Node mkEmptyWord(TypeNode tn);
- /** make empty constant of kind k */
- static Node mkEmptyWord(Kind k);
-
/** make word from constants in (non-empty) vector vec */
static Node mkWordFlatten(const std::vector<Node>& xs);
diff --git a/test/unit/theory/theory_strings_word_white.h b/test/unit/theory/theory_strings_word_white.h
index bc6279cb8..2e29d50b8 100644
--- a/test/unit/theory/theory_strings_word_white.h
+++ b/test/unit/theory/theory_strings_word_white.h
@@ -63,7 +63,8 @@ class TheoryStringsWordWhite : public CxxTest::TestSuite
Node cac = d_nm->mkConst(String("cac"));
Node abca = d_nm->mkConst(String("abca"));
- TS_ASSERT(Word::mkEmptyWord(kind::CONST_STRING) == empty);
+ TypeNode stringType = d_nm->stringType();
+ TS_ASSERT(Word::mkEmptyWord(stringType) == empty);
std::vector<Node> vec;
vec.push_back(abc);
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback