summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2018-02-27 16:52:57 -0600
committerGitHub <noreply@github.com>2018-02-27 16:52:57 -0600
commit7f33294261869ab8f0caa8660222576a4ff7bcdc (patch)
tree51937131d7d3ef465f75157f6b6581547cf49a6a /src
parent9f5447f5ca07688ff7e6da7973d7f97d17678427 (diff)
Improve rewriter for string indexof (#1592)
Diffstat (limited to 'src')
-rw-r--r--src/theory/strings/theory_strings_rewriter.cpp239
-rw-r--r--src/util/regexp.cpp5
-rw-r--r--src/util/regexp.h5
3 files changed, 147 insertions, 102 deletions
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp
index 54a5b4dcb..1555b0aa0 100644
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -1818,107 +1818,143 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) {
Node TheoryStringsRewriter::rewriteIndexof( Node node ) {
Assert(node.getKind() == kind::STRING_STRIDOF);
- std::vector< Node > children;
- getConcat( node[0], children );
- //std::vector< Node > children1;
- //getConcat( node[1], children1 ); TODO
- std::size_t start = 0;
- std::size_t val2 = 0;
- if( node[2].isConst() ){
- CVC4::Rational RMAXINT(LONG_MAX);
- if( node[2].getConst<Rational>()>RMAXINT ){
- Assert(node[2].getConst<Rational>() <= RMAXINT, "Number exceeds LONG_MAX in string index_of");
- return NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) );
- }else if( node[2].getConst<Rational>().sgn()==-1 ){
- //constant negative
- return NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) );
- }else{
- val2 = node[2].getConst<Rational>().getNumerator().toUnsignedInt();
- start = val2;
- }
- }
- bool prefixNoOverlap = false;
- CVC4::String t;
- if( node[1].isConst() ){
- t = node[1].getConst<String>();
- }
- //unsigned ch1_index = 0;
- for( unsigned i=0; i<children.size(); i++ ){
- bool do_splice = false;
- if( children[i].isConst() ){
- CVC4::String s = children[i].getConst<String>();
- if( node[1].isConst() ){
- if( i==0 ){
- std::size_t ret = s.find( t, start );
- if( ret!=std::string::npos ) {
- //exact if start value was constant
- if( node[2].isConst() ){
- return NodeManager::currentNM()->mkConst( ::CVC4::Rational((unsigned) ret) );
- }
- }else{
- //exact if we scanned the entire string
- if( node[0].isConst() ){
- return NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) );
- }else{
- prefixNoOverlap = (s.overlap(t)==0);
- Trace("strings-rewrite-debug") << "Prefix no overlap : " << s << " " << t << " " << prefixNoOverlap << std::endl;
- }
- }
- }else if( !node[2].isConst() ){
- break;
- }else{
- std::size_t ret = s.find(t, start);
- //remove remaining children after finding the string
- if( ret!=std::string::npos ){
- Assert( ret+t.size()<=s.size() );
- children[i] = NodeManager::currentNM()->mkConst( ::CVC4::String( s.substr(0,ret+t.size()) ) );
- do_splice = true;
- }else{
- //if no overlap on last child, can remove
- if( t.overlap( s )==0 && i==children.size()-1 ){
- std::vector< Node > spl;
- spl.insert( spl.end(), children.begin(), children.begin()+i );
- return NodeManager::currentNM()->mkNode( kind::STRING_STRIDOF, mkConcat( kind::STRING_CONCAT, spl ), node[1], node[2] );
- }
- }
- }
+ NodeManager* nm = NodeManager::currentNM();
+
+ if (node[2].isConst() && node[2].getConst<Rational>().sgn() < 0)
+ {
+ // z<0 implies str.indexof( x, y, z ) --> -1
+ Node negone = nm->mkConst(Rational(-1));
+ return returnRewrite(node, negone, "idof-neg");
+ }
+
+ if (node[1].isConst())
+ {
+ if (node[1].getConst<String>().size() == 0)
+ {
+ // str.indexof( x, "", z ) --> -1
+ Node negone = nm->mkConst(Rational(-1));
+ return returnRewrite(node, negone, "idof-empty");
+ }
+ }
+
+ // evaluation and simple cases
+ std::vector<Node> children0;
+ getConcat(node[0], children0);
+ if (children0[0].isConst() && node[1].isConst() && node[2].isConst())
+ {
+ CVC4::Rational RMAXINT(CVC4::String::maxSize());
+ if (node[2].getConst<Rational>() > RMAXINT)
+ {
+ // We know that, due to limitations on the size of string constants
+ // in our implementation, that accessing a position greater than
+ // RMAXINT is guaranteed to be out of bounds.
+ Node negone = nm->mkConst(Rational(-1));
+ return returnRewrite(node, negone, "idof-max");
+ }
+ Assert(node[2].getConst<Rational>().sgn() >= 0);
+ unsigned start =
+ node[2].getConst<Rational>().getNumerator().toUnsignedInt();
+ CVC4::String s = children0[0].getConst<String>();
+ CVC4::String t = node[1].getConst<String>();
+ std::size_t ret = s.find(t, start);
+ if (ret != std::string::npos)
+ {
+ Node retv = nm->mkConst(Rational(static_cast<unsigned>(ret)));
+ return returnRewrite(node, retv, "idof-find");
+ }
+ else if (children0.size() == 1)
+ {
+ Node negone = nm->mkConst(Rational(-1));
+ return returnRewrite(node, negone, "idof-nfind");
+ }
+ }
+
+ Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]);
+ Node len1 = nm->mkNode(kind::STRING_LENGTH, node[1]);
+ Node len0m2 = nm->mkNode(kind::MINUS, len0, node[2]);
+ if (checkEntailArith(len1, len0m2, true))
+ {
+ // len(x)-z < len(y) implies indexof( x, y, z ) ----> -1
+ Node negone = nm->mkConst(Rational(-1));
+ return returnRewrite(node, negone, "idof-len");
+ }
+
+ Node fstr = node[0];
+ if (!node[2].isConst() || node[2].getConst<Rational>().sgn() != 0)
+ {
+ fstr = nm->mkNode(kind::STRING_SUBSTR, node[0], node[2], len0);
+ fstr = Rewriter::rewrite(fstr);
+ if (fstr.isConst())
+ {
+ CVC4::String fs = fstr.getConst<String>();
+ if (fs.size() == 0)
+ {
+ // substr( x, z, len(x) ) --> "" implies str.indexof( x, y, z ) --> -1
+ Node negone = nm->mkConst(Rational(-1));
+ return returnRewrite(node, negone, "idof-base-len");
}
- //decrement the start index
- if( start>0 ){
- if( s.size()>start ){
- start = 0;
- }else{
- start = start - s.size();
+ }
+ }
+
+ Node cmp_con = nm->mkNode(kind::STRING_STRCTN, fstr, node[1]);
+ Node cmp_conr = Rewriter::rewrite(cmp_con);
+ if (cmp_conr.isConst())
+ {
+ if (cmp_conr.getConst<bool>())
+ {
+ if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
+ {
+ // past the first position in node[0] that contains node[1], we can drop
+ std::vector<Node> children1;
+ getConcat(node[1], children1);
+ std::vector<Node> nb;
+ std::vector<Node> ne;
+ int cc = componentContains(children0, children1, nb, ne, true, 1);
+ if (cc != -1 && !ne.empty())
+ {
+ // For example:
+ // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0)
+ Node nn = mkConcat(kind::STRING_CONCAT, children0);
+ Node ret = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]);
+ return returnRewrite(node, ret, "idof-def-ctn");
}
}
- }else if( !node[2].isConst() ){
- break;
- }else{
- if( children[i]==node[1] && start==0 ){
- //can remove beyond this
- do_splice = true;
- }
- }
- if( do_splice ){
- std::vector< Node > spl;
- //since we definitely will find the string, we can safely add the length of the constant non-overlapping prefix
- if( prefixNoOverlap ){
- Node pl = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, children[0] ) );
- Assert( pl.isConst() );
- Assert( node[2].isConst() );
- int new_start = val2 - pl.getConst<Rational>().getNumerator().toUnsignedInt();
- if( new_start<0 ){
- new_start = 0;
- }
- spl.insert( spl.end(), children.begin()+1, children.begin()+i+1 );
- return NodeManager::currentNM()->mkNode( kind::PLUS, pl,
- NodeManager::currentNM()->mkNode( kind::STRING_STRIDOF, mkConcat( kind::STRING_CONCAT, spl ), node[1], NodeManager::currentNM()->mkConst( Rational(new_start) ) ) );
- }else{
- spl.insert( spl.end(), children.begin(), children.begin()+i+1 );
- return NodeManager::currentNM()->mkNode( kind::STRING_STRIDOF, mkConcat( kind::STRING_CONCAT, spl ), node[1], node[2] );
+
+ // these rewrites are only possible if we will not return -1
+ Node l1 = nm->mkNode(kind::STRING_LENGTH, node[1]);
+ Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0));
+ bool is_non_empty = checkEntailArith(l1, zero, true);
+
+ if (is_non_empty)
+ {
+ // strip symbolic length
+ Node new_len = node[2];
+ std::vector<Node> nr;
+ if (stripSymbolicLength(children0, nr, 1, new_len))
+ {
+ // For example:
+ // z>str.len( x1 ) and str.len( y )>0 and str.contains( x2, y )-->true
+ // implies
+ // str.indexof( str.++( x1, x2 ), y, z ) --->
+ // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) )
+ Node nn = mkConcat(kind::STRING_CONCAT, children0);
+ Node ret = nm->mkNode(
+ kind::PLUS,
+ nm->mkNode(kind::MINUS, node[2], new_len),
+ nm->mkNode(kind::STRING_STRIDOF, nn, node[1], new_len));
+ return returnRewrite(node, ret, "idof-strip-sym-len");
+ }
}
}
+ else
+ {
+ // str.contains( x, y ) --> false implies str.indexof(x,y,z) --> -1
+ Node negone = nm->mkConst(Rational(-1));
+ return returnRewrite(node, negone, "idof-nctn");
+ }
}
+
+ Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
return node;
}
@@ -2123,13 +2159,7 @@ Node TheoryStringsRewriter::rewritePrefixSuffix(Node n)
if (n[1].isConst())
{
CVC4::String s = n[1].getConst<String>();
- if (s.isEmptyString())
- {
- Assert(!n[0].isConst());
- Node ret = n[0].eqNode(n[1]);
- return returnRewrite(n, ret, "suf/prefix-empty");
- }
- else if (n[0].isConst())
+ if (n[0].isConst())
{
Node ret = NodeManager::currentNM()->mkConst(false);
CVC4::String t = n[0].getConst<String>();
@@ -2143,6 +2173,11 @@ Node TheoryStringsRewriter::rewritePrefixSuffix(Node n)
}
return returnRewrite(n, ret, "suf/prefix-const");
}
+ else if (s.isEmptyString())
+ {
+ Node ret = n[0].eqNode(n[1]);
+ return returnRewrite(n, ret, "suf/prefix-empty");
+ }
else if (s.size() == 1)
{
// (str.prefix x "A") and (str.suffix x "A") are equivalent to
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp
index d93c5426e..a7e5131ec 100644
--- a/src/util/regexp.cpp
+++ b/src/util/regexp.cpp
@@ -371,6 +371,11 @@ bool String::isDigit(unsigned character)
return c >= '0' && c <= '9';
}
+size_t String::maxSize()
+{
+ return std::numeric_limits<size_t>::max();
+}
+
int String::toNumber() const {
if (isNumber()) {
int ret = 0;
diff --git a/src/util/regexp.h b/src/util/regexp.h
index d51ef4372..efbb4579d 100644
--- a/src/util/regexp.h
+++ b/src/util/regexp.h
@@ -163,6 +163,11 @@ class CVC4_PUBLIC String {
*/
static bool isDigit(unsigned character);
+ /**
+ * Returns the maximum length of string representable by this class.
+ * Corresponds to the maximum size of d_str.
+ */
+ static size_t maxSize();
private:
// guarded
static unsigned char hexToDec(unsigned char c);
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback