summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTianyi Liang <tianyi-liang@uiowa.edu>2015-02-05 18:27:47 -0600
committerTianyi Liang <tianyi-liang@uiowa.edu>2015-02-05 18:28:49 -0600
commit2bc57de07b8132ee584da614ae49e4d132818f67 (patch)
tree7fa90ef5a4139d19771b7297ea9a6088a0f12207
parent45b0ba984fde882d3cd762076de0f9ddce2485c7 (diff)
Improved string performance, thanks to Peter's benchmarks.
-rw-r--r--src/theory/strings/options7
-rw-r--r--src/theory/strings/regexp_operation.cpp316
-rw-r--r--src/theory/strings/regexp_operation.h2
-rw-r--r--src/theory/strings/theory_strings.cpp30
-rw-r--r--src/theory/strings/theory_strings.h1
-rw-r--r--src/theory/strings/theory_strings_preprocess.cpp13
-rw-r--r--src/theory/strings/theory_strings_rewriter.cpp113
-rw-r--r--src/theory/strings/theory_strings_type_rules.h4
-rw-r--r--src/theory/theory_model.cpp8
-rw-r--r--test/regress/regress0/strings/Makefile.am1
10 files changed, 445 insertions, 50 deletions
diff --git a/src/theory/strings/options b/src/theory/strings/options
index dc8c50966..11156b5a4 100644
--- a/src/theory/strings/options
+++ b/src/theory/strings/options
@@ -11,6 +11,9 @@ option stringExp strings-exp --strings-exp bool :default false :read-write
option stringLB strings-lb --strings-lb=N unsigned :default 0 :predicate less_equal(2) :predicate-include "smt/smt_engine.h"
the strategy of LB rule application: 0-lazy, 1-eager, 2-no
+option stdASCII strings-std-ascii --strings-std-ascii bool :default true :predicate less_equal(2) :predicate-include "smt/smt_engine.h"
+ the alphabet contains only characters from the standard ASCII or the extended one
+
option stringFMF strings-fmf --strings-fmf bool :default false :read-write
the finite model finding used by the theory of strings
@@ -26,7 +29,7 @@ option stringOpt2 strings-opt2 --strings-opt2 bool :default false
option stringIgnNegMembership strings-inm --strings-inm bool :default false
internal for strings: ignore negative membership constraints (fragment checking is needed, left to users for now)
-expert-option stringCharCardinality strings-alphabet-card --strings-alphabet-card=N int16_t :default 256 :predicate CVC4::smt::beforeSearch :predicate-include "smt/smt_engine.h"
- the cardinality of the characters used by the theory of strings, default 256
+#expert-option stringCharCardinality strings-alphabet-card --strings-alphabet-card=N int16_t :default 128 :read-write
+# the cardinality of the characters used by the theory of strings, default 128 (for standard ASCII) or 256 (for extended ASCII)
endmodule
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
index e09f47f0f..20b13f7b1 100644
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -16,14 +16,15 @@
#include "theory/strings/regexp_operation.h"
#include "expr/kind.h"
+#include "theory/strings/options.h"
namespace CVC4 {
namespace theory {
namespace strings {
RegExpOpr::RegExpOpr()
- : d_lastchar( '\xff' ),
- RMAXINT( LONG_MAX )
+ : d_lastchar( options::stdASCII()? '\x7f' : '\xff' ),
+ RMAXINT( LONG_MAX )
{
d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
d_true = NodeManager::currentNM()->mkConst( true );
@@ -46,7 +47,7 @@ int RegExpOpr::gcd ( int a, int b ) {
}
bool RegExpOpr::checkConstRegExp( Node r ) {
- Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r ) << std::endl;
+ Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with /" << mkString( r ) << "/" << std::endl;
bool ret = true;
if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
ret = d_cstre_cache[r];
@@ -68,7 +69,7 @@ bool RegExpOpr::checkConstRegExp( Node r ) {
// 0-unknown, 1-yes, 2-no
int RegExpOpr::delta( Node r, Node &exp ) {
- Trace("regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
+ Trace("regexp-delta") << "RegExp-Delta starts with /" << mkString( r ) << "/" << std::endl;
int ret = 0;
if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
ret = d_delta_cache[r].first;
@@ -193,6 +194,14 @@ int RegExpOpr::delta( Node r, Node &exp ) {
ret = 2;
break;
}
+ case kind::REGEXP_LOOP: {
+ if(r[1] == d_zero) {
+ ret = 1;
+ } else {
+ ret = delta(r[0], exp);
+ }
+ break;
+ }
default: {
//Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
Unreachable();
@@ -211,7 +220,7 @@ int RegExpOpr::delta( Node r, Node &exp ) {
// 0-unknown, 1-yes, 2-no
int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
Assert( c.size() < 2 );
- Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
+ Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
int ret = 1;
retNode = d_emptyRegexp;
@@ -411,6 +420,26 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
break;
}
+ case kind::REGEXP_LOOP: {
+ if(r[1] == r[2] && r[1] == d_zero) {
+ ret = 2;
+ //retNode = d_emptyRegexp;
+ } else {
+ Node dc;
+ ret = derivativeS(r[0], c, dc);
+ if(dc==d_emptyRegexp) {
+ unsigned l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
+ unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
+ Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0],
+ NodeManager::currentNM()->mkConst(CVC4::Rational(l==0? 0 : (l-1))),
+ NodeManager::currentNM()->mkConst(CVC4::Rational(u-1)));
+ retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
+ } else {
+ retNode = d_emptyRegexp;
+ }
+ }
+ break;
+ }
default: {
//Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
Unreachable();
@@ -423,13 +452,13 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
d_deriv_cache[dv] = p;
}
- Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode ) << std::endl;
+ Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
return ret;
}
Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
Assert( c.size() < 2 );
- Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
+ Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
Node retNode = d_emptyRegexp;
PairNodeStr dv = std::make_pair( r, c );
if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
@@ -519,7 +548,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
vec_nodes.push_back( dc );
}
}
- Trace("regexp-derive") << "RegExp-derive OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
+ //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] /" << mkString(r[i]) << "/ returns /" << mkString(dc) << "/" << std::endl;
}
retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
@@ -559,17 +588,34 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
case kind::REGEXP_STAR: {
Node dc = derivativeSingle(r[0], c);
if(dc != d_emptyRegexp) {
- retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
+ retNode = dc==d_emptySingleton? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
} else {
retNode = d_emptyRegexp;
}
break;
}
+ case kind::REGEXP_LOOP: {
+ if(r[1] == r[2] && r[1] == d_zero) {
+ retNode = d_emptyRegexp;
+ } else {
+ Node dc = derivativeSingle(r[0], c);
+ if(dc != d_emptyRegexp) {
+ unsigned l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
+ unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
+ Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0],
+ NodeManager::currentNM()->mkConst(CVC4::Rational(l==0? 0 : (l-1))),
+ NodeManager::currentNM()->mkConst(CVC4::Rational(u-1)));
+ retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
+ } else {
+ retNode = d_emptyRegexp;
+ }
+ }
+ //Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl;
+ break;
+ }
default: {
- //TODO: special sym: sigma, none, all
Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
Unreachable();
- //return Node::null();
}
}
if(retNode != d_emptyRegexp) {
@@ -577,7 +623,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
}
d_dv_cache[dv] = retNode;
}
- Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode ) << std::endl;
+ Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
return retNode;
}
@@ -671,6 +717,14 @@ void RegExpOpr::firstChars( Node r, std::set<unsigned char> &pcset, SetNodes &pv
}
break;
}
+ case kind::REGEXP_RANGE: {
+ unsigned char a = r[0].getConst<String>().getFirstChar();
+ unsigned char b = r[1].getConst<String>().getFirstChar();
+ for(unsigned char c=a; c<=b; c++) {
+ cset.insert(c);
+ }
+ break;
+ }
case kind::STRING_TO_REGEXP: {
Node st = Rewriter::rewrite(r[0]);
if(st.isConst()) {
@@ -720,6 +774,10 @@ void RegExpOpr::firstChars( Node r, std::set<unsigned char> &pcset, SetNodes &pv
firstChars(r[0], cset, vset);
break;
}
+ case kind::REGEXP_LOOP: {
+ firstChars(r[0], cset, vset);
+ break;
+ }
default: {
Trace("regexp-error") << "Unsupported term: " << r << " in firstChars." << std::endl;
Unreachable();
@@ -843,7 +901,7 @@ bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< unsigned char > &ve
}
break;
default: {
- Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
+ Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in follow of RegExp." << std::endl;
//AlwaysAssert( false );
//return Node::null();
return false;
@@ -895,6 +953,17 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
break;
}
+ case kind::REGEXP_RANGE: {
+ std::vector< Node > vec;
+ unsigned char a = r[0].getConst<String>().getFirstChar();
+ unsigned char b = r[1].getConst<String>().getFirstChar();
+ for(unsigned char c=a; c<=b; c++) {
+ Node tmp = s.eqNode( NodeManager::currentNM()->mkConst( CVC4::String(c) ) ).negate();
+ vec.push_back( tmp );
+ }
+ conc = vec.size()==1? vec[0] : NodeManager::currentNM()->mkNode(kind::AND, vec);
+ break;
+ }
case kind::STRING_TO_REGEXP: {
conc = s.eqNode(r[0]).negate();
break;
@@ -997,8 +1066,42 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
}
break;
}
+ case kind::REGEXP_LOOP: {
+ Assert(r.getNumChildren() == 3);
+ if(r[1] == r[2]) {
+ if(r[1] == d_zero) {
+ conc = s.eqNode(d_emptyString).negate();
+ } else if(r[1] == d_one) {
+ conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]).negate();
+ } else {
+ //unroll for now
+ unsigned l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
+ std::vector<Node> vec;
+ for(unsigned i=0; i<l; i++) {
+ vec.push_back(r[0]);
+ }
+ Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec);
+ conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r2).negate();
+ }
+ } else {
+ Assert(r[1] == d_zero);
+ //unroll for now
+ unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
+ std::vector<Node> vec;
+ vec.push_back(d_emptySingleton);
+ std::vector<Node> vec2;
+ for(unsigned i=1; i<=u; i++) {
+ vec2.push_back(r[0]);
+ Node r2 = i==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec);
+ vec.push_back(r2);
+ }
+ Node r3 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec);
+ conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r3).negate();
+ }
+ break;
+ }
default: {
- Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
+ Trace("strings-error") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
Assert( false, "Unsupported Term" );
}
}
@@ -1024,6 +1127,29 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes
conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
break;
}
+ case kind::REGEXP_RANGE: {
+ conc = s.eqNode( r[0] );
+ if(r[0] != r[1]) {
+ unsigned char a = r[0].getConst<String>().getFirstChar();
+ a += 1;
+ Node tmp = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s,
+ NodeManager::currentNM()->mkNode(kind::REGEXP_RANGE,
+ NodeManager::currentNM()->mkConst( CVC4::String(a) ),
+ r[1]));
+ conc = NodeManager::currentNM()->mkNode(kind::OR, conc, tmp);
+ }
+ /*
+ unsigned char a = r[0].getConst<String>().getFirstChar();
+ unsigned char b = r[1].getConst<String>().getFirstChar();
+ std::vector< Node > vec;
+ for(unsigned char c=a; c<=b; c++) {
+ Node t2 = s.eqNode( NodeManager::currentNM()->mkConst( CVC4::String(c) ));
+ vec.push_back( t2 );
+ }
+ conc = vec.empty()? d_emptySingleton : vec.size()==1? vec[0] : NodeManager::currentNM()->mkNode(kind::OR, vec);
+ */
+ break;
+ }
case kind::STRING_TO_REGEXP: {
conc = s.eqNode(r[0]);
break;
@@ -1112,8 +1238,47 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes
}
break;
}
+ case kind::REGEXP_LOOP: {
+ Assert(r.getNumChildren() == 3);
+ if(r[1] == d_zero) {
+ if(r[2] == d_zero) {
+ conc = s.eqNode( d_emptyString );
+ } else {
+ //R{0,n}
+ if(s != d_emptyString) {
+ Node sk1 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" );
+ Node sk2 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" );
+ Node seq12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
+ Node sk1ne = sk1.eqNode(d_emptyString).negate();
+ Node sk1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
+ unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
+ Node u1 = NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1));
+ Node sk2inru = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2,
+ NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], d_zero, u1));
+ conc = NodeManager::currentNM()->mkNode(kind::AND, seq12, sk1ne, sk1inr, sk2inru);
+ conc = NodeManager::currentNM()->mkNode(kind::OR,
+ s.eqNode(d_emptyString), conc);
+ } else {
+ conc = d_true;
+ }
+ }
+ } else {
+ //R^n
+ Node sk1 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" );
+ Node sk2 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" );
+ Node seq12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
+ Node sk1ne = sk1.eqNode(d_emptyString).negate();
+ Node sk1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
+ unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
+ Node u1 = NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1));
+ Node sk2inru = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2,
+ NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], u1, u1));
+ conc = NodeManager::currentNM()->mkNode(kind::AND, seq12, sk1ne, sk1inr, sk2inru);
+ }
+ break;
+ }
default: {
- Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
+ Trace("strings-error") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
Assert( false, "Unsupported Term" );
}
}
@@ -1193,6 +1358,10 @@ void RegExpOpr::getCharSet( Node r, std::set<unsigned char> &pcset, SetNodes &pv
getCharSet(r[0], cset, vset);
break;
}
+ case kind::REGEXP_LOOP: {
+ getCharSet(r[0], cset, vset);
+ break;
+ }
default: {
//Trace("strings-error") << "Unsupported term: " << r << " in getCharSet." << std::endl;
Unreachable();
@@ -1236,6 +1405,8 @@ bool RegExpOpr::containC2(unsigned cnt, Node n) {
}
} else if(n.getKind() == kind::REGEXP_STAR) {
return containC2(cnt, n[0]);
+ } else if(n.getKind() == kind::REGEXP_LOOP) {
+ return containC2(cnt, n[0]);
} else if(n.getKind() == kind::REGEXP_UNION) {
for( unsigned i=0; i<n.getNumChildren(); i++ ) {
if(containC2(cnt, n[i])) {
@@ -1264,7 +1435,7 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
r1 = d_emptySingleton;
r2 = d_emptySingleton;
} else if(n.getKind() == kind::REGEXP_RV) {
- Assert(n[0].getConst<Rational>() <= RMAXINT, "Exceeded LONG_MAX in RegExpOpr::containC2");
+ Assert(n[0].getConst<Rational>() <= RMAXINT, "Exceeded LONG_MAX in RegExpOpr::convert2");
unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
r1 = d_emptySingleton;
if(cnt == y) {
@@ -1311,6 +1482,11 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
} else if(n.getKind() == kind::STRING_TO_REGEXP || n.getKind() == kind::REGEXP_SIGMA || n.getKind() == kind::REGEXP_RANGE) {
r1 = d_emptySingleton;
r2 = n;
+ } else if(n.getKind() == kind::REGEXP_LOOP) {
+ //TODO:LOOP
+ r1 = d_emptySingleton;
+ r2 = n;
+ //Unreachable();
} else {
//is it possible?
Unreachable();
@@ -1479,6 +1655,10 @@ Node RegExpOpr::removeIntersection(Node r) {
retNode = r;
break;
}
+ case kind::REGEXP_RANGE: {
+ retNode = r;
+ break;
+ }
case kind::STRING_TO_REGEXP: {
retNode = r;
break;
@@ -1515,6 +1695,11 @@ Node RegExpOpr::removeIntersection(Node r) {
retNode = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, retNode) );
break;
}
+ case kind::REGEXP_LOOP: {
+ retNode = removeIntersection( r[0] );
+ retNode = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, retNode, r[1], r[2]) );
+ break;
+ }
default: {
Unreachable();
}
@@ -1679,6 +1864,58 @@ void RegExpOpr::splitRegExp(Node r, std::vector< PairNodes > &pset) {
}
break;
}
+ case kind::REGEXP_LOOP: {
+ unsigned l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
+ unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
+ if(l == u) {
+ //R^n
+ if(l == 0) {
+ PairNodes tmp1(d_emptySingleton, d_emptySingleton);
+ pset.push_back(tmp1);
+ } else if(l == 1) {
+ splitRegExp(r[0], pset);
+ } else {
+ std::vector< PairNodes > tset;
+ splitRegExp(r[0], tset);
+ for(unsigned j=0; j<l; j++) {
+ Node num1 = NodeManager::currentNM()->mkConst(CVC4::Rational(j));
+ Node r1 = j==0? d_emptySingleton : j==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], num1, num1);
+ unsigned j2 = l - j - 1;
+ Node num2 = NodeManager::currentNM()->mkConst(CVC4::Rational(j2));
+ Node r2 = j2==0? d_emptySingleton : j2==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], num2, num2);
+ for(unsigned i=0; i<tset.size(); i++) {
+ r1 = tset[i].first==d_emptySingleton? r1 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, r1, tset[i].first);
+ r2 = tset[i].second==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tset[i].second, r2);
+ PairNodes tmp2(r1, r2);
+ pset.push_back(tmp2);
+ }
+ }
+ }
+ } else {
+ //R{0,n}
+ PairNodes tmp1(d_emptySingleton, d_emptySingleton);
+ pset.push_back(tmp1);
+ std::vector< PairNodes > tset;
+ splitRegExp(r[0], tset);
+ pset.insert(pset.end(), tset.begin(), tset.end());
+ for(unsigned k=2; k<=u; k++) {
+ for(unsigned j=0; j<k; j++) {
+ Node num1 = NodeManager::currentNM()->mkConst(CVC4::Rational(j));
+ Node r1 = j==0? d_emptySingleton : j==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], num1, num1);
+ unsigned j2 = k - j - 1;
+ Node num2 = NodeManager::currentNM()->mkConst(CVC4::Rational(j2));
+ Node r2 = j2==0? d_emptySingleton : j2==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], num2, num2);
+ for(unsigned i=0; i<tset.size(); i++) {
+ r1 = tset[i].first==d_emptySingleton? r1 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, r1, tset[i].first);
+ r2 = tset[i].second==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tset[i].second, r2);
+ PairNodes tmp2(r1, r2);
+ pset.push_back(tmp2);
+ }
+ }
+ }
+ }
+ break;
+ }
case kind::REGEXP_PLUS: {
std::vector< PairNodes > tset;
splitRegExp(r[0], tset);
@@ -1741,6 +1978,10 @@ void RegExpOpr::flattenRegExp(Node r, std::vector< std::pair< CVC4::String, unsi
//TODO
break;
}
+ case kind::REGEXP_LOOP: {
+ //TODO
+ break;
+ }
default: {
Unreachable();
}
@@ -1757,6 +1998,10 @@ void RegExpOpr::disjunctRegExp(Node r, std::vector<Node> &vec_or) {
vec_or.push_back(r);
break;
}
+ case kind::REGEXP_RANGE: {
+ vec_or.push_back(r);
+ break;
+ }
case kind::STRING_TO_REGEXP: {
vec_or.push_back(r);
break;
@@ -1801,6 +2046,10 @@ void RegExpOpr::disjunctRegExp(Node r, std::vector<Node> &vec_or) {
vec_or.push_back(r);
break;
}
+ case kind::REGEXP_LOOP: {
+ vec_or.push_back(r);
+ break;
+ }
default: {
Unreachable();
}
@@ -1811,7 +2060,7 @@ void RegExpOpr::disjunctRegExp(Node r, std::vector<Node> &vec_or) {
std::string RegExpOpr::niceChar(Node r) {
if(r.isConst()) {
std::string s = r.getConst<CVC4::String>().toString() ;
- return s == "" ? "{E}" : ( s == " " ? "{ }" : s );
+ return s == "." ? "\\." : s;
} else {
std::string ss = "$" + r.toString();
return ss;
@@ -1820,16 +2069,16 @@ std::string RegExpOpr::niceChar(Node r) {
std::string RegExpOpr::mkString( Node r ) {
std::string retStr;
if(r.isNull()) {
- retStr = "Phi";
+ retStr = "\\E";
} else {
int k = r.getKind();
switch( k ) {
case kind::REGEXP_EMPTY: {
- retStr += "Phi";
+ retStr += "\\E";
break;
}
case kind::REGEXP_SIGMA: {
- retStr += "{W}";
+ retStr += ".";
break;
}
case kind::STRING_TO_REGEXP: {
@@ -1846,16 +2095,12 @@ std::string RegExpOpr::mkString( Node r ) {
break;
}
case kind::REGEXP_UNION: {
- if(r == d_sigma) {
- retStr += "{A}";
- } else {
- retStr += "(";
- for(unsigned i=0; i<r.getNumChildren(); ++i) {
- if(i != 0) retStr += "|";
- retStr += mkString( r[i] );
- }
- retStr += ")";
+ retStr += "(";
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if(i != 0) retStr += "|";
+ retStr += mkString( r[i] );
}
+ retStr += ")";
break;
}
case kind::REGEXP_INTER: {
@@ -1890,6 +2135,19 @@ std::string RegExpOpr::mkString( Node r ) {
retStr += "]";
break;
}
+ case kind::REGEXP_LOOP: {
+ retStr += "(";
+ retStr += mkString(r[0]);
+ retStr += ")";
+ retStr += "{";
+ retStr += r[1].getConst<Rational>().toString();
+ retStr += ",";
+ if(r.getNumChildren() == 3) {
+ retStr += r[2].getConst<Rational>().toString();
+ }
+ retStr += "}";
+ break;
+ }
case kind::REGEXP_RV: {
retStr += "<";
retStr += r[0].getConst<Rational>().getNumerator().toString();
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h
index da84ed04c..012a573c1 100644
--- a/src/theory/strings/regexp_operation.h
+++ b/src/theory/strings/regexp_operation.h
@@ -39,7 +39,7 @@ class RegExpOpr {
typedef std::pair< Node, Node > PairNodes;
private:
- const unsigned char d_lastchar;
+ unsigned char d_lastchar;
Node d_emptyString;
Node d_true;
Node d_false;
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index a2eb58cdc..3e705d213 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -87,6 +87,7 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu
d_true = NodeManager::currentNM()->mkConst( true );
d_false = NodeManager::currentNM()->mkConst( false );
+ d_card_size = 128;
//d_opt_regexp_gcd = true;
}
@@ -255,6 +256,10 @@ Node TheoryStrings::explain( TNode literal ){
void TheoryStrings::presolve() {
Debug("strings-presolve") << "TheoryStrings::Presolving : get fmf options " << (options::stringFMF() ? "true" : "false") << std::endl;
d_opt_fmf = options::stringFMF();
+
+ if(!options::stdASCII()) {
+ d_card_size = 256;
+ }
}
@@ -2332,8 +2337,8 @@ bool TheoryStrings::checkLengthsEqc() {
}
bool TheoryStrings::checkCardinality() {
- int cardinality = options::stringCharCardinality();
- Trace("strings-solve-debug2") << "get cardinality: " << cardinality << endl;
+ //int cardinality = options::stringCharCardinality();
+ //Trace("strings-solve-debug2") << "get cardinality: " << cardinality << endl;
std::vector< Node > eqcs;
getEquivalenceClasses( eqcs );
@@ -2347,9 +2352,9 @@ bool TheoryStrings::checkCardinality() {
Trace("strings-card") << "Number of strings with length equal to " << lr << " is " << cols[i].size() << std::endl;
if( cols[i].size() > 1 ) {
// size > c^k
- double k = 1.0 + std::log((double) cols[i].size() - 1) / log((double) cardinality);
+ double k = 1.0 + std::log((double) cols[i].size() - 1) / log((double) d_card_size);
unsigned int int_k = (unsigned int) k;
- //double c_k = pow ( (double)cardinality, (double)lr );
+ //double c_k = pow ( (double)d_card_size, (double)lr );
bool allDisequal = true;
for( std::vector< Node >::iterator itr1 = cols[i].begin();
@@ -2905,7 +2910,7 @@ bool TheoryStrings::checkMemberships() {
}
//}
- Trace("regexp-debug") << "... No Intersec Conflict in Memberships " << std::endl;
+ Trace("regexp-debug") << "... No Intersec Conflict in Memberships, addedLemma: " << addedLemma << std::endl;
if(!addedLemma) {
for( unsigned i=0; i<d_regexp_memberships.size(); i++ ) {
//check regular expression membership
@@ -3002,7 +3007,20 @@ bool TheoryStrings::checkMemberships() {
Trace("strings-regexp") << "Unroll/simplify membership of atomic term " << xr << std::endl;
//if so, do simple unrolling
std::vector< Node > nvec;
- d_regexp_opr.simplify(atom, nvec, polarity);
+
+ /*if(xr.isConst()) {
+ Node tmp = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, xr, r) );
+ if(tmp==d_true || tmp==d_false) {
+ if(!polarity) {
+ tmp = tmp==d_true? d_false : d_true;
+ }
+ nvec.push_back( tmp );
+ }
+ }*/
+
+ if(nvec.empty()) {
+ d_regexp_opr.simplify(atom, nvec, polarity);
+ }
Node antec = assertion;
if(d_regexp_ant.find(assertion) != d_regexp_ant.end()) {
antec = d_regexp_ant[assertion];
diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h
index 623647942..2003bcd54 100644
--- a/src/theory/strings/theory_strings.h
+++ b/src/theory/strings/theory_strings.h
@@ -128,6 +128,7 @@ private:
Node d_zero;
Node d_one;
CVC4::Rational RMAXINT;
+ unsigned d_card_size;
// Options
bool d_opt_fmf;
bool d_opt_regexp_gcd;
diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp
index 71db11fe1..a8cbcfac0 100644
--- a/src/theory/strings/theory_strings_preprocess.cpp
+++ b/src/theory/strings/theory_strings_preprocess.cpp
@@ -43,6 +43,14 @@ void StringsPreprocess::processRegExp( Node s, Node r, std::vector< Node > &ret
ret.push_back( eq );
break;
}
+ case kind::REGEXP_RANGE: {
+ Node one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
+ Node eq = one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
+ ret.push_back( eq );
+ eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r );
+ ret.push_back( eq );
+ break;
+ }
case kind::STRING_TO_REGEXP: {
Node eq = s.eqNode( r[0] );
ret.push_back( eq );
@@ -95,6 +103,11 @@ void StringsPreprocess::processRegExp( Node s, Node r, std::vector< Node > &ret
}
break;
}
+ case kind::REGEXP_LOOP: {
+ Node eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r );
+ ret.push_back( eq );
+ break;
+ }
default: {
Trace("strings-error") << "Unsupported term: " << r << " in simplifyRegExp." << std::endl;
Assert( false, "Unsupported Term" );
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp
index 6960659e8..3396cc1a9 100644
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -737,6 +737,56 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i
return false;
}
}
+ case kind::REGEXP_LOOP: {
+ unsigned l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
+ if(s.size() == index_start) {
+ return l==0? true : testConstStringInRegExp(s, index_start, r[0]);
+ } else if(l==0 && r[1]==r[2]) {
+ return false;
+ } else {
+ Assert(r.getNumChildren() == 3, "String rewriter error: LOOP has 2 childrens");
+ if(l==0) {
+ //R{0,u}
+ unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
+ for(unsigned len=s.size() - index_start; len>=1; len--) {
+ CVC4::String t = s.substr(index_start, len);
+ if(testConstStringInRegExp(t, 0, r[0])) {
+ if(len + index_start == s.size()) {
+ return true;
+ } else {
+ Node num2 = NodeManager::currentNM()->mkConst( CVC4::Rational(u - 1) );
+ Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], r[1], num2);
+ if(testConstStringInRegExp(s, index_start+len, r2)) {
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ } else {
+ //R{l,l}
+ Assert(r[1]==r[2], "String rewriter error: LOOP nums are not equal");
+ if(l>s.size() - index_start) {
+ if(testConstStringInRegExp(s, s.size(), r[0])) {
+ l = s.size() - index_start;
+ } else {
+ return false;
+ }
+ }
+ for(unsigned len=1; len<=s.size() - index_start; len++) {
+ CVC4::String t = s.substr(index_start, len);
+ if(testConstStringInRegExp(t, 0, r[0])) {
+ Node num2 = NodeManager::currentNM()->mkConst( CVC4::Rational(l - 1) );
+ Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], num2, num2);
+ if(testConstStringInRegExp(s, index_start+len, r2)) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+ }
+ }
default: {
Trace("strings-error") << "Unsupported term: " << r << " in testConstStringInRegExp." << std::endl;
Unreachable();
@@ -1129,7 +1179,10 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) {
NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String("") ) ),
node[0]);
} else if(node.getKind() == kind::REGEXP_RANGE) {
- std::vector< Node > vec_nodes;
+ if(node[0] == node[1]) {
+ retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, node[0] );
+ }
+ /*std::vector< Node > vec_nodes;
unsigned char c = node[0].getConst<String>().getFirstChar();
unsigned char end = node[1].getConst<String>().getFirstChar();
for(; c<=end; ++c) {
@@ -1140,18 +1193,64 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) {
retNode = vec_nodes[0];
} else {
retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
- }
+ }*/
} else if(node.getKind() == kind::REGEXP_LOOP) {
Node r = node[0];
if(r.getKind() == kind::REGEXP_STAR) {
retNode = r;
} else {
+ Node n1 = Rewriter::rewrite( node[1] );
+ if(!n1.isConst()) {
+ throw LogicException("re.loop contains non-constant integer (1).");
+ }
+ CVC4::Rational rz(0);
+ CVC4::Rational RMAXINT(LONG_MAX);
+ AlwaysAssert(rz <= n1.getConst<Rational>(), "Negative integer in string REGEXP_LOOP (1)");
+ Assert(n1.getConst<Rational>() <= RMAXINT, "Exceeded LONG_MAX in string REGEXP_LOOP (1)");
+ unsigned l = n1.getConst<Rational>().getNumerator().toUnsignedInt();
+ if(node.getNumChildren() == 3) {
+ Node n2 = Rewriter::rewrite( node[2] );
+ if(!n2.isConst()) {
+ throw LogicException("re.loop contains non-constant integer (2).");
+ }
+ if(n1 == n2) {
+ if(l == 0) {
+ retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP,
+ NodeManager::currentNM()->mkConst(CVC4::String("")));
+ } else if(l == 1) {
+ retNode = node[0];
+ }
+ } else {
+ AlwaysAssert(rz <= n2.getConst<Rational>(), "Negative integer in string REGEXP_LOOP (2)");
+ Assert(n2.getConst<Rational>() <= RMAXINT, "Exceeded LONG_MAX in string REGEXP_LOOP (2)");
+ unsigned u = n2.getConst<Rational>().getNumerator().toUnsignedInt();
+ AlwaysAssert(l <= u, "REGEXP_LOOP (1) > REGEXP_LOOP (2)");
+ if(l != 0) {
+ Node zero = NodeManager::currentNM()->mkConst( CVC4::Rational(0) );
+ Node num = NodeManager::currentNM()->mkConst( CVC4::Rational(u - l) );
+ Node t1 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, node[0], n1, n1);
+ Node t2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, node[0], zero, num);
+ retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, t1, t2);
+ }
+ }
+ } else {
+ retNode = l==0? NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, node[0]) :
+ NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
+ NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, node[0], n1, n1),
+ NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, node[0]));
+ }
+ }
+ /*else {
TNode n1 = Rewriter::rewrite( node[1] );
+ //
if(!n1.isConst()) {
throw LogicException("re.loop contains non-constant integer (1).");
}
+ CVC4::Rational rz(0);
CVC4::Rational RMAXINT(LONG_MAX);
+ AlwaysAssert(rz <= n1.getConst<Rational>(), "Negative integer in string REGEXP_LOOP (1)");
Assert(n1.getConst<Rational>() <= RMAXINT, "Exceeded LONG_MAX in string REGEXP_LOOP (1)");
+ //
unsigned l = n1.getConst<Rational>().getNumerator().toUnsignedInt();
std::vector< Node > vec_nodes;
for(unsigned i=0; i<l; i++) {
@@ -1159,12 +1258,12 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) {
}
if(node.getNumChildren() == 3) {
TNode n2 = Rewriter::rewrite( node[2] );
- if(!n2.isConst()) {
- throw LogicException("re.loop contains non-constant integer (2).");
- }
+ //if(!n2.isConst()) {
+ // throw LogicException("re.loop contains non-constant integer (2).");
+ //}
Node n = vec_nodes.size()==0 ? NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(CVC4::String("")))
: vec_nodes.size()==1 ? r : prerewriteConcatRegExp(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes));
- Assert(n2.getConst<Rational>() <= RMAXINT, "Exceeded LONG_MAX in string REGEXP_LOOP (2)");
+ //Assert(n2.getConst<Rational>() <= RMAXINT, "Exceeded LONG_MAX in string REGEXP_LOOP (2)");
unsigned u = n2.getConst<Rational>().getNumerator().toUnsignedInt();
if(u <= l) {
retNode = n;
@@ -1185,7 +1284,7 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) {
:NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes), rest) );
}
- }
+ }*/
Trace("strings-lp") << "Strings::lp " << node << " => " << retNode << std::endl;
}
diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h
index e053f48da..13f3b3b73 100644
--- a/src/theory/strings/theory_strings_type_rules.h
+++ b/src/theory/strings/theory_strings_type_rules.h
@@ -15,6 +15,7 @@
**/
#include "cvc4_private.h"
+#include "theory/strings/options.h"
#ifndef __CVC4__THEORY__STRINGS__THEORY_STRINGS_TYPE_RULES_H
#define __CVC4__THEORY__STRINGS__THEORY_STRINGS_TYPE_RULES_H
@@ -368,6 +369,9 @@ public:
if(ch[0] > ch[1]) {
throw TypeCheckingExceptionPrivate(n, "expecting the first constant is less or equal to the second one in regexp range");
}
+ if(options::stdASCII() && ch[1] > '\x7f') {
+ throw TypeCheckingExceptionPrivate(n, "expecting standard ASCII characters in regexp range, or please set the option strings-std-ascii to be false");
+ }
}
return nodeManager->regexpType();
}
diff --git a/src/theory/theory_model.cpp b/src/theory/theory_model.cpp
index 7d385c398..435abc568 100644
--- a/src/theory/theory_model.cpp
+++ b/src/theory/theory_model.cpp
@@ -852,11 +852,11 @@ Node TheoryEngineModelBuilder::normalize(TheoryModel* m, TNode r, std::map< Node
itMap = constantReps.find(m->d_equalityEngine->getRepresentative(ri));
if (itMap != constantReps.end()) {
ri = (*itMap).second;
- recurse = false;
+ recurse = false;
}
else if (!evalOnly) {
- recurse = false;
- }
+ recurse = false;
+ }
}
if (recurse) {
ri = normalize(m, ri, constantReps, evalOnly);
@@ -870,7 +870,7 @@ Node TheoryEngineModelBuilder::normalize(TheoryModel* m, TNode r, std::map< Node
retNode = NodeManager::currentNM()->mkNode( r.getKind(), children );
if (childrenConst) {
retNode = Rewriter::rewrite(retNode);
- Assert(retNode.getKind() == kind::APPLY_UF || retNode.isConst());
+ Assert(retNode.getKind()==kind::APPLY_UF || retNode.getKind()==kind::REGEXP_RANGE || retNode.isConst());
}
}
d_normalizedCache[r] = retNode;
diff --git a/test/regress/regress0/strings/Makefile.am b/test/regress/regress0/strings/Makefile.am
index 414904919..32876bccd 100644
--- a/test/regress/regress0/strings/Makefile.am
+++ b/test/regress/regress0/strings/Makefile.am
@@ -22,7 +22,6 @@ TESTS = \
at001.smt2 \
bug001.smt2 \
bug002.smt2 \
- cardinality.smt2 \
escchar.smt2 \
escchar_25.smt2 \
str001.smt2 \
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback