diff options
author | Tianyi Liang <tianyi-liang@uiowa.edu> | 2014-02-28 11:05:09 -0600 |
---|---|---|
committer | Tianyi Liang <tianyi-liang@uiowa.edu> | 2014-02-28 11:05:09 -0600 |
commit | cdefee8664d3e99a4cd97c4affbc639937187d50 (patch) | |
tree | d13b964dc8c176fbb226baa810c0e0d4eaadb303 | |
parent | 7a6c462b0ce3adf52a9d44a5f98c53065fedc33d (diff) |
a new regular expression engine for solving both positive and negative membership constraints
21 files changed, 624 insertions, 386 deletions
diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g index 9b598e113..cec6255a4 100644 --- a/src/parser/smt2/Smt2.g +++ b/src/parser/smt2/Smt2.g @@ -1299,7 +1299,7 @@ builtinOp[CVC4::Kind& kind] | STRINRE_TOK { $kind = CVC4::kind::STRING_IN_REGEXP; } | STRTORE_TOK { $kind = CVC4::kind::STRING_TO_REGEXP; } | RECON_TOK { $kind = CVC4::kind::REGEXP_CONCAT; } - | REOR_TOK { $kind = CVC4::kind::REGEXP_OR; } + | REUNION_TOK { $kind = CVC4::kind::REGEXP_UNION; } | REINTER_TOK { $kind = CVC4::kind::REGEXP_INTER; } | RESTAR_TOK { $kind = CVC4::kind::REGEXP_STAR; } | REPLUS_TOK { $kind = CVC4::kind::REGEXP_PLUS; } @@ -1699,8 +1699,8 @@ STRSTOI_TOK : 'str.to.int' ; STRINRE_TOK : 'str.in.re'; STRTORE_TOK : 'str.to.re'; RECON_TOK : 're.++'; -REOR_TOK : 're.or'; -REINTER_TOK : 're.itr'; +REUNION_TOK : 're.union'; +REINTER_TOK : 're.inter'; RESTAR_TOK : 're.*'; REPLUS_TOK : 're.+'; REOPT_TOK : 're.opt'; diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp index a2a925d13..c182906de 100644 --- a/src/printer/smt2/smt2_printer.cpp +++ b/src/printer/smt2/smt2_printer.cpp @@ -340,8 +340,8 @@ void Smt2Printer::toStream(std::ostream& out, TNode n, case kind::STRING_STOI: out << "str.to.int "; break; case kind::STRING_TO_REGEXP: out << "str.to.re "; break; case kind::REGEXP_CONCAT: out << "re.++ "; break; - case kind::REGEXP_OR: out << "re.or "; break; - case kind::REGEXP_INTER: out << "re.itr "; break; + case kind::REGEXP_UNION: out << "re.union "; break; + case kind::REGEXP_INTER: out << "re.inter "; break; case kind::REGEXP_STAR: out << "re.* "; break; case kind::REGEXP_PLUS: out << "re.+ "; break; case kind::REGEXP_OPT: out << "re.opt "; break; diff --git a/src/smt/smt_engine.cpp b/src/smt/smt_engine.cpp index 9abd6e165..4719af3c0 100644 --- a/src/smt/smt_engine.cpp +++ b/src/smt/smt_engine.cpp @@ -793,6 +793,14 @@ void SmtEngine::finalOptionsAreSet() { return; } + // set strings-exp + /* + if(!d_logic.hasEverything() && d_logic.isTheoryEnabled(THEORY_STRINGS) ) { + if(! options::stringExp.wasSetByUser()) { + options::stringExp.set( true ); + Trace("smt") << "turning on strings-exp, for the theory of strings" << std::endl; + } + }*/ // for strings if(options::stringExp()) { if( !d_logic.isQuantified() ) { @@ -808,11 +816,11 @@ void SmtEngine::finalOptionsAreSet() { options::fmfBoundInt.set( true ); Trace("smt") << "turning on fmf-bound-int, for strings-exp" << std::endl; } + /* if(! options::rewriteDivk.wasSetByUser()) { options::rewriteDivk.set( true ); Trace("smt") << "turning on rewrite-divk, for strings-exp" << std::endl; - } - + }*/ /* if(! options::stringFMF.wasSetByUser()) { options::stringFMF.set( true ); diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index f8f8b9555..2c9ba4b3f 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -71,7 +71,7 @@ typerule CONST_REGEXP ::CVC4::theory::strings::RegExpConstantTypeRule # equal equal / less than / output operator STRING_TO_REGEXP 1 "convert string to regexp" operator REGEXP_CONCAT 2: "regexp concat" -operator REGEXP_OR 2: "regexp or" +operator REGEXP_UNION 2: "regexp union" operator REGEXP_INTER 2: "regexp intersection" operator REGEXP_STAR 1 "regexp *" operator REGEXP_PLUS 1 "regexp +" @@ -97,7 +97,7 @@ constant REGEXP_SIGMA \ # "a regexp contains all strings" typerule REGEXP_CONCAT ::CVC4::theory::strings::RegExpConcatTypeRule -typerule REGEXP_OR ::CVC4::theory::strings::RegExpOrTypeRule +typerule REGEXP_UNION ::CVC4::theory::strings::RegExpUnionTypeRule typerule REGEXP_INTER ::CVC4::theory::strings::RegExpInterTypeRule typerule REGEXP_STAR ::CVC4::theory::strings::RegExpStarTypeRule typerule REGEXP_PLUS ::CVC4::theory::strings::RegExpPlusTypeRule diff --git a/src/theory/strings/options b/src/theory/strings/options index 139eca6ac..9bc9e2582 100644 --- a/src/theory/strings/options +++ b/src/theory/strings/options @@ -5,19 +5,19 @@ module STRINGS "theory/strings/options.h" Strings theory -expert-option stringCharCardinality strings-alphabet-card --strings-alphabet-card=N int16_t :default 256 :predicate CVC4::smt::beforeSearch :predicate-include "smt/smt_engine.h" - the cardinality of the characters used by the theory of strings, default 256 +option stringExp strings-exp --strings-exp bool :default false :read-write + experimental features in the theory of strings -option stringRegExpUnrollDepth strings-regexp-depth --strings-regexp-depth=N int16_t :default 10 :read-write - the depth of unrolloing regular expression used by the theory of strings, default 10 +option stringLB strings-lb --strings-lb=N unsigned :default 0 :predicate less_equal(2) :predicate-include "smt/smt_engine.h" + the strategy of LB rule application: 0-lazy, 1-eager, 2-no option stringFMF strings-fmf --strings-fmf bool :default false :read-write the finite model finding used by the theory of strings -option stringLB strings-lb --strings-lb=N unsigned :default 0 :predicate less_equal(2) :predicate-include "smt/smt_engine.h" - the strategy of LB rule application: 0-lazy, 1-eager, 2-no +expert-option stringCharCardinality strings-alphabet-card --strings-alphabet-card=N int16_t :default 256 :predicate CVC4::smt::beforeSearch :predicate-include "smt/smt_engine.h" + the cardinality of the characters used by the theory of strings, default 256 -option stringExp strings-exp --strings-exp bool :default false :predicate CVC4::smt::beforeSearch :predicate-include "smt/smt_engine.h" - experimental features in the theory of strings +option stringRegExpUnrollDepth strings-regexp-depth --strings-regexp-depth=N int16_t :default 10 :read-write + the depth of unrolloing regular expression used by the theory of strings, default 10 (deprecated) endmodule diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index e608a0533..6869e45f3 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -34,10 +34,16 @@ namespace strings { RegExpOpr::RegExpOpr() {
d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
+ d_true = NodeManager::currentNM()->mkConst( true );
+ d_false = NodeManager::currentNM()->mkConst( false );
+ d_emptyRegexp = NodeManager::currentNM()->mkConst( kind::REGEXP_EMPTY );
+ d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
+ d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
// All Charactors = all printable ones 32-126
d_char_start = 'a';//' ';
d_char_end = 'c';//'~';
d_sigma = mkAllExceptOne( '\0' );
+ //d_sigma = NodeManager::currentNM()->mkConst( kind::REGEXP_SIGMA );
d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
}
@@ -79,8 +85,15 @@ int RegExpOpr::delta( Node r ) { } else {
int k = r.getKind();
switch( k ) {
- case kind::STRING_TO_REGEXP:
- {
+ case kind::REGEXP_EMPTY: {
+ ret = 2;
+ break;
+ }
+ case kind::REGEXP_SIGMA: {
+ ret = 2;
+ break;
+ }
+ case kind::STRING_TO_REGEXP: {
if(r[0].isConst()) {
if(r[0] == d_emptyString) {
ret = 1;
@@ -90,10 +103,9 @@ int RegExpOpr::delta( Node r ) { } else {
ret = 0;
}
- }
break;
- case kind::REGEXP_CONCAT:
- {
+ }
+ case kind::REGEXP_CONCAT: {
bool flag = false;
for(unsigned i=0; i<r.getNumChildren(); ++i) {
int tmp = delta( r[i] );
@@ -107,10 +119,9 @@ int RegExpOpr::delta( Node r ) { if(!flag && ret != 2) {
ret = 1;
}
- }
break;
- case kind::REGEXP_OR:
- {
+ }
+ case kind::REGEXP_UNION: {
bool flag = false;
for(unsigned i=0; i<r.getNumChildren(); ++i) {
int tmp = delta( r[i] );
@@ -124,10 +135,9 @@ int RegExpOpr::delta( Node r ) { if(!flag && ret != 1) {
ret = 2;
}
- }
break;
- case kind::REGEXP_INTER:
- {
+ }
+ case kind::REGEXP_INTER: {
bool flag = true;
for(unsigned i=0; i<r.getNumChildren(); ++i) {
int tmp = delta( r[i] );
@@ -142,33 +152,29 @@ int RegExpOpr::delta( Node r ) { if(flag) {
ret = 1;
}
- }
break;
- case kind::REGEXP_STAR:
- {
- ret = 1;
}
+ case kind::REGEXP_STAR: {
+ ret = 1;
break;
- case kind::REGEXP_PLUS:
- {
- ret = delta( r[0] );
}
+ case kind::REGEXP_PLUS: {
+ ret = delta( r[0] );
break;
- case kind::REGEXP_OPT:
- {
- ret = 1;
}
+ case kind::REGEXP_OPT: {
+ ret = 1;
break;
- case kind::REGEXP_RANGE:
- {
- ret = 2;
}
+ case kind::REGEXP_RANGE: {
+ ret = 2;
break;
- default:
- //TODO: special sym: sigma, none, all
+ }
+ default: {
Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
- //AlwaysAssert( false );
+ AlwaysAssert( false );
//return Node::null();
+ }
}
d_delta_cache[r] = ret;
}
@@ -176,63 +182,69 @@ int RegExpOpr::delta( Node r ) { return ret;
}
-//null - no solution
Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
Assert( c.size() < 2 );
Trace("strings-regexp-derivative") << "RegExp-derivative starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
- Node retNode = Node::null();
+ Node retNode = d_emptyRegexp;
PairDvStr dv = std::make_pair( r, c );
if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
retNode = d_dv_cache[dv];
} else if( c.isEmptyString() ){
int tmp = delta( r );
if(tmp == 0) {
- retNode = Node::null();
// TODO variable
+ retNode = d_emptyRegexp;
} else if(tmp == 1) {
retNode = r;
} else {
- retNode = Node::null();
+ retNode = d_emptyRegexp;
}
} else {
int k = r.getKind();
switch( k ) {
- case kind::STRING_TO_REGEXP:
- {
+ case kind::REGEXP_EMPTY: {
+ retNode = d_emptyRegexp;
+ break;
+ }
+ case kind::REGEXP_SIGMA: {
+ retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
+ break;
+ }
+ case kind::STRING_TO_REGEXP: {
if(r[0].isConst()) {
if(r[0] == d_emptyString) {
- retNode = Node::null();
+ retNode = d_emptyRegexp;
} else {
if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
- retNode = r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
- NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
+ retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
+ r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
} else {
- retNode = Node::null();
+ retNode = d_emptyRegexp;
}
}
} else {
- retNode = Node::null();
// TODO variable
+ retNode = d_emptyRegexp;
}
- }
break;
- case kind::REGEXP_CONCAT:
- {
+ }
+ case kind::REGEXP_CONCAT: {
+ Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
std::vector< Node > vec_nodes;
for(unsigned i=0; i<r.getNumChildren(); ++i) {
Node dc = derivativeSingle(r[i], c);
- if(!dc.isNull()) {
+ if(dc != d_emptyRegexp) {
std::vector< Node > vec_nodes2;
- if(dc != d_emptyString) {
+ if(dc != rees) {
vec_nodes2.push_back( dc );
}
for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
- if(r[j] != d_emptyString) {
+ if(r[j] != rees) {
vec_nodes2.push_back( r[j] );
}
}
- Node tmp = vec_nodes2.size()==0 ? d_emptyString :
- ( vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 ) );
+ Node tmp = vec_nodes2.size()==0 ? rees :
+ vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
vec_nodes.push_back( tmp );
}
@@ -242,34 +254,32 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { break;
}
}
- retNode = vec_nodes.size() == 0 ? Node::null() :
- ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ) );
- }
+ retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
+ ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
break;
- case kind::REGEXP_OR:
- {
+ }
+ case kind::REGEXP_UNION: {
std::vector< Node > vec_nodes;
for(unsigned i=0; i<r.getNumChildren(); ++i) {
Node dc = derivativeSingle(r[i], c);
- if(!dc.isNull()) {
+ if(dc != d_emptyRegexp) {
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
vec_nodes.push_back( dc );
}
}
Trace("strings-regexp-derivative") << "RegExp-derivative OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
}
- retNode = vec_nodes.size() == 0 ? Node::null() :
- ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ) );
- }
+ retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
+ ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
break;
- case kind::REGEXP_INTER:
- {
+ }
+ case kind::REGEXP_INTER: {
bool flag = true;
bool flag_sg = false;
std::vector< Node > vec_nodes;
for(unsigned i=0; i<r.getNumChildren(); ++i) {
Node dc = derivativeSingle(r[i], c);
- if(!dc.isNull()) {
+ if(dc != d_emptyRegexp) {
if(dc == d_sigma_star) {
flag_sg = true;
} else {
@@ -286,56 +296,31 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { if(vec_nodes.size() == 0 && flag_sg) {
retNode = d_sigma_star;
} else {
- retNode = vec_nodes.size() == 0 ? Node::null() :
+ retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
}
} else {
- retNode = Node::null();
+ retNode = d_emptyRegexp;
}
- }
break;
- case kind::REGEXP_STAR:
- {
- Node dc = derivativeSingle(r[0], c);
- if(!dc.isNull()) {
- retNode = dc==d_emptyString ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
- } else {
- retNode = Node::null();
- }
}
- break;
- case kind::REGEXP_PLUS:
- {
+ case kind::REGEXP_STAR: {
Node dc = derivativeSingle(r[0], c);
- if(!dc.isNull()) {
- retNode = dc==d_emptyString ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
+ if(dc != d_emptyRegexp) {
+ retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
} else {
- retNode = Node::null();
+ retNode = d_emptyRegexp;
}
- }
- break;
- case kind::REGEXP_OPT:
- {
- retNode = derivativeSingle(r[0], c);
- }
break;
- case kind::REGEXP_RANGE:
- {
- char ch = c.getFirstChar();
- if (ch >= r[0].getConst< CVC4::String >().getFirstChar() && ch <= r[1].getConst< CVC4::String >().getFirstChar() ) {
- retNode = d_emptyString;
- } else {
- retNode = Node::null();
- }
}
- break;
- default:
+ default: {
//TODO: special sym: sigma, none, all
Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
- //AlwaysAssert( false );
+ Assert( false, "Unsupported Term" );
//return Node::null();
+ }
}
- if(!retNode.isNull()) {
+ if(retNode != d_emptyRegexp) {
retNode = Rewriter::rewrite( retNode );
}
d_dv_cache[dv] = retNode;
@@ -368,7 +353,7 @@ bool RegExpOpr::guessLength( Node r, int &co ) { return true;
}
break;
- case kind::REGEXP_OR:
+ case kind::REGEXP_UNION:
{
int g_co;
for(unsigned i=0; i<r.getNumChildren(); ++i) {
@@ -474,7 +459,7 @@ bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) return true;
}
break;
- case kind::REGEXP_OR:
+ case kind::REGEXP_UNION:
{
bool flag = false;
for(unsigned i=0; i<r.getNumChildren(); ++i) {
@@ -557,7 +542,7 @@ Node RegExpOpr::mkAllExceptOne( char exp_c ) { vec_nodes.push_back( n );
}
}
- return NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
+ return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
}
Node RegExpOpr::complement( Node r ) {
@@ -590,7 +575,7 @@ Node RegExpOpr::complement( Node r ) { }
Node tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, d_sigma, d_sigma_star );
vec_nodes.push_back( tmp );
- retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
}
} else {
Trace("strings-error") << "Unsupported string variable: " << r[0] << " in complement of RegExp." << std::endl;
@@ -613,10 +598,10 @@ Node RegExpOpr::complement( Node r ) { }
vec_nodes.push_back( tmp );
}
- retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
}
break;
- case kind::REGEXP_OR:
+ case kind::REGEXP_UNION:
{
std::vector< Node > vec_nodes;
for(unsigned i=0; i<r.getNumChildren(); ++i) {
@@ -633,7 +618,7 @@ Node RegExpOpr::complement( Node r ) { Node tmp = complement( r[i] );
vec_nodes.push_back( tmp );
}
- retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
}
break;
case kind::REGEXP_STAR:
@@ -657,13 +642,163 @@ Node RegExpOpr::complement( Node r ) { }
//simplify
-void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes) {
+void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) {
+ Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl;
Assert(t.getKind() == kind::STRING_IN_REGEXP);
Node str = Rewriter::rewrite(t[0]);
Node re = Rewriter::rewrite(t[1]);
- simplifyRegExp( str, re, new_nodes );
+ if(polarity) {
+ simplifyPRegExp( str, re, new_nodes );
+ } else {
+ simplifyNRegExp( str, re, new_nodes );
+ }
+ Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n";
+ for(unsigned i=0; i<new_nodes.size(); i++) {
+ Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl;
+ }
}
-void RegExpOpr::simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
+void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
+ std::pair < Node, Node > p(s, r);
+ std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
+ if(itr != d_simpl_neg_cache.end()) {
+ new_nodes.push_back( itr->second );
+ return;
+ } else {
+ int k = r.getKind();
+ Node conc;
+ switch( k ) {
+ case kind::REGEXP_EMPTY: {
+ conc = d_true;
+ break;
+ }
+ case kind::REGEXP_SIGMA: {
+ conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
+ break;
+ }
+ case kind::STRING_TO_REGEXP: {
+ conc = s.eqNode(r[0]).negate();
+ break;
+ }
+ case kind::REGEXP_CONCAT: {
+ //TODO: rewrite empty
+ Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
+ Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
+ Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero),
+ NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) );
+ Node s1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
+ Node s2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
+ Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, s1, s2);
+ Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2));
+ Node s1len = b1.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1));
+ Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
+ if(r[0].getKind() == kind::STRING_TO_REGEXP) {
+ s1r1 = s1.eqNode(r[0][0]).negate();
+ } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
+ s1r1 = d_true;
+ }
+ Node r2 = r[1];
+ if(r.getNumChildren() > 2) {
+ std::vector< Node > nvec;
+ for(unsigned i=1; i<r.getNumChildren(); i++) {
+ nvec.push_back( r[i] );
+ }
+ r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, nvec);
+ }
+ r2 = Rewriter::rewrite(r2);
+ Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate();
+ if(r2.getKind() == kind::STRING_TO_REGEXP) {
+ s2r2 = s2.eqNode(r2[0]).negate();
+ } else if(r2.getKind() == kind::REGEXP_EMPTY) {
+ s2r2 = d_true;
+ }
+
+ conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
+ conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1len, conc);
+ conc = NodeManager::currentNM()->mkNode(kind::EXISTS, b2v, conc);
+ conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
+ conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
+ break;
+ }
+ case kind::REGEXP_UNION: {
+ std::vector< Node > c_and;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if(r[i].getKind() == kind::STRING_TO_REGEXP) {
+ c_and.push_back( r[i][0].eqNode(s).negate() );
+ } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
+ continue;
+ } else {
+ c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
+ }
+ }
+ conc = c_and.size() == 0 ? d_true :
+ c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
+ break;
+ }
+ case kind::REGEXP_INTER: {
+ bool emptyflag = false;
+ std::vector< Node > c_or;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if(r[i].getKind() == kind::STRING_TO_REGEXP) {
+ c_or.push_back( r[i][0].eqNode(s).negate() );
+ } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
+ emptyflag = true;
+ break;
+ } else {
+ c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
+ }
+ }
+ if(emptyflag) {
+ conc = d_true;
+ } else {
+ conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
+ }
+ break;
+ }
+ case kind::REGEXP_STAR: {
+ if(s == d_emptyString) {
+ conc = d_false;
+ } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
+ conc = s.eqNode(d_emptyString).negate();
+ } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
+ conc = d_false;
+ } else {
+ Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
+ Node sne = s.eqNode(d_emptyString).negate();
+ Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
+ Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
+ Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one),
+ NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) );
+ //internal
+ Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1);
+ Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1));
+ //Node s1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
+ //Node s2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
+ //Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, s1, s2);
+ //Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2));
+ //Node s1len = b1.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1));
+ Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
+ Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate();
+
+ conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
+ //conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1len, conc);
+ //conc = NodeManager::currentNM()->mkNode(kind::EXISTS, b2v, conc);
+ conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
+ conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
+ conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc);
+ }
+ break;
+ }
+ default: {
+ Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
+ AlwaysAssert( false, "Unsupported Term" );
+ }
+ }
+ conc = Rewriter::rewrite( conc );
+ new_nodes.push_back( conc );
+ d_simpl_neg_cache[p] = conc;
+ }
+}
+void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
std::pair < Node, Node > p(s, r);
std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
if(itr != d_simpl_cache.end()) {
@@ -671,34 +806,24 @@ void RegExpOpr::simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes ) return;
} else {
int k = r.getKind();
+ Node conc;
switch( k ) {
- case kind::REGEXP_EMPTY:
- {
- Node eq = NodeManager::currentNM()->mkConst( false );
- new_nodes.push_back( eq );
- d_simpl_cache[p] = eq;
- }
+ case kind::REGEXP_EMPTY: {
+ conc = d_false;
break;
- case kind::REGEXP_SIGMA:
- {
- Node one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
- Node eq = one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
- new_nodes.push_back( eq );
- d_simpl_cache[p] = eq;
}
+ case kind::REGEXP_SIGMA: {
+ conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
break;
- case kind::STRING_TO_REGEXP:
- {
- Node eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s, r[0] );
- new_nodes.push_back( eq );
- d_simpl_cache[p] = eq;
}
+ case kind::STRING_TO_REGEXP: {
+ conc = s.eqNode(r[0]);
break;
- case kind::REGEXP_CONCAT:
- {
+ }
+ case kind::REGEXP_CONCAT: {
+ std::vector< Node > nvec;
std::vector< Node > cc;
bool emptyflag = false;
- Node ff = NodeManager::currentNM()->mkConst( false );
for(unsigned i=0; i<r.getNumChildren(); ++i) {
if(r[i].getKind() == kind::STRING_TO_REGEXP) {
cc.push_back( r[i][0] );
@@ -706,71 +831,87 @@ void RegExpOpr::simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes ) emptyflag = true;
break;
} else {
- Node sk = NodeManager::currentNM()->mkSkolem( "rcon_$$", s.getType(), "created for regular expression concat" );
- simplifyRegExp( sk, r[i], new_nodes );
- if(new_nodes.size() != 0) {
- if(new_nodes[new_nodes.size() - 1] == ff) {
- emptyflag = true;
- break;
- }
- }
- cc.push_back( sk );
+ Node sk = NodeManager::currentNM()->mkSkolem( "rc_$$", s.getType(), "created for regular expression concat" );
+ Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]);
+ nvec.push_back(lem);
+ cc.push_back(sk);
}
}
if(emptyflag) {
- new_nodes.push_back( ff );
- d_simpl_cache[p] = ff;
+ conc = d_false;
} else {
- Node cc_eq = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
- new_nodes.push_back( cc_eq );
- d_simpl_cache[p] = cc_eq;
+ Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
+ nvec.push_back(lem);
+ conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec);
}
- }
break;
- case kind::REGEXP_OR:
- {
+ }
+ case kind::REGEXP_UNION: {
std::vector< Node > c_or;
for(unsigned i=0; i<r.getNumChildren(); ++i) {
- simplifyRegExp( s, r[i], c_or );
+ if(r[i].getKind() == kind::STRING_TO_REGEXP) {
+ c_or.push_back( r[i][0].eqNode(s) );
+ } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
+ continue;
+ } else {
+ c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
+ }
}
- Node eq = NodeManager::currentNM()->mkNode( kind::OR, c_or );
- new_nodes.push_back( eq );
- d_simpl_cache[p] = eq;
- }
+ conc = c_or.size() == 0 ? d_false :
+ c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
break;
- case kind::REGEXP_INTER:
- {
- Node nfalse = NodeManager::currentNM()->mkConst( false );
+ }
+ case kind::REGEXP_INTER: {
+ std::vector< Node > c_and;
+ bool emptyflag = false;
for(unsigned i=0; i<r.getNumChildren(); ++i) {
- simplifyRegExp( s, r[i], new_nodes );
- if(new_nodes.size() != 0) {
- if(new_nodes[new_nodes.size() - 1] == nfalse) {
- break;
- }
+ if(r[i].getKind() == kind::STRING_TO_REGEXP) {
+ c_and.push_back( r[i][0].eqNode(s) );
+ } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
+ emptyflag = true;
+ break;
+ } else {
+ c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
}
}
- }
+ if(emptyflag) {
+ conc = d_false;
+ } else {
+ conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
+ }
break;
- case kind::REGEXP_STAR:
- {
- Node eq;
- if(r[0].getKind() == kind::REGEXP_EMPTY) {
- eq = NodeManager::currentNM()->mkConst( false );
+ }
+ case kind::REGEXP_STAR: {
+ if(s == d_emptyString) {
+ conc = d_true;
+ } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
+ conc = s.eqNode(d_emptyString);
} else if(r[0].getKind() == kind::REGEXP_SIGMA) {
- eq = NodeManager::currentNM()->mkConst( true );
+ conc = d_true;
} else {
- eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r );
+ Node se = s.eqNode(d_emptyString);
+ Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]);
+ Node sk1 = NodeManager::currentNM()->mkSkolem( "rs_$$", s.getType(), "created for regular expression star" );
+ Node sk2 = NodeManager::currentNM()->mkSkolem( "rs_$$", s.getType(), "created for regular expression star" );
+ Node s1nz = sk1.eqNode(d_emptyString).negate();
+ Node s2nz = sk2.eqNode(d_emptyString).negate();
+ Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
+ Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r);
+ Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
+
+ conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs);
+ conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc);
}
- new_nodes.push_back( eq );
- d_simpl_cache[p] = eq;
- }
- break;
- default:
- //TODO: special sym: sigma, none, all
- Trace("strings-preprocess") << "Unsupported term: " << r << " in simplifyRegExp." << std::endl;
- Assert( false );
break;
+ }
+ default: {
+ Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
+ AlwaysAssert( false, "Unsupported Term" );
+ }
}
+ conc = Rewriter::rewrite( conc );
+ new_nodes.push_back( conc );
+ d_simpl_cache[p] = conc;
}
}
@@ -790,33 +931,28 @@ std::string RegExpOpr::mkString( Node r ) { } else {
int k = r.getKind();
switch( k ) {
- case kind::REGEXP_EMPTY:
- {
+ case kind::REGEXP_EMPTY: {
retStr += "Empty";
- }
break;
- case kind::REGEXP_SIGMA:
- {
- retStr += "{W}";
}
+ case kind::REGEXP_SIGMA: {
+ retStr += "{W}";
break;
- case kind::STRING_TO_REGEXP:
- {
- retStr += niceChar( r[0] );
}
+ case kind::STRING_TO_REGEXP: {
+ retStr += niceChar( r[0] );
break;
- case kind::REGEXP_CONCAT:
- {
+ }
+ case kind::REGEXP_CONCAT: {
retStr += "(";
for(unsigned i=0; i<r.getNumChildren(); ++i) {
if(i != 0) retStr += ".";
retStr += mkString( r[i] );
}
retStr += ")";
- }
break;
- case kind::REGEXP_OR:
- {
+ }
+ case kind::REGEXP_UNION: {
if(r == d_sigma) {
retStr += "{A}";
} else {
@@ -827,47 +963,41 @@ std::string RegExpOpr::mkString( Node r ) { }
retStr += ")";
}
- }
break;
- case kind::REGEXP_INTER:
- {
+ }
+ case kind::REGEXP_INTER: {
retStr += "(";
for(unsigned i=0; i<r.getNumChildren(); ++i) {
if(i != 0) retStr += "&";
retStr += mkString( r[i] );
}
retStr += ")";
- }
break;
- case kind::REGEXP_STAR:
- {
+ }
+ case kind::REGEXP_STAR: {
retStr += mkString( r[0] );
retStr += "*";
- }
break;
- case kind::REGEXP_PLUS:
- {
+ }
+ case kind::REGEXP_PLUS: {
retStr += mkString( r[0] );
retStr += "+";
- }
break;
- case kind::REGEXP_OPT:
- {
+ }
+ case kind::REGEXP_OPT: {
retStr += mkString( r[0] );
retStr += "?";
- }
break;
- case kind::REGEXP_RANGE:
- {
+ }
+ case kind::REGEXP_RANGE: {
retStr += "[";
retStr += niceChar( r[0] );
retStr += "-";
retStr += niceChar( r[1] );
retStr += "]";
- }
break;
+ }
default:
- //TODO: special sym: sigma, none, all
Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
//AlwaysAssert( false );
//return Node::null();
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index d7dde018a..32bfb2b3d 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -32,25 +32,33 @@ class RegExpOpr { typedef std::pair< Node, CVC4::String > PairDvStr;
private:
Node d_emptyString;
+ Node d_true;
+ Node d_false;
+ Node d_emptyRegexp;
+ Node d_zero;
+ Node d_one;
+
char d_char_start;
char d_char_end;
Node d_sigma;
Node d_sigma_star;
std::map< std::pair< Node, Node >, Node > d_simpl_cache;
+ std::map< std::pair< Node, Node >, Node > d_simpl_neg_cache;
std::map< Node, Node > d_compl_cache;
std::map< Node, int > d_delta_cache;
std::map< PairDvStr, Node > d_dv_cache;
std::map< Node, bool > d_cstre_cache;
//bool checkStarPlus( Node t );
- void simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes );
+ void simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes );
+ void simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes );
std::string niceChar( Node r );
int gcd ( int a, int b );
public:
RegExpOpr();
bool checkConstRegExp( Node r );
- void simplify(Node t, std::vector< Node > &new_nodes);
+ void simplify(Node t, std::vector< Node > &new_nodes, bool polarity);
Node mkAllExceptOne( char c );
Node complement( Node r );
int delta( Node r );
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index ba36aa371..28ef43cf9 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -47,6 +47,7 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu d_str_pos_ctn( c ), d_str_neg_ctn( c ), d_reg_exp_mem( c ), + d_regexp_deriv_processed( c ), d_curr_cardinality( c, 0 ) { // The kinds we are treating as function application in congruence @@ -61,7 +62,8 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) ); d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); - d_true = NodeManager::currentNM()->mkConst( true ); + d_emptyRegexp = NodeManager::currentNM()->mkConst( kind::REGEXP_EMPTY ); + d_true = NodeManager::currentNM()->mkConst( true ); d_false = NodeManager::currentNM()->mkConst( false ); d_all_warning = true; @@ -1010,10 +1012,12 @@ bool TheoryStrings::processLoop(std::vector< Node > &antec, conc = NodeManager::currentNM()->mkNode( kind::AND, vec_conc );//, len_x_gt_len_y } // normal case - //set its antecedant to ant, to say when it is relevant - d_reg_exp_ant[str_in_re] = ant; - //unroll the str in re constraint once - unrollStar( str_in_re ); + if( !options::stringExp() ) { + //set its antecedant to ant, to say when it is relevant + d_reg_exp_ant[str_in_re] = ant; + //unroll the str in re constraint once + unrollStar( str_in_re ); + } sendLemma( ant, conc, "LOOP-BREAK" ); ++(d_statistics.d_loop_lemmas); @@ -2234,7 +2238,7 @@ bool TheoryStrings::unrollStar( Node atom ) { cc.push_back(unr0); } else { std::vector< Node > urc; - d_regexp_opr.simplify(unr1, urc); + d_regexp_opr.simplify(unr1, urc, true); Node unr0 = sk_s.eqNode( d_emptyString ).negate(); cc.push_back(unr0); //cc.push_back(urc1); cc.insert(cc.end(), urc.begin(), urc.end()); @@ -2275,6 +2279,84 @@ bool TheoryStrings::checkMemberships() { for( unsigned i=0; i<d_reg_exp_mem.size(); i++ ){ //check regular expression membership Node assertion = d_reg_exp_mem[i]; + if(d_regexp_cache.find(assertion) == d_regexp_cache.end()) { + Trace("strings-regexp") << "We have regular expression assertion : " << assertion << std::endl; + Node atom = assertion.getKind()==kind::NOT ? assertion[0] : assertion; + bool polarity = assertion.getKind()!=kind::NOT; + bool flag = true; + if( polarity ) { + Node x = atom[0]; + Node r = atom[1]; + /*if(d_opt_regexp_gcd) { + if(d_membership_length.find(atom) == d_membership_length.end()) { + addedLemma = addMembershipLength(atom); + d_membership_length[atom] = true; + } else { + Trace("strings-regexp") << "Membership length is already added." << std::endl; + } + }*/ + if(d_regexp_deriv_processed.find(atom) != d_regexp_deriv_processed.end()) { + flag = false; + } else { + if(areEqual(x, d_emptyString)) { + int rdel = d_regexp_opr.delta(r); + if(rdel == 1) { + flag = false; + d_regexp_deriv_processed[atom] = true; + } else if(rdel == 2) { + Node antec = x.eqNode(d_emptyString); + antec = NodeManager::currentNM()->mkNode(kind::AND, antec, atom); + Node conc = Node::null(); + sendLemma(antec, conc, "RegExp Delta Conflict"); + addedLemma = true; + flag = false; + d_regexp_deriv_processed[atom] = true; + } + } /*else if(splitRegExp( x, r, atom )) { + addedLemma = true; flag = false; + d_regexp_deriv_processed[ atom ] = true; + }*/ + } + } else { + //TODO + if(! options::stringExp()) { + is_unk = true; + break; + } + } + if(flag) { + std::vector< Node > nvec; + d_regexp_opr.simplify(atom, nvec, polarity); + Node conc = nvec.size()==1 ? nvec[0] : + NodeManager::currentNM()->mkNode(kind::AND, nvec); + conc = Rewriter::rewrite(conc); + sendLemma( assertion, conc, "REGEXP" ); + addedLemma = true; + d_regexp_cache[assertion] = true; + } + } + if(d_conflict) { + break; + } + } + if( addedLemma ){ + doPendingLemmas(); + return true; + }else{ + if( is_unk ){ + Trace("strings-regexp") << "Strings::regexp: possibly incomplete." << std::endl; + d_out->setIncomplete(); + } + return false; + } +} +//TODO remove +bool TheoryStrings::checkMemberships2() { + bool is_unk = false; + bool addedLemma = false; + for( unsigned i=0; i<d_reg_exp_mem.size(); i++ ){ + //check regular expression membership + Node assertion = d_reg_exp_mem[i]; Trace("strings-regexp") << "We have regular expression assertion : " << assertion << std::endl; Node atom = assertion.getKind()==kind::NOT ? assertion[0] : assertion; bool polarity = assertion.getKind()!=kind::NOT; @@ -2437,19 +2519,19 @@ bool TheoryStrings::checkNegContains() { } } else { if(d_str_neg_ctn_rewritten.find(atom) == d_str_neg_ctn_rewritten.end()) { - Node b1 = NodeManager::currentNM()->mkBoundVar("bi", NodeManager::currentNM()->integerType()); + Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); Node g1 = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode( kind::GEQ, b1, d_zero ), NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode( kind::MINUS, lenx, lens ), b1 ) ) ); - Node b2 = NodeManager::currentNM()->mkBoundVar("bj", NodeManager::currentNM()->integerType()); + Node b2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, x, NodeManager::currentNM()->mkNode( kind::PLUS, b1, b2 ), d_one); Node s5 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b2, d_one); - Node s1 = NodeManager::currentNM()->mkBoundVar("s1", NodeManager::currentNM()->stringType()); - Node s3 = NodeManager::currentNM()->mkBoundVar("s3", NodeManager::currentNM()->stringType()); - Node s4 = NodeManager::currentNM()->mkBoundVar("s4", NodeManager::currentNM()->stringType()); - Node s6 = NodeManager::currentNM()->mkBoundVar("s6", NodeManager::currentNM()->stringType()); - Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b2, s1, s3, s4, s6); + //Node s1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + //Node s3 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + //Node s4 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + //Node s6 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b2);//, s1, s3, s4, s6); std::vector< Node > vec_nodes; Node cc = NodeManager::currentNM()->mkNode( kind::GEQ, b2, d_zero ); @@ -2457,22 +2539,22 @@ bool TheoryStrings::checkNegContains() { cc = NodeManager::currentNM()->mkNode( kind::GEQ, lens, b2 ); vec_nodes.push_back(cc); - cc = x.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2, s3)); - vec_nodes.push_back(cc); - cc = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s4, s5, s6)); - vec_nodes.push_back(cc); + //cc = x.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2, s3)); + //vec_nodes.push_back(cc); + //cc = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s4, s5, s6)); + //vec_nodes.push_back(cc); cc = s2.eqNode(s5).negate(); vec_nodes.push_back(cc); - cc = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1).eqNode(NodeManager::currentNM()->mkNode( kind::PLUS, b1, b2 )); - vec_nodes.push_back(cc); - cc = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s4).eqNode(b2); - vec_nodes.push_back(cc); + //cc = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1).eqNode(NodeManager::currentNM()->mkNode( kind::PLUS, b1, b2 )); + //vec_nodes.push_back(cc); + //cc = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s4).eqNode(b2); + //vec_nodes.push_back(cc); // charAt length - cc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s2)); - vec_nodes.push_back(cc); - cc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s5)); - vec_nodes.push_back(cc); + //cc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s2)); + //vec_nodes.push_back(cc); + //cc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s5)); + //vec_nodes.push_back(cc); Node conc = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::AND, vec_nodes) ); Node xlss = NodeManager::currentNM()->mkNode( kind::GT, lens, lenx ); @@ -2553,7 +2635,7 @@ bool TheoryStrings::splitRegExp( Node x, Node r, Node ant ) { for(unsigned i=0; i<s.size(); ++i) { CVC4::String c = s.substr(i, 1); dc = d_regexp_opr.derivativeSingle(dc, c); - if(dc.isNull()) { + if(dc == d_emptyRegexp) { // CONFLICT flag = false; break; @@ -2575,7 +2657,7 @@ bool TheoryStrings::splitRegExp( Node x, Node r, Node ant ) { conc = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, left, dc ); std::vector< Node > sdc; - d_regexp_opr.simplify(conc, sdc); + d_regexp_opr.simplify(conc, sdc, true); if(sdc.size() == 1) { conc = sdc[0]; } else { diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index ea92865b2..bf8a3d894 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -111,6 +111,7 @@ public: private: // Constants Node d_emptyString; + Node d_emptyRegexp; Node d_true; Node d_false; Node d_zero; @@ -228,6 +229,7 @@ private: bool checkCardinality(); bool checkInductiveEquations(); bool checkMemberships(); + bool checkMemberships2(); bool checkContains(); bool checkPosContains(); bool checkNegContains(); @@ -299,6 +301,7 @@ private: private: // regular expression memberships NodeList d_reg_exp_mem; + std::map< Node, bool > d_regexp_cache; // antecedant for why reg exp membership must be true std::map< Node, Node > d_reg_exp_ant; std::map< Node, bool > d_reg_exp_unroll; @@ -310,6 +313,7 @@ private: std::map< Node, bool > d_membership_length; // regular expression derivative std::map< Node, bool > d_reg_exp_deriv; + NodeBoolMap d_regexp_deriv_processed; // regular expression operations RegExpOpr d_regexp_opr; diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 43d3bfe47..bb79f337b 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -32,58 +32,76 @@ StringsPreprocess::StringsPreprocess() { d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) ); } -void StringsPreprocess::simplifyRegExp( Node s, Node r, std::vector< Node > &ret, std::vector< Node > &nn ) { +void StringsPreprocess::processRegExp( Node s, Node r, std::vector< Node > &ret ) { int k = r.getKind(); switch( k ) { - case kind::STRING_TO_REGEXP: - { - Node eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s, r[0] ); + case kind::REGEXP_EMPTY: { + Node eq = NodeManager::currentNM()->mkConst( false ); ret.push_back( eq ); + break; } + case kind::REGEXP_SIGMA: { + Node one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) ); + Node eq = one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)); + ret.push_back( eq ); break; - case kind::REGEXP_CONCAT: - { + } + case kind::STRING_TO_REGEXP: { + Node eq = s.eqNode( r[0] ); + ret.push_back( eq ); + break; + } + case kind::REGEXP_CONCAT: { + bool flag = true; std::vector< Node > cc; for(unsigned i=0; i<r.getNumChildren(); ++i) { if(r[i].getKind() == kind::STRING_TO_REGEXP) { cc.push_back( r[i][0] ); } else { - Node sk = NodeManager::currentNM()->mkSkolem( "recsym_$$", s.getType(), "created for regular expression concat" ); - simplifyRegExp( sk, r[i], ret, nn ); - cc.push_back( sk ); + flag = false; + break; } } - Node cc_eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s, - NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, cc ) ); - nn.push_back( cc_eq ); - } + if(flag) { + Node eq = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc)); + ret.push_back(eq); + } else { + Node eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r ); + ret.push_back( eq ); + } break; - case kind::REGEXP_OR: - { + } + case kind::REGEXP_UNION: { std::vector< Node > c_or; for(unsigned i=0; i<r.getNumChildren(); ++i) { - simplifyRegExp( s, r[i], c_or, nn ); + std::vector< Node > ntmp; + processRegExp( s, r[i], ntmp ); + Node lem = ntmp.size()==1 ? ntmp[0] : NodeManager::currentNM()->mkNode(kind::AND, ntmp); + c_or.push_back( lem ); } - Node eq = NodeManager::currentNM()->mkNode( kind::OR, c_or ); + Node eq = NodeManager::currentNM()->mkNode(kind::OR, c_or); ret.push_back( eq ); - } break; - case kind::REGEXP_INTER: + } + case kind::REGEXP_INTER: { for(unsigned i=0; i<r.getNumChildren(); ++i) { - simplifyRegExp( s, r[i], ret, nn ); + processRegExp( s, r[i], ret ); } break; - case kind::REGEXP_STAR: - { - Node eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r ); - ret.push_back( eq ); } + case kind::REGEXP_STAR: { + if(r[0].getKind() == kind::REGEXP_SIGMA) { + ret.push_back(NodeManager::currentNM()->mkConst(true)); + } else { + Node eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r ); + ret.push_back( eq ); + } break; - default: - //TODO: special sym: sigma, none, all + } + default: { Trace("strings-preprocess") << "Unsupported term: " << r << " in simplifyRegExp." << std::endl; - Assert( false ); - break; + AlwaysAssert( false, "Unsupported Term" ); + } } } @@ -150,24 +168,18 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { d_cache[t] = t; retNode = t; } - } else */if( t.getKind() == kind::STRING_IN_REGEXP ) { - // t0 in t1 + } else */ + if( t.getKind() == kind::STRING_IN_REGEXP ) { Node t0 = simplify( t[0], new_nodes ); - //if(!checkStarPlus( t[1] )) { - //rewrite it - std::vector< Node > ret; - std::vector< Node > nn; - simplifyRegExp( t0, t[1], ret, nn ); - new_nodes.insert( new_nodes.end(), nn.begin(), nn.end() ); + //rewrite it + std::vector< Node > ret; + processRegExp( t0, t[1], ret ); - Node n = ret.size() == 1 ? ret[0] : NodeManager::currentNM()->mkNode( kind::AND, ret ); - d_cache[t] = (t == n) ? Node::null() : n; - retNode = n; - //} else { - // TODO - // return (t0 == t[0]) ? t : NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, t0, t[1] ); - //} + Node n = ret.size() == 1 ? ret[0] : NodeManager::currentNM()->mkNode( kind::AND, ret ); + n = Rewriter::rewrite(n); + d_cache[t] = (t == n) ? Node::null() : n; + retNode = n; } else if( t.getKind() == kind::STRING_SUBSTR_TOTAL ) { Node lenxgti = NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[0] ), @@ -246,7 +258,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { new_nodes.push_back(lem); //non-neg - Node b1 = NodeManager::currentNM()->mkBoundVar("x", NodeManager::currentNM()->integerType()); + Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); Node g1 = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode( kind::GEQ, b1, d_zero ), NodeManager::currentNM()->mkNode( kind::GT, lenp, b1 ) ) ); @@ -286,8 +298,8 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node cc3 = NodeManager::currentNM()->mkNode(kind::GEQ, ufMx, d_zero); Node cc4 = NodeManager::currentNM()->mkNode(kind::GEQ, nine, ufMx); - Node b21 = NodeManager::currentNM()->mkBoundVar("y", NodeManager::currentNM()->stringType()); - Node b22 = NodeManager::currentNM()->mkBoundVar("z", NodeManager::currentNM()->stringType()); + Node b21 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + Node b22 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b21, b22); Node c21 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, b21).eqNode( @@ -367,23 +379,14 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node cc1 = t[0].eqNode(NodeManager::currentNM()->mkConst(::CVC4::String(""))); //cc1 = NodeManager::currentNM()->mkNode(kind::AND, ufP0.eqNode(negone), cc1); //cc2 - Node b1 = NodeManager::currentNM()->mkBoundVar("x", NodeManager::currentNM()->integerType()); - Node z1 = NodeManager::currentNM()->mkBoundVar("z1", NodeManager::currentNM()->stringType()); - Node z2 = NodeManager::currentNM()->mkBoundVar("z2", NodeManager::currentNM()->stringType()); - Node z3 = NodeManager::currentNM()->mkBoundVar("z3", NodeManager::currentNM()->stringType()); - Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1, z1, z2, z3); std::vector< Node > vec_n; - Node g = NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero); - vec_n.push_back(g); - g = NodeManager::currentNM()->mkNode(kind::GT, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, t[0]), b1); - vec_n.push_back(g); - g = b1.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, z1) ); - vec_n.push_back(g); - g = one.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, z2) ); + Node z1 = NodeManager::currentNM()->mkSkolem("z1_$$", NodeManager::currentNM()->stringType()); + Node z2 = NodeManager::currentNM()->mkSkolem("z1_$$", NodeManager::currentNM()->stringType()); + Node z3 = NodeManager::currentNM()->mkSkolem("z1_$$", NodeManager::currentNM()->stringType()); + Node g = one.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, z2) ); vec_n.push_back(g); g = t[0].eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, z1, z2, z3) ); vec_n.push_back(g); - //Node z2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, t[0], one); char chtmp[2]; chtmp[1] = '\0'; for(unsigned i=0; i<=9; i++) { @@ -393,10 +396,8 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { vec_n.push_back(g); } Node cc2 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, vec_n)); - cc2 = NodeManager::currentNM()->mkNode(kind::EXISTS, b1v, cc2); - //cc2 = NodeManager::currentNM()->mkNode(kind::AND, ufP0.eqNode(negone), cc2); //cc3 - Node b2 = NodeManager::currentNM()->mkBoundVar("y", NodeManager::currentNM()->integerType()); + Node b2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b2); Node g2 = NodeManager::currentNM()->mkNode(kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b2, d_zero), @@ -404,8 +405,8 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node ufx = NodeManager::currentNM()->mkNode(kind::APPLY_UF, ufP, b2); Node ufx1 = NodeManager::currentNM()->mkNode(kind::APPLY_UF, ufP, NodeManager::currentNM()->mkNode(kind::MINUS,b2,one)); Node ufMx = NodeManager::currentNM()->mkNode(kind::APPLY_UF, ufM, b2); - Node w1 = NodeManager::currentNM()->mkBoundVar("w1", NodeManager::currentNM()->stringType()); - Node w2 = NodeManager::currentNM()->mkBoundVar("w2", NodeManager::currentNM()->stringType()); + Node w1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); + Node w2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType()); Node b3v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, w1, w2); Node b2gtz = NodeManager::currentNM()->mkNode(kind::GT, b2, d_zero); Node c3c1 = ufx1.eqNode( NodeManager::currentNM()->mkNode(kind::PLUS, @@ -533,17 +534,12 @@ Node StringsPreprocess::decompose(Node t, std::vector< Node > & new_nodes) { unsigned num = t.getNumChildren(); if(num == 0) { return simplify(t, new_nodes); - } else if(num == 1) { - Node s = decompose(t[0], new_nodes); - if(s != t[0]) { - Node tmp = NodeManager::currentNM()->mkNode( t.getKind(), s ); - return simplify(tmp, new_nodes); - } else { - return simplify(t, new_nodes); - } } else { bool changed = false; std::vector< Node > cc; + if (t.getMetaKind() == kind::metakind::PARAMETERIZED) { + cc.push_back(t.getOperator()); + } for(unsigned i=0; i<t.getNumChildren(); i++) { Node s = decompose(t[i], new_nodes); cc.push_back( s ); diff --git a/src/theory/strings/theory_strings_preprocess.h b/src/theory/strings/theory_strings_preprocess.h index b7d298471..69454db84 100644 --- a/src/theory/strings/theory_strings_preprocess.h +++ b/src/theory/strings/theory_strings_preprocess.h @@ -36,7 +36,7 @@ class StringsPreprocess { private: bool checkStarPlus( Node t ); int checkFixLenVar( Node t ); - void simplifyRegExp( Node s, Node r, std::vector< Node > &ret, std::vector< Node > &nn ); + void processRegExp( Node s, Node r, std::vector< Node > &ret ); Node simplify( Node t, std::vector< Node > &new_nodes ); Node decompose( Node t, std::vector< Node > &new_nodes ); public: diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index c0d66ab22..b118e389e 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -28,28 +28,23 @@ Node TheoryStringsRewriter::rewriteConcatString( TNode node ) { Node retNode = node; std::vector<Node> node_vec; Node preNode = Node::null(); - bool emptyflag = false; for(unsigned int i=0; i<node.getNumChildren(); ++i) { Node tmpNode = node[i]; - if( tmpNode.getKind() == kind::REGEXP_EMPTY ) { - emptyflag = true; - break; - } else if(node[i].getKind() == kind::STRING_CONCAT) { + if(node[i].getKind() == kind::STRING_CONCAT) { tmpNode = rewriteConcatString(node[i]); if(tmpNode.getKind() == kind::STRING_CONCAT) { - unsigned int j=0; + unsigned j=0; if(!preNode.isNull()) { if(tmpNode[0].isConst()) { preNode = NodeManager::currentNM()->mkConst( preNode.getConst<String>().concat( tmpNode[0].getConst<String>() ) ); node_vec.push_back( preNode ); preNode = Node::null(); - ++j; } else { node_vec.push_back( preNode ); preNode = Node::null(); node_vec.push_back( tmpNode[0] ); - ++j; } + ++j; } for(; j<tmpNode.getNumChildren() - 1; ++j) { node_vec.push_back( tmpNode[j] ); @@ -58,7 +53,7 @@ Node TheoryStringsRewriter::rewriteConcatString( TNode node ) { } } if(!tmpNode.isConst()) { - if(preNode != Node::null()) { + if(!preNode.isNull()) { if(preNode.getKind() == kind::CONST_STRING && preNode.getConst<String>().isEmptyString() ) { preNode = Node::null(); } else { @@ -75,17 +70,13 @@ Node TheoryStringsRewriter::rewriteConcatString( TNode node ) { } } } - if(emptyflag) { - retNode = NodeManager::currentNM()->mkConst( kind::REGEXP_EMPTY ); + if(preNode != Node::null()) { + node_vec.push_back( preNode ); + } + if(node_vec.size() > 1) { + retNode = NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, node_vec); } else { - if(preNode != Node::null()) { - node_vec.push_back( preNode ); - } - if(node_vec.size() > 1) { - retNode = NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, node_vec); - } else { - retNode = node_vec[0]; - } + retNode = node_vec[0]; } Trace("strings-prerewrite") << "Strings::rewriteConcatString end " << retNode << std::endl; return retNode; @@ -101,23 +92,22 @@ Node TheoryStringsRewriter::prerewriteConcatRegExp( TNode node ) { for(unsigned int i=0; i<node.getNumChildren(); ++i) { Trace("strings-prerewrite") << "Strings::prerewriteConcatRegExp preNode: " << preNode << std::endl; Node tmpNode = node[i]; - if(node[i].getKind() == kind::REGEXP_CONCAT) { + if(tmpNode.getKind() == kind::REGEXP_CONCAT) { tmpNode = prerewriteConcatRegExp(node[i]); if(tmpNode.getKind() == kind::REGEXP_CONCAT) { - unsigned int j=0; + unsigned j=0; if(!preNode.isNull()) { if(tmpNode[0].getKind() == kind::STRING_TO_REGEXP) { preNode = rewriteConcatString( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, preNode, tmpNode[0][0] ) ); node_vec.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, preNode ) ); preNode = Node::null(); - ++j; } else { node_vec.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, preNode ) ); preNode = Node::null(); node_vec.push_back( tmpNode[0] ); - ++j; } + ++j; } for(; j<tmpNode.getNumChildren() - 1; ++j) { node_vec.push_back( tmpNode[j] ); @@ -164,27 +154,27 @@ Node TheoryStringsRewriter::prerewriteConcatRegExp( TNode node ) { } Node TheoryStringsRewriter::prerewriteOrRegExp(TNode node) { - Assert( node.getKind() == kind::REGEXP_OR ); + Assert( node.getKind() == kind::REGEXP_UNION ); Trace("strings-prerewrite") << "Strings::prerewriteOrRegExp start " << node << std::endl; Node retNode = node; std::vector<Node> node_vec; - bool flag = true; - for(unsigned int i=0; i<node.getNumChildren(); ++i) { - if(node[i].getKind() == kind::REGEXP_OR) { + bool flag = false; + for(unsigned i=0; i<node.getNumChildren(); ++i) { + if(node[i].getKind() == kind::REGEXP_UNION) { Node tmpNode = prerewriteOrRegExp( node[i] ); for(unsigned int j=0; j<tmpNode.getNumChildren(); ++j) { - node_vec.push_back( tmpNode[i] ); + node_vec.push_back( tmpNode[j] ); } - flag = false; + flag = true; + } else if(node[i].getKind() == kind::REGEXP_EMPTY) { + flag = true; } else { - if(node[i].getKind() != kind::REGEXP_EMPTY) { - node_vec.push_back( node[i] ); - } + node_vec.push_back( node[i] ); } } if(flag) { retNode = node_vec.size() == 0 ? NodeManager::currentNM()->mkConst( kind::REGEXP_EMPTY ) : - node_vec.size() == 1 ? node_vec[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_OR, node_vec); + node_vec.size() == 1 ? node_vec[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, node_vec); } Trace("strings-prerewrite") << "Strings::prerewriteOrRegExp end " << retNode << std::endl; return retNode; @@ -209,17 +199,15 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i Assert( index_start <= s.size() ); int k = r.getKind(); switch( k ) { - case kind::STRING_TO_REGEXP: - { + case kind::STRING_TO_REGEXP: { CVC4::String s2 = s.substr( index_start, s.size() - index_start ); if(r[0].getKind() == kind::CONST_STRING) { return ( s2 == r[0].getConst<String>() ); } else { - Assert( false, "RegExp contains variable" ); + Assert( false, "RegExp contains variables" ); } } - case kind::REGEXP_CONCAT: - { + case kind::REGEXP_CONCAT: { if( s.size() != index_start ) { std::vector<int> vec_k( r.getNumChildren(), -1 ); int start = 0; @@ -256,24 +244,21 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i return true; } } - case kind::REGEXP_OR: - { + case kind::REGEXP_UNION: { for(unsigned i=0; i<r.getNumChildren(); ++i) { if(testConstStringInRegExp( s, index_start, r[i] )) return true; } return false; } - case kind::REGEXP_INTER: - { + case kind::REGEXP_INTER: { for(unsigned i=0; i<r.getNumChildren(); ++i) { if(!testConstStringInRegExp( s, index_start, r[i] )) return false; } return true; } - case kind::REGEXP_STAR: - { + case kind::REGEXP_STAR: { if( s.size() != index_start ) { - for(unsigned int k=s.size() - index_start; k>0; --k) { + for(unsigned k=s.size() - index_start; k>0; --k) { CVC4::String t = s.substr(index_start, k); if( testConstStringInRegExp( t, 0, r[0] ) ) { if( index_start + k == s.size() || testConstStringInRegExp( s, index_start + k, r ) ) { @@ -286,25 +271,24 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i return true; } } - case kind::REGEXP_EMPTY: - { + case kind::REGEXP_EMPTY: { return false; } - case kind::REGEXP_SIGMA: - { + case kind::REGEXP_SIGMA: { if(s.size() == 1) { return true; } else { return false; } } - default: - //TODO: special sym: sigma, none, all + default: { Trace("strings-postrewrite") << "Unsupported term: " << r << " in testConstStringInRegExp." << std::endl; - Assert( false ); + AlwaysAssert( false, "Unsupported Term" ); return false; + } } } + Node TheoryStringsRewriter::rewriteMembership(TNode node) { Node retNode = node; Node x = node[0]; @@ -319,10 +303,13 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) { //test whether x in node[1] CVC4::String s = x.getConst<String>(); retNode = NodeManager::currentNM()->mkConst( testConstStringInRegExp( s, 0, node[1] ) ); - } else { - if( x != node[0] ) { - retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, node[1] ); - } + } else if(node[1].getKind() == kind::REGEXP_SIGMA) { + Node one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) ); + retNode = one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x)); + } else if(node[1].getKind() == kind::REGEXP_STAR && node[1][0].getKind() == kind::REGEXP_SIGMA) { + retNode = NodeManager::currentNM()->mkConst( true ); + } else if( x != node[0] ) { + retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, node[1] ); } return retNode; } @@ -383,11 +370,21 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { Node zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) ); if(node[2] == zero) { retNode = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); - } else if( node[0].isConst() && node[1].isConst() && node[2].isConst() ) { - int i = node[1].getConst<Rational>().getNumerator().toUnsignedInt(); - int j = node[2].getConst<Rational>().getNumerator().toUnsignedInt(); - if( i>=0 && j>=0 && node[0].getConst<String>().size() >= (unsigned) (i + j) ) { - retNode = NodeManager::currentNM()->mkConst( node[0].getConst<String>().substr(i, j) ); + } else if( node[1].isConst() && node[2].isConst() ) { + if(node[1].getConst<Rational>().sgn()>=0 && node[2].getConst<Rational>().sgn()>=0) { + int i = node[1].getConst<Rational>().getNumerator().toUnsignedInt(); + int j = node[2].getConst<Rational>().getNumerator().toUnsignedInt(); + if( node[0].isConst() ) { + if( node[0].getConst<String>().size() >= (unsigned) (i + j) ) { + retNode = NodeManager::currentNM()->mkConst( node[0].getConst<String>().substr(i, j) ); + } else { + retNode = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); + } + } else if(node[0].getKind() == kind::STRING_CONCAT && node[0][0].isConst()) { + if( node[0][0].getConst<String>().size() >= (unsigned) (i + j) ) { + retNode = NodeManager::currentNM()->mkConst( node[0][0].getConst<String>().substr(i, j) ); + } + } } else { retNode = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); } @@ -492,7 +489,12 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { CVC4::String s = node[0].getConst<String>(); if(s.isNumber()) { std::string stmp = s.toString(); - retNode = NodeManager::currentNM()->mkConst(::CVC4::Rational(stmp.c_str())); + if(stmp[0] == '0' && stmp.size() != 1) { + //TODO: leading zeros + retNode = NodeManager::currentNM()->mkConst(::CVC4::Rational(-1)); + } else { + retNode = NodeManager::currentNM()->mkConst(::CVC4::Rational(stmp.c_str())); + } } else { retNode = NodeManager::currentNM()->mkConst(::CVC4::Rational(-1)); } @@ -524,13 +526,13 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { retNode = rewriteConcatString(node); } else if(node.getKind() == kind::REGEXP_CONCAT) { retNode = prerewriteConcatRegExp(node); - } else if(node.getKind() == kind::REGEXP_OR) { + } else if(node.getKind() == kind::REGEXP_UNION) { retNode = prerewriteOrRegExp(node); } else if(node.getKind() == kind::REGEXP_PLUS) { retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, node[0], NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, node[0])); } else if(node.getKind() == kind::REGEXP_OPT) { - retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String("") ) ), node[0]); } else if(node.getKind() == kind::REGEXP_RANGE) { @@ -544,7 +546,7 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { if(vec_nodes.size() == 1) { retNode = vec_nodes[0]; } else { - retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ); + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ); } } diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h index 4ac4c26b4..7eb4ac3d0 100644 --- a/src/theory/strings/theory_strings_type_rules.h +++ b/src/theory/strings/theory_strings_type_rules.h @@ -261,7 +261,7 @@ public: } }; -class RegExpOrTypeRule { +class RegExpUnionTypeRule { public: inline static TypeNode computeType(NodeManager* nodeManager, TNode n, bool check) throw (TypeCheckingExceptionPrivate, AssertionException) { diff --git a/test/regress/regress0/strings/Makefile.am b/test/regress/regress0/strings/Makefile.am index f11ada1a1..705a7eadb 100644 --- a/test/regress/regress0/strings/Makefile.am +++ b/test/regress/regress0/strings/Makefile.am @@ -37,7 +37,6 @@ TESTS = \ model001.smt2 \ substr001.smt2 \ regexp001.smt2 \ - regexp002.smt2 \ loop001.smt2 \ loop002.smt2 \ loop003.smt2 \ @@ -48,6 +47,8 @@ TESTS = \ loop008.smt2 \ loop009.smt2 +#regexp002.smt2 + FAILING_TESTS = EXTRA_DIST = $(TESTS) diff --git a/test/regress/regress0/strings/fmf001.smt2 b/test/regress/regress0/strings/fmf001.smt2 index f5ed1c3fb..05bbab586 100644 --- a/test/regress/regress0/strings/fmf001.smt2 +++ b/test/regress/regress0/strings/fmf001.smt2 @@ -1,4 +1,5 @@ (set-logic QF_S)
+(set-option :strings-exp true)
(set-option :strings-fmf true)
(set-info :status sat)
diff --git a/test/regress/regress0/strings/fmf002.smt2 b/test/regress/regress0/strings/fmf002.smt2 index 525fc152c..1d41b1085 100644 --- a/test/regress/regress0/strings/fmf002.smt2 +++ b/test/regress/regress0/strings/fmf002.smt2 @@ -1,4 +1,5 @@ (set-logic QF_S)
+(set-option :strings-exp true)
(set-option :strings-fmf true)
(set-info :status sat)
diff --git a/test/regress/regress0/strings/loop007.smt2 b/test/regress/regress0/strings/loop007.smt2 index b292de512..a97d97d54 100644 --- a/test/regress/regress0/strings/loop007.smt2 +++ b/test/regress/regress0/strings/loop007.smt2 @@ -1,4 +1,5 @@ (set-logic QF_S) +(set-option :strings-exp true) (set-info :status sat) (declare-fun x () String) diff --git a/test/regress/regress0/strings/loop008.smt2 b/test/regress/regress0/strings/loop008.smt2 index 91ed8cdf0..113577e48 100644 --- a/test/regress/regress0/strings/loop008.smt2 +++ b/test/regress/regress0/strings/loop008.smt2 @@ -1,4 +1,5 @@ (set-logic QF_S)
+(set-option :strings-exp true)
(set-info :status sat)
(declare-fun x () String)
diff --git a/test/regress/regress0/strings/loop009.smt2 b/test/regress/regress0/strings/loop009.smt2 index 41eab0f35..9ccc6de6e 100644 --- a/test/regress/regress0/strings/loop009.smt2 +++ b/test/regress/regress0/strings/loop009.smt2 @@ -1,4 +1,5 @@ (set-logic QF_S)
+(set-option :strings-exp true)
(set-info :status sat)
(declare-fun x () String)
diff --git a/test/regress/regress0/strings/regexp001.smt2 b/test/regress/regress0/strings/regexp001.smt2 index 41aefbd41..6a2044ea8 100644 --- a/test/regress/regress0/strings/regexp001.smt2 +++ b/test/regress/regress0/strings/regexp001.smt2 @@ -1,5 +1,6 @@ (set-logic QF_S)
(set-info :status sat)
+(set-option :strings-exp true)
(declare-fun x () String)
diff --git a/test/regress/regress0/strings/regexp002.smt2 b/test/regress/regress0/strings/regexp002.smt2 index e2a44a9ab..18541af4e 100644 --- a/test/regress/regress0/strings/regexp002.smt2 +++ b/test/regress/regress0/strings/regexp002.smt2 @@ -1,5 +1,6 @@ (set-logic QF_S)
(set-info :status sat)
+(set-option :strings-exp true)
(declare-fun x () String)
(declare-fun y () String)
|