diff options
author | Tianyi Liang <tianyi-liang@uiowa.edu> | 2014-12-06 13:24:01 -0600 |
---|---|---|
committer | Tianyi Liang <tianyi-liang@uiowa.edu> | 2014-12-06 13:26:24 -0600 |
commit | c4410c3123f7dc73bb0296ebe01c172e96b210cc (patch) | |
tree | e585487fee95d2098c9229b84db236b008fcba4c /src/theory/strings | |
parent | 5e2eef449c11b0be6b25942bccf7b0712ebe2d20 (diff) |
Added C++/Java api examples;
Converted cset to be vector of char, instead of vector of int, since we only accept ascii in input.
Diffstat (limited to 'src/theory/strings')
-rw-r--r-- | src/theory/strings/regexp_operation.cpp | 102 | ||||
-rw-r--r-- | src/theory/strings/regexp_operation.h | 20 | ||||
-rw-r--r-- | src/theory/strings/theory_strings.cpp | 7 | ||||
-rw-r--r-- | src/theory/strings/theory_strings_rewriter.cpp | 10 |
4 files changed, 90 insertions, 49 deletions
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index 2347af3e6..9d83b91a8 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -22,7 +22,7 @@ namespace theory { namespace strings { RegExpOpr::RegExpOpr() - : d_card( 256 ), + : d_lastchar( '\xff' ), RMAXINT( LONG_MAX ) { d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); @@ -240,6 +240,12 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { retNode = d_emptySingleton; break; } + case kind::REGEXP_RANGE: { + CVC4::String a = r[0].getConst<String>(); + CVC4::String b = r[1].getConst<String>(); + retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp; + break; + } case kind::STRING_TO_REGEXP: { Node tmp = Rewriter::rewrite(r[0]); if(tmp.isConst()) { @@ -450,6 +456,12 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ); break; } + case kind::REGEXP_RANGE: { + CVC4::String a = r[0].getConst<String>(); + CVC4::String b = r[1].getConst<String>(); + retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp; + break; + } case kind::STRING_TO_REGEXP: { if(r[0].isConst()) { if(r[0] == d_emptyString) { @@ -556,7 +568,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { default: { //TODO: special sym: sigma, none, all Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl; - Assert( false, "Unsupported Term" ); + Unreachable(); //return Node::null(); } } @@ -639,13 +651,13 @@ bool RegExpOpr::guessLength( Node r, int &co ) { } } -void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) { - std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_fset_cache.find(r); +void RegExpOpr::firstChars( Node r, std::set<char> &pcset, SetNodes &pvset ) { + std::map< Node, std::pair< std::set<char>, SetNodes > >::const_iterator itr = d_fset_cache.find(r); if(itr != d_fset_cache.end()) { pcset.insert((itr->second).first.begin(), (itr->second).first.end()); pvset.insert((itr->second).second.begin(), (itr->second).second.end()); } else { - std::set<unsigned> cset; + std::set<char> cset; SetNodes vset; int k = r.getKind(); switch( k ) { @@ -653,7 +665,7 @@ void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) break; } case kind::REGEXP_SIGMA: { - for(unsigned i=0; i<d_card; i++) { + for(char i='\0'; i<=d_lastchar; i++) { cset.insert(i); } break; @@ -663,14 +675,14 @@ void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) if(st.isConst()) { CVC4::String s = st.getConst< CVC4::String >(); if(s.size() != 0) { - cset.insert(s[0]); + cset.insert(s.getFirstChar()); } } else if(st.getKind() == kind::VARIABLE) { vset.insert( st ); } else { if(st[0].isConst()) { CVC4::String s = st[0].getConst< CVC4::String >(); - cset.insert(s[0]); + cset.insert(s.getFirstChar()); } else { vset.insert( st[0] ); } @@ -707,23 +719,23 @@ void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) break; } default: { - Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl; - Assert( false, "Unsupported Term" ); + //Trace("regexp-error") << "Unsupported term: " << r << " in firstChars." << std::endl; + Unreachable(); } } pcset.insert(cset.begin(), cset.end()); pvset.insert(vset.begin(), vset.end()); - std::pair< std::set<unsigned>, SetNodes > p(cset, vset); + std::pair< std::set<char>, SetNodes > p(cset, vset); d_fset_cache[r] = p; if(Trace.isOn("regexp-fset")) { Trace("regexp-fset") << "FSET(" << mkString(r) << ") = {"; - for(std::set<unsigned>::const_iterator itr = cset.begin(); + for(std::set<char>::const_iterator itr = cset.begin(); itr != cset.end(); itr++) { if(itr != cset.begin()) { Trace("regexp-fset") << ","; } - Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr); + Trace("regexp-fset") << (*itr); } Trace("regexp-fset") << "}" << std::endl; } @@ -1109,13 +1121,13 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes } } -void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) { - std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_cset_cache.find(r); +void RegExpOpr::getCharSet( Node r, std::set<char> &pcset, SetNodes &pvset ) { + std::map< Node, std::pair< std::set<char>, SetNodes > >::const_iterator itr = d_cset_cache.find(r); if(itr != d_cset_cache.end()) { pcset.insert((itr->second).first.begin(), (itr->second).first.end()); pvset.insert((itr->second).second.begin(), (itr->second).second.end()); } else { - std::set<unsigned> cset; + std::set<char> cset; SetNodes vset; int k = r.getKind(); switch( k ) { @@ -1123,7 +1135,15 @@ void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) break; } case kind::REGEXP_SIGMA: { - for(unsigned i=0; i<d_card; i++) { + for(char i='\0'; i<=d_lastchar; i++) { + cset.insert(i); + } + break; + } + case kind::REGEXP_RANGE: { + char a = r[0].getConst<String>().getFirstChar(); + char b = r[1].getConst<String>().getFirstChar(); + for(char i=a; i<=b; i++) { cset.insert(i); } break; @@ -1172,19 +1192,19 @@ void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) break; } default: { - Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl; - Assert( false, "Unsupported Term" ); + //Trace("strings-error") << "Unsupported term: " << r << " in getCharSet." << std::endl; + Unreachable(); } } pcset.insert(cset.begin(), cset.end()); pvset.insert(vset.begin(), vset.end()); - std::pair< std::set<unsigned>, SetNodes > p(cset, vset); + std::pair< std::set<char>, SetNodes > p(cset, vset); d_cset_cache[r] = p; Trace("regexp-cset") << "CSET( " << mkString(r) << " ) = { "; - for(std::set<unsigned>::const_iterator itr = cset.begin(); + for(std::set<char>::const_iterator itr = cset.begin(); itr != cset.end(); itr++) { - Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr) << ","; + Trace("regexp-cset") << (*itr) << ","; } Trace("regexp-cset") << " }" << std::endl; } @@ -1251,11 +1271,6 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) { r2 = n; } } else if(n.getKind() == kind::REGEXP_CONCAT) { - //TODO - //convert2 x (r@(Seq l r1)) - // | contains x r1 = let (r2,r3) = convert2 x r1 - // in (Seq l r2, r3) - // | otherwise = (Empty, r) bool flag = true; std::vector<Node> vr1, vr2; for( unsigned i=0; i<n.getNumChildren(); i++ ) { @@ -1291,11 +1306,12 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) { } r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1); r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2); - } else if(n.getKind() == kind::STRING_TO_REGEXP) { + } else if(n.getKind() == kind::STRING_TO_REGEXP || n.getKind() == kind::REGEXP_SIGMA || n.getKind() == kind::REGEXP_RANGE) { r1 = d_emptySingleton; r2 = n; } else { //is it possible? + Unreachable(); } } @@ -1318,7 +1334,7 @@ bool RegExpOpr::testNoRV(Node r) { } Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) { - //(checkConstRegExp(r1) && checkConstRegExp(r2)) + //Assert(checkConstRegExp(r1) && checkConstRegExp(r2)); if(r1 > r2) { TNode tmpNode = r1; r1 = r2; @@ -1358,8 +1374,8 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > if(itrcache != cache.end()) { rNode = itrcache->second; } else { - std::vector< unsigned > cset; - std::set< unsigned > cset1, cset2; + std::vector< char > cset; + std::set< char > cset1, cset2; std::set< Node > vset1, vset2; firstChars(r1, cset1, vset1); firstChars(r2, cset2, vset2); @@ -1379,20 +1395,20 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > } if(Trace.isOn("regexp-int-debug")) { Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {"; - for(std::vector<unsigned>::const_iterator itr = cset.begin(); + for(std::vector<char>::const_iterator itr = cset.begin(); itr != cset.end(); itr++) { - CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) ); + //CVC4::String c( *itr ); if(itr != cset.begin()) { Trace("regexp-int-debug") << ", "; } - Trace("regexp-int-debug") << c; + Trace("regexp-int-debug") << ( *itr ); } Trace("regexp-int-debug") << std::endl; } std::map< PairNodes, Node > cacheX; - for(std::vector<unsigned>::const_iterator itr = cset.begin(); + for(std::vector<char>::const_iterator itr = cset.begin(); itr != cset.end(); itr++) { - CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) ); + CVC4::String c( *itr ); Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl; Node r1l = derivativeSingle(r1, c); Node r2l = derivativeSingle(r2, c); @@ -1502,8 +1518,10 @@ Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) { Node rr1 = removeIntersection(r1); Node rr2 = removeIntersection(r2); std::map< PairNodes, Node > cache; - //std::map< PairNodes, Node > inter_cache; - return intersectInternal(rr1, rr2, cache, 1); + Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1) << ",\n\t"<< mkString(r2) << ")" << std::endl; + Node retNode = intersectInternal(rr1, rr2, cache, 1); + Trace("regexp-intersect") << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t"<< mkString(r2) << ") =\n\t" << mkString(retNode) << std::endl; + return retNode; } else { spflag = true; return Node::null(); @@ -1525,12 +1543,12 @@ Node RegExpOpr::complement(Node r, int &ret) { //TODO: var to be extended ret = 0; } else { - std::set<unsigned> cset; + std::set<char> cset; SetNodes vset; firstChars(r, cset, vset); std::vector< Node > vec_nodes; - for(unsigned i=0; i<d_card; i++) { - CVC4::String c = CVC4::String::convertUnsignedIntToChar(i); + for(char i=0; i<=d_lastchar; i++) { + CVC4::String c(i); Node n = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)); Node r2; if(cset.find(i) == cset.end()) { @@ -1781,7 +1799,7 @@ void RegExpOpr::disjunctRegExp(Node r, std::vector<Node> &vec_or) { std::string RegExpOpr::niceChar(Node r) { if(r.isConst()) { std::string s = r.getConst<CVC4::String>().toString() ; - return s == "" ? "{E}" : ( s == " " ? "{ }" : s.size()>1? "("+s+")" : s ); + return s == "" ? "{E}" : ( s == " " ? "{ }" : s ); } else { std::string ss = "$" + r.toString(); return ss; diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index a522161fb..d170bd17a 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -39,7 +39,7 @@ class RegExpOpr { typedef std::pair< Node, Node > PairNodes; private: - unsigned d_card; + const char d_lastchar; Node d_emptyString; Node d_true; Node d_false; @@ -61,8 +61,8 @@ private: std::map< PairNodeStr, std::pair< Node, int > > d_deriv_cache; std::map< Node, std::pair< Node, int > > d_compl_cache; std::map< Node, bool > d_cstre_cache; - std::map< Node, std::pair< std::set<unsigned>, std::set<Node> > > d_cset_cache; - std::map< Node, std::pair< std::set<unsigned>, std::set<Node> > > d_fset_cache; + std::map< Node, std::pair< std::set<char>, std::set<Node> > > d_cset_cache; + std::map< Node, std::pair< std::set<char>, std::set<Node> > > d_fset_cache; std::map< PairNodes, Node > d_inter_cache; std::map< Node, Node > d_rm_inter_cache; std::map< Node, bool > d_norv_cache; @@ -75,18 +75,28 @@ private: Node mkAllExceptOne( char c ); bool isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2); - void getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ); + void getCharSet( Node r, std::set<char> &pcset, SetNodes &pvset ); bool containC2(unsigned cnt, Node n); Node convert1(unsigned cnt, Node n); void convert2(unsigned cnt, Node n, Node &r1, Node &r2); bool testNoRV(Node r); Node intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ); Node removeIntersection(Node r); - void firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ); + void firstChars( Node r, std::set<char> &pcset, SetNodes &pvset ); //TODO: for intersection bool follow( Node r, CVC4::String c, std::vector< char > &vec_chars ); + /*class CState { + public: + Node r1; + Node r2; + unsigned cnt; + Node head; + CState(Node rr1, Node rr2, Node rcnt, Node rhead) + : r1(rr1), r2(rr2), cnt(rcnt), head(rhead) {} + };*/ + public: RegExpOpr(); diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 384d4567b..f394047fa 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -472,12 +472,15 @@ void TheoryStrings::preRegisterTerm(TNode n) { void TheoryStrings::preRegisterTerm(TNode n) { if( d_registed_terms_cache.find(n) == d_registed_terms_cache.end() ) { switch( n.getKind() ) { - case kind::EQUAL: + case kind::EQUAL: { d_equalityEngine.addTriggerEquality(n); break; - case kind::STRING_IN_REGEXP: + } + case kind::STRING_IN_REGEXP: { + d_out->requirePhase(n, true); d_equalityEngine.addTriggerPredicate(n); break; + } //case kind::STRING_SUBSTR_TOTAL: default: { if( n.getType().isString() ) { diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 6fcbdfff3..56f6a2ef0 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -358,6 +358,16 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i return false; } } + case kind::REGEXP_RANGE: { + if(s.size() == index_start + 1) { + char a = r[0].getConst<String>().getFirstChar(); + char b = r[1].getConst<String>().getFirstChar(); + char c = s.getLastChar(); + return (a <= c && c <= b); + } else { + return false; + } + } default: { Trace("strings-error") << "Unsupported term: " << r << " in testConstStringInRegExp." << std::endl; Assert( false, "Unsupported Term" ); |