summaryrefslogtreecommitdiff
path: root/src/theory
diff options
context:
space:
mode:
authorTianyi Liang <tianyi-liang@uiowa.edu>2014-07-25 13:43:44 -0500
committerTianyi Liang <tianyi-liang@uiowa.edu>2014-07-25 13:43:44 -0500
commitc143281522a76a0fae3bb4167feca75e5aef326a (patch)
tree59fd6126a3d5f4ba9aff6f14b16b8417bdc0fb70 /src/theory
parent4e1e7cae681ac746a79505420dc0cc1febee226c (diff)
patch for regular expression intersection caching
Diffstat (limited to 'src/theory')
-rw-r--r--src/theory/strings/kinds5
-rw-r--r--src/theory/strings/regexp_operation.cpp217
-rw-r--r--src/theory/strings/regexp_operation.h5
-rw-r--r--src/theory/strings/theory_strings.cpp5
-rw-r--r--src/theory/strings/theory_strings_rewriter.cpp8
-rw-r--r--src/theory/strings/theory_strings_type_rules.h15
6 files changed, 248 insertions, 7 deletions
diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds
index 4266c02f5..0f68d1207 100644
--- a/src/theory/strings/kinds
+++ b/src/theory/strings/kinds
@@ -88,6 +88,11 @@ operator REGEXP_LOOP 2:3 "regexp loop"
operator REGEXP_EMPTY 0 "regexp empty"
operator REGEXP_SIGMA 0 "regexp all characters"
+#internal
+operator REGEXP_RV 1 "regexp rv (internal use only)"
+typerule REGEXP_RV ::CVC4::theory::strings::RegExpRVTypeRule
+
+#typerules
typerule REGEXP_CONCAT ::CVC4::theory::strings::RegExpConcatTypeRule
typerule REGEXP_UNION ::CVC4::theory::strings::RegExpUnionTypeRule
typerule REGEXP_INTER ::CVC4::theory::strings::RegExpInterTypeRule
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
index 369278994..20fbf2870 100644
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -1183,8 +1183,19 @@ void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset )
}
}
+bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) {
+ for(std::set< PairNodes >::const_iterator itr = s.begin();
+ itr != s.end(); ++itr) {
+ if(itr->first == n1 && itr->second == n2 ||
+ itr->first == n2 && itr->second == n1) {
+ return true;
+ }
+ }
+ return false;
+}
Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache, bool &spflag ) {
+ Trace("regexp-intersect") << "Starting INTERSECT:\n "<< mkString(r1) << ",\n " << mkString(r2) << std::endl;
if(spflag) {
//TODO: var
return Node::null();
@@ -1230,11 +1241,18 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::se
spflag = true;
}
}
+ if(Trace.isOn("regexp-debug")) {
+ Trace("regexp-debug") << "Try CSET( " << cset.size() << " ) = ";
+ for(std::set<unsigned>::const_iterator itr = cset.begin();
+ itr != cset.end(); itr++) {
+ Trace("regexp-debug") << *itr << ", ";
+ }
+ Trace("regexp-debug") << std::endl;
+ }
for(std::set<unsigned>::const_iterator itr = cset.begin();
itr != cset.end(); itr++) {
CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
- std::pair< Node, Node > p(r1, r2);
- if(cache[ *itr ].find(p) == cache[ *itr ].end()) {
+ if(!isPairNodesInSet(cache[ *itr ], r1, r2)) {
Node r1l = derivativeSingle(r1, c);
Node r2l = derivativeSingle(r2, c);
std::map< unsigned, std::set< PairNodes > > cache2(cache);
@@ -1263,10 +1281,201 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::se
Trace("regexp-intersect") << "INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
return rNode;
}
+
+bool RegExpOpr::containC2(unsigned cnt, Node n) {
+ if(n.getKind() == kind::REGEXP_RV) {
+ unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
+ return cnt == y;
+ } else if(n.getKind() == kind::REGEXP_CONCAT) {
+ for( unsigned i=0; i<n.getNumChildren(); i++ ) {
+ if(containC2(cnt, n[i])) {
+ return true;
+ }
+ }
+ } else if(n.getKind() == kind::REGEXP_STAR) {
+ return containC2(cnt, n[0]);
+ } else if(n.getKind() == kind::REGEXP_UNION) {
+ for( unsigned i=0; i<n.getNumChildren(); i++ ) {
+ if(containC2(cnt, n[i])) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+Node RegExpOpr::convert1(unsigned cnt, Node n) {
+ Trace("regexp-debug") << "Converting " << n << " ... " << std::endl;
+ Node r1, r2;
+ convert2(cnt, n, r1, r2);
+ Trace("regexp-debug") << "... getting r1=" << r1 << ", and r2=" << r2 << std::endl;
+ Node ret = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
+ NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, r1), r2);
+ ret = Rewriter::rewrite( ret );
+ Trace("regexp-debug") << "... done convert, with return " << ret << std::endl;
+ return ret;
+}
+void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
+ if(n == d_emptyRegexp) {
+ r1 = d_emptyRegexp;
+ r2 = d_emptyRegexp;
+ } else if(n == d_emptySingleton) {
+ r1 = d_emptySingleton;
+ r2 = d_emptySingleton;
+ } else if(n.getKind() == kind::REGEXP_RV) {
+ unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
+ r1 = d_emptySingleton;
+ if(cnt == y) {
+ r2 = d_emptyRegexp;
+ } else {
+ r2 = n;
+ }
+ } else if(n.getKind() == kind::REGEXP_CONCAT) {
+ //TODO
+ //convert2 x (r@(Seq l r1))
+ // | contains x r1 = let (r2,r3) = convert2 x r1
+ // in (Seq l r2, r3)
+ // | otherwise = (Empty, r)
+ bool flag = true;
+ std::vector<Node> vr1, vr2;
+ for( unsigned i=0; i<n.getNumChildren(); i++ ) {
+ if(containC2(cnt, n[i])) {
+ Node t1, t2;
+ convert2(cnt, n[i], t1, t2);
+ vr1.push_back(t1);
+ r1 = vr1.size()==0 ? d_emptyRegexp : vr1.size()==1 ? vr1[0] :
+ NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr1);
+ vr2.push_back(t2);
+ for( unsigned j=i+1; j<n.getNumChildren(); j++ ) {
+ vr2.push_back(n[j]);
+ }
+ r2 = vr2.size()==0 ? d_emptyRegexp : vr2.size()==1 ? vr2[0] :
+ NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr2);
+ flag = false;
+ break;
+ } else {
+ vr1.push_back(n[i]);
+ }
+ }
+ if(flag) {
+ r1 = d_emptySingleton;
+ r2 = n;
+ }
+ } else if(n.getKind() == kind::REGEXP_UNION) {
+ std::vector<Node> vr1, vr2;
+ for( unsigned i=0; i<n.getNumChildren(); i++ ) {
+ Node t1, t2;
+ convert2(cnt, n[i], t1, t2);
+ vr1.push_back(t1);
+ vr2.push_back(t2);
+ }
+ r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1);
+ r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2);
+ } else if(n.getKind() == kind::STRING_TO_REGEXP) {
+ r1 = d_emptySingleton;
+ r2 = n;
+ } else {
+ //is it possible?
+ }
+}
+Node RegExpOpr::intersectInternal2( Node r1, Node r2, std::map< PairNodes, Node > cache, bool &spflag, unsigned cnt ) {
+ Trace("regexp-intersect") << "Starting INTERSECT:\n "<< mkString(r1) << ",\n " << mkString(r2) << std::endl;
+ if(spflag) {
+ //TODO: var
+ return Node::null();
+ }
+ std::pair < Node, Node > p(r1, r2);
+ std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p);
+ Node rNode;
+ if(itr != d_inter_cache.end()) {
+ rNode = itr->second;
+ } else {
+ if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
+ rNode = d_emptyRegexp;
+ } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
+ Node exp;
+ int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
+ if(r == 0) {
+ //TODO: variable
+ spflag = true;
+ } else if(r == 1) {
+ rNode = d_emptySingleton;
+ } else {
+ rNode = d_emptyRegexp;
+ }
+ } else if(r1 == r2) {
+ rNode = convert1(cnt, r1);
+ } else {
+ PairNodes p(r1, r2);
+ std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p);
+ if(itrcache != cache.end()) {
+ rNode = convert1(cnt, itrcache->second);
+ } else {
+ if(checkConstRegExp(r1) && checkConstRegExp(r2)) {
+ std::vector< unsigned > cset;
+ std::set< unsigned > cset1, cset2;
+ std::set< Node > vset1, vset2;
+ firstChars(r1, cset1, vset1);
+ firstChars(r2, cset2, vset2);
+ std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset1.end(),
+ std::inserter(cset, cset.begin()));
+ std::vector< Node > vec_nodes;
+ Node delta_exp;
+ int flag = delta(r1, delta_exp);
+ int flag2 = delta(r2, delta_exp);
+ if(flag != 2 && flag2 != 2) {
+ if(flag == 1 && flag2 == 1) {
+ vec_nodes.push_back(d_emptySingleton);
+ } else {
+ //TODO
+ spflag = true;
+ }
+ }
+ if(Trace.isOn("regexp-debug")) {
+ Trace("regexp-debug") << "Try CSET( " << cset.size() << " ) = ";
+ for(std::vector<unsigned>::const_iterator itr = cset.begin();
+ itr != cset.end(); itr++) {
+ CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
+ Trace("regexp-debug") << c << ", ";
+ }
+ Trace("regexp-debug") << std::endl;
+ }
+ for(std::vector<unsigned>::const_iterator itr = cset.begin();
+ itr != cset.end(); itr++) {
+ CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
+ Node r1l = derivativeSingle(r1, c);
+ Node r2l = derivativeSingle(r2, c);
+ std::map< PairNodes, Node > cache2(cache);
+ PairNodes p(r1l, r2l);
+ cache2[ p ] = NodeManager::currentNM()->mkNode(kind::REGEXP_RV, NodeManager::currentNM()->mkConst(CVC4::Rational(cnt)));
+ Node rt = intersectInternal2(r1l, r2l, cache2, spflag, cnt+1);
+ rt = convert1(cnt, rt);
+ if(spflag) {
+ //TODO:
+ return Node::null();
+ }
+ rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
+ NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
+ vec_nodes.push_back(rt);
+ }
+ rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
+ NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
+ rNode = Rewriter::rewrite( rNode );
+ } else {
+ //TODO: non-empty var set
+ spflag = true;
+ }
+ }
+ }
+ d_inter_cache[p] = rNode;
+ }
+ Trace("regexp-intersect") << "End of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
+ return rNode;
+}
Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) {
- std::map< unsigned, std::set< PairNodes > > cache;
+ //std::map< unsigned, std::set< PairNodes > > cache;
+ std::map< PairNodes, Node > cache;
if(checkConstRegExp(r1) && checkConstRegExp(r2)) {
- return intersectInternal(r1, r2, cache, spflag);
+ return intersectInternal2(r1, r2, cache, spflag, 1);
} else {
spflag = true;
return Node::null();
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h
index e4ae1208d..2ae578cd6 100644
--- a/src/theory/strings/regexp_operation.h
+++ b/src/theory/strings/regexp_operation.h
@@ -69,9 +69,14 @@ private:
std::string niceChar( Node r );
int gcd ( int a, int b );
Node mkAllExceptOne( char c );
+ bool isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2);
void getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset );
Node intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache, bool &spflag );
+ bool containC2(unsigned cnt, Node n);
+ Node convert1(unsigned cnt, Node n);
+ void convert2(unsigned cnt, Node n, Node &r1, Node &r2);
+ Node intersectInternal2( Node r1, Node r2, std::map< PairNodes, Node > cache, bool &spflag, unsigned cnt );
void firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset );
//TODO: for intersection
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index 2856ce1e0..30e529663 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -2500,6 +2500,7 @@ bool TheoryStrings::checkMemberships() {
std::vector< Node > processed;
std::vector< Node > cprocessed;
+ Trace("regexp-debug") << "Checking Memberships ... " << std::endl;
//if(options::stringEIT()) {
//TODO: Opt for normal forms
for(NodeListMap::const_iterator itr_xr = d_str_re_map.begin();
@@ -2507,6 +2508,7 @@ bool TheoryStrings::checkMemberships() {
bool spflag = false;
Node x = (*itr_xr).first;
NodeList* lst = (*itr_xr).second;
+ Trace("regexp-debug") << "Checking Memberships for " << x << std::endl;
if(d_inter_index.find(x) == d_inter_index.end()) {
d_inter_index[x] = 0;
}
@@ -2515,6 +2517,7 @@ bool TheoryStrings::checkMemberships() {
if(lst->size() == 1) {
d_inter_cache[x] = (*lst)[0];
d_inter_index[x] = 1;
+ Trace("regexp-debug") << "... only one choice " << std::endl;
} else if(lst->size() > 1) {
Node r;
if(d_inter_cache.find(x) != d_inter_cache.end()) {
@@ -2528,6 +2531,7 @@ bool TheoryStrings::checkMemberships() {
for(int i=0; i<cur_inter_idx; i++) {
++itr_lst;
}
+ Trace("regexp-debug") << "... staring from : " << cur_inter_idx << ", we have " << lst->size() << std::endl;
for(;itr_lst != lst->end(); ++itr_lst) {
Node r2 = *itr_lst;
r = d_regexp_opr.intersect(r, r2, spflag);
@@ -2561,6 +2565,7 @@ bool TheoryStrings::checkMemberships() {
}
//}
+ Trace("regexp-debug") << "... No Intersec Conflict in Memberships " << std::endl;
if(!addedLemma) {
for( unsigned i=0; i<d_regexp_memberships.size(); i++ ) {
//check regular expression membership
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp
index 12ff92b5e..1014a95db 100644
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -302,7 +302,7 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) {
if(node[1].getKind() == kind::REGEXP_EMPTY) {
retNode = NodeManager::currentNM()->mkConst( false );
- } else if( x.getKind() == kind::CONST_STRING && checkConstRegExp(node[1]) ) {
+ } else if(x.getKind()==kind::CONST_STRING && checkConstRegExp(node[1])) {
//test whether x in node[1]
CVC4::String s = x.getConst<String>();
retNode = NodeManager::currentNM()->mkConst( testConstStringInRegExp( s, 0, node[1] ) );
@@ -311,10 +311,12 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) {
retNode = one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x));
} else if(node[1].getKind() == kind::REGEXP_STAR && node[1][0].getKind() == kind::REGEXP_SIGMA) {
retNode = NodeManager::currentNM()->mkConst( true );
- } else if( x != node[0] ) {
+ } else if(node[1].getKind() == kind::STRING_TO_REGEXP) {
+ retNode = x.eqNode(node[1][0]);
+ } else if(x != node[0]) {
retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, node[1] );
}
- return retNode;
+ return retNode;
}
RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) {
diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h
index 6d1bb1c98..8a51ea36c 100644
--- a/src/theory/strings/theory_strings_type_rules.h
+++ b/src/theory/strings/theory_strings_type_rules.h
@@ -466,6 +466,21 @@ public:
}
};
+class RegExpRVTypeRule {
+public:
+ inline static TypeNode computeType(NodeManager* nodeManager, TNode n, bool check)
+ throw (TypeCheckingExceptionPrivate, AssertionException) {
+ if( check ) {
+ TypeNode t = n[0].getType(check);
+ if (!t.isInteger()) {
+ throw TypeCheckingExceptionPrivate(n, "expecting an integer term in RV");
+ }
+ }
+ return nodeManager->regexpType();
+ }
+};
+
+
}/* CVC4::theory::strings namespace */
}/* CVC4::theory namespace */
}/* CVC4 namespace */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback