diff options
author | Tianyi Liang <tianyi-liang@uiowa.edu> | 2013-11-06 10:43:06 -0600 |
---|---|---|
committer | Tianyi Liang <tianyi-liang@uiowa.edu> | 2013-11-06 10:59:42 -0600 |
commit | e79fe51d91c329a92b9141a65305ebce5c108c21 (patch) | |
tree | eebd164e2177adaeeafc75d81de6139a3ff27d44 | |
parent | ad0f78965f23b0994cac6a210650697b9a20cceb (diff) |
add seperate regular expression files
-rw-r--r-- | src/theory/strings/Makefile.am | 3 | ||||
-rw-r--r-- | src/theory/strings/regexp_operation.cpp | 656 | ||||
-rw-r--r-- | src/theory/strings/regexp_operation.h | 65 | ||||
-rw-r--r-- | src/theory/strings/theory_strings.cpp | 131 | ||||
-rw-r--r-- | src/theory/strings/theory_strings.h | 7 |
5 files changed, 847 insertions, 15 deletions
diff --git a/src/theory/strings/Makefile.am b/src/theory/strings/Makefile.am index 38efa33f3..f7a6fa0a2 100644 --- a/src/theory/strings/Makefile.am +++ b/src/theory/strings/Makefile.am @@ -13,7 +13,8 @@ libstrings_la_SOURCES = \ theory_strings_type_rules.h \ type_enumerator.h \ theory_strings_preprocess.h \ - theory_strings_preprocess.cpp + theory_strings_preprocess.cpp \ + regexp_operation.cpp EXTRA_DIST = \ kinds diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp new file mode 100644 index 000000000..0a7701862 --- /dev/null +++ b/src/theory/strings/regexp_operation.cpp @@ -0,0 +1,656 @@ +/********************* */
+/*! \file regexp_operation.CPP
+ ** \verbatim
+ ** Original author: Tianyi Liang
+ ** Major contributors: Tianyi Liang, Andrew Reynolds
+ ** Minor contributors (to current version): none
+ ** This file is part of the CVC4 prototype.
+ ** Copyright (c) 2013-2013 New York University and The University of Iowa
+ ** See the file COPYING in the top-level source directory for licensing
+ ** information.\endverbatim
+ **
+ ** \brief Regular Expresion Operations
+ **
+ ** Regular Expresion Operations
+ **/
+
+#include "theory/strings/regexp_operation.h"
+#include "expr/kind.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+RegExpOpr::RegExpOpr() {
+ d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
+ // All Charactors = all printable ones 32-126
+ d_char_start = 'a';//' ';
+ d_char_end = 'c';//'~';
+ d_sigma = mkAllExceptOne( '\0' );
+ d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
+}
+
+bool RegExpOpr::checkConstRegExp( Node r ) {
+ bool ret = true;
+ if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
+ ret = d_cstre_cache[r];
+ } else {
+ if(r.getKind() == kind::STRING_TO_REGEXP) {
+ Node tmp = Rewriter::rewrite( r[0] );
+ ret = tmp.isConst();
+ } else {
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if(!checkConstRegExp(r[i])) {
+ ret = false; break;
+ }
+ }
+ }
+ d_cstre_cache[r] = ret;
+ }
+ return ret;
+}
+
+// 0-unknown, 1-yes, 2-no
+int RegExpOpr::delta( Node r ) {
+ Trace("strings-regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
+ int ret = 0;
+ if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
+ ret = d_delta_cache[r];
+ } else {
+ int k = r.getKind();
+ switch( k ) {
+ case kind::STRING_TO_REGEXP:
+ {
+ if(r[0].isConst()) {
+ if(r[0] == d_emptyString) {
+ ret = 1;
+ } else {
+ ret = 2;
+ }
+ } else {
+ ret = 0;
+ }
+ }
+ break;
+ case kind::REGEXP_CONCAT:
+ {
+ bool flag = false;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ int tmp = delta( r[i] );
+ if(tmp == 2) {
+ ret = 2;
+ break;
+ } else if(tmp == 0) {
+ flag = true;
+ }
+ }
+ if(!flag && ret != 2) {
+ ret = 1;
+ }
+ }
+ break;
+ case kind::REGEXP_OR:
+ {
+ bool flag = false;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ int tmp = delta( r[i] );
+ if(tmp == 1) {
+ ret = 1;
+ break;
+ } else if(tmp == 0) {
+ flag = true;
+ }
+ }
+ if(!flag && ret != 1) {
+ ret = 2;
+ }
+ }
+ break;
+ case kind::REGEXP_INTER:
+ {
+ bool flag = true;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ int tmp = delta( r[i] );
+ if(tmp == 2) {
+ ret = 2; flag=false;
+ break;
+ } else if(tmp == 0) {
+ flag=false;
+ break;
+ }
+ }
+ if(flag) {
+ ret = 1;
+ }
+ }
+ break;
+ case kind::REGEXP_STAR:
+ {
+ ret = 1;
+ }
+ break;
+ case kind::REGEXP_PLUS:
+ {
+ ret = delta( r[0] );
+ }
+ break;
+ case kind::REGEXP_OPT:
+ {
+ ret = 1;
+ }
+ break;
+ case kind::REGEXP_RANGE:
+ {
+ ret = 2;
+ }
+ break;
+ default:
+ //TODO: special sym: sigma, none, all
+ Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
+ //AlwaysAssert( false );
+ //return Node::null();
+ }
+ d_delta_cache[r] = ret;
+ }
+ Trace("strings-regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
+ return ret;
+}
+
+//null - no solution
+Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
+ Assert( c.size() < 2 );
+ Trace("strings-regexp-derivative") << "RegExp-derivative starts with " << mkString( r ) << ", c=" << c << std::endl;
+ Node retNode = Node::null();
+ PairDvStr dv = std::make_pair( r, c );
+ if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
+ retNode = d_dv_cache[dv];
+ } else if( c.isEmptyString() ){
+ int tmp = delta( r );
+ if(tmp == 0) {
+ retNode = Node::null();
+ // TODO variable
+ } else if(tmp == 1) {
+ retNode = r;
+ } else {
+ retNode = Node::null();
+ }
+ } else {
+ int k = r.getKind();
+ switch( k ) {
+ case kind::STRING_TO_REGEXP:
+ {
+ if(r[0].isConst()) {
+ if(r[0] == d_emptyString) {
+ retNode = Node::null();
+ } else {
+ if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
+ retNode = r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
+ NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
+ } else {
+ retNode = Node::null();
+ }
+ }
+ } else {
+ retNode = Node::null();
+ // TODO variable
+ }
+ }
+ break;
+ case kind::REGEXP_CONCAT:
+ {
+ std::vector< Node > vec_nodes;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ Node dc = derivativeSingle(r[i], c);
+ if(!dc.isNull()) {
+ std::vector< Node > vec_nodes2;
+ if(dc != d_emptyString) {
+ vec_nodes2.push_back( dc );
+ }
+ for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
+ if(r[j] != d_emptyString) {
+ vec_nodes2.push_back( r[j] );
+ }
+ }
+ Node tmp = vec_nodes2.size()==0 ? d_emptyString :
+ ( vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 ) );
+ if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
+ vec_nodes.push_back( tmp );
+ }
+ }
+
+ if( delta( r[i] ) != 1 ) {
+ break;
+ }
+ }
+ retNode = vec_nodes.size() == 0 ? Node::null() :
+ ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ) );
+ }
+ break;
+ case kind::REGEXP_OR:
+ {
+ std::vector< Node > vec_nodes;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ Node dc = derivativeSingle(r[i], c);
+ if(!dc.isNull()) {
+ if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
+ vec_nodes.push_back( dc );
+ }
+ }
+
+ }
+ retNode = vec_nodes.size() == 0 ? Node::null() :
+ ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ) );
+ }
+ break;
+ case kind::REGEXP_INTER:
+ {
+ bool flag = true;
+ bool flag_sg = false;
+ std::vector< Node > vec_nodes;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ Node dc = derivativeSingle(r[i], c);
+ if(!dc.isNull()) {
+ if(dc == d_sigma_star) {
+ flag_sg = true;
+ } else {
+ if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
+ vec_nodes.push_back( dc );
+ }
+ }
+ } else {
+ flag = false;
+ break;
+ }
+ }
+ if(flag) {
+ if(vec_nodes.size() == 0 && flag_sg) {
+ retNode = d_sigma_star;
+ } else {
+ retNode = vec_nodes.size() == 0 ? Node::null() :
+ ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
+ }
+ } else {
+ retNode = Node::null();
+ }
+ }
+ break;
+ case kind::REGEXP_STAR:
+ {
+ Node dc = derivativeSingle(r[0], c);
+ if(!dc.isNull()) {
+ retNode = dc==d_emptyString ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
+ } else {
+ retNode = Node::null();
+ }
+ }
+ break;
+ case kind::REGEXP_PLUS:
+ {
+ Node dc = derivativeSingle(r[0], c);
+ if(!dc.isNull()) {
+ retNode = dc==d_emptyString ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
+ } else {
+ retNode = Node::null();
+ }
+ }
+ break;
+ case kind::REGEXP_OPT:
+ {
+ return derivativeSingle(r[0], c);
+ }
+ break;
+ case kind::REGEXP_RANGE:
+ {
+ char ch = c.getFirstChar();
+ if (ch >= r[0].getConst< CVC4::String >().getFirstChar() && ch <= r[1].getConst< CVC4::String >().getFirstChar() ) {
+ return d_emptyString;
+ } else {
+ return Node::null();
+ }
+ }
+ break;
+ default:
+ //TODO: special sym: sigma, none, all
+ Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
+ //AlwaysAssert( false );
+ //return Node::null();
+ }
+ d_dv_cache[dv] = retNode;
+ }
+ Trace("strings-regexp-derivative") << "RegExp-derivative returns : " << ( retNode.isNull() ? "NULL" : mkString( retNode ) ) << std::endl;
+ return retNode;
+}
+
+void RegExpOpr::firstChar( Node r ) {
+ Trace("strings-regexp-firstchar") << "Head characters: ";
+ for(char ch = d_char_start; ch <= d_char_end; ++ch) {
+ CVC4::String c(ch);
+ Node dc = derivativeSingle(r, ch);
+ if(!dc.isNull()) {
+ Trace("strings-regexp-firstchar") << c << " (" << mkString(dc) << ")" << std::endl;
+ }
+ }
+ Trace("strings-regexp-firstchar") << std::endl;
+}
+
+bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {
+ int k = r.getKind();
+ switch( k ) {
+ case kind::STRING_TO_REGEXP:
+ {
+ if(r[0].isConst()) {
+ if(r[0] != d_emptyString) {
+ char t1 = r[0].getConst< CVC4::String >().getFirstChar();
+ if(c.isEmptyString()) {
+ vec_chars.push_back( t1 );
+ return true;
+ } else {
+ char t2 = c.getFirstChar();
+ if(t1 != t2) {
+ return false;
+ } else {
+ if(c.size() >= 2) {
+ vec_chars.push_back( c.substr(1,1).getFirstChar() );
+ } else {
+ vec_chars.push_back( '\0' );
+ }
+ return true;
+ }
+ }
+ } else {
+ return false;
+ }
+ } else {
+ return false;
+ }
+ }
+ break;
+ case kind::REGEXP_CONCAT:
+ {
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if( follow(r[i], c, vec_chars) ) {
+ if(vec_chars[vec_chars.size() - 1] == '\0') {
+ vec_chars.pop_back();
+ c = d_emptyString.getConst< CVC4::String >();
+ }
+ } else {
+ return false;
+ }
+ }
+ vec_chars.push_back( '\0' );
+ return true;
+ }
+ break;
+ case kind::REGEXP_OR:
+ {
+ bool flag = false;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if( follow(r[i], c, vec_chars) ) {
+ flag=true;
+ }
+ }
+ return flag;
+ }
+ break;
+ case kind::REGEXP_INTER:
+ {
+ std::vector< char > vt2;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ std::vector< char > v_tmp;
+ if( !follow(r[i], c, v_tmp) ) {
+ return false;
+ }
+ std::vector< char > vt3(vt2);
+ vt2.clear();
+ std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );
+ if(vt2.size() == 0) {
+ return false;
+ }
+ }
+ vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );
+ return true;
+ }
+ break;
+ case kind::REGEXP_STAR:
+ {
+ if(follow(r[0], c, vec_chars)) {
+ if(vec_chars[vec_chars.size() - 1] == '\0') {
+ if(c.isEmptyString()) {
+ return true;
+ } else {
+ vec_chars.pop_back();
+ c = d_emptyString.getConst< CVC4::String >();
+ return follow(r[0], c, vec_chars);
+ }
+ } else {
+ return true;
+ }
+ } else {
+ vec_chars.push_back( '\0' );
+ return true;
+ }
+ }
+ break;
+ /*
+ case kind::REGEXP_PLUS:
+ {
+ ret = delta( r[0] );
+ }
+ break;
+ case kind::REGEXP_OPT:
+ {
+ ret = 1;
+ }
+ break;
+ case kind::REGEXP_RANGE:
+ {
+ ret = 2;
+ }
+ break;*/
+ default:
+ //TODO: special sym: sigma, none, all
+ Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
+ //AlwaysAssert( false );
+ //return Node::null();
+ return false;
+ }
+}
+
+Node RegExpOpr::mkAllExceptOne( char exp_c ) {
+ std::vector< Node > vec_nodes;
+ for(char c=d_char_start; c<=d_char_end; ++c) {
+ if(c != exp_c ) {
+ Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );
+ vec_nodes.push_back( n );
+ }
+ }
+ return NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
+}
+
+Node RegExpOpr::complement( Node r ) {
+ Trace("strings-regexp-compl") << "RegExp-Compl starts with " << mkString( r ) << std::endl;
+ Node retNode = r;
+ if( d_compl_cache.find( r ) != d_compl_cache.end() ) {
+ retNode = d_compl_cache[r];
+ } else {
+ int k = r.getKind();
+ switch( k ) {
+ case kind::STRING_TO_REGEXP:
+ {
+ if(r[0].isConst()) {
+ if(r[0] == d_emptyString) {
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, d_sigma, d_sigma_star );
+ } else {
+ std::vector< Node > vec_nodes;
+ vec_nodes.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) );
+ CVC4::String s = r[0].getConst<String>();
+ for(unsigned i=0; i<s.size(); ++i) {
+ char c = s.substr(i, 1).getFirstChar();
+ Node tmp = mkAllExceptOne( c );
+ if(i != 0 ) {
+ Node tmph = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
+ NodeManager::currentNM()->mkConst( s.substr(0, i) ) );
+ tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmph, tmp );
+ }
+ tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmp, d_sigma_star );
+ vec_nodes.push_back( tmp );
+ }
+ Node tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, d_sigma, d_sigma_star );
+ vec_nodes.push_back( tmp );
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
+ }
+ } else {
+ Trace("strings-error") << "Unsupported string variable: " << r[0] << " in complement of RegExp." << std::endl;
+ //AlwaysAssert( false );
+ //return Node::null();
+ }
+ }
+ break;
+ case kind::REGEXP_CONCAT:
+ {
+ std::vector< Node > vec_nodes;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ Node tmp = complement( r[i] );
+ for(unsigned j=0; j<i; ++j) {
+ tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r[j], tmp );
+ }
+ if(i != r.getNumChildren() - 1) {
+ tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmp,
+ NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma ) );
+ }
+ vec_nodes.push_back( tmp );
+ }
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
+ }
+ break;
+ case kind::REGEXP_OR:
+ {
+ std::vector< Node > vec_nodes;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ Node tmp = complement( r[i] );
+ vec_nodes.push_back( tmp );
+ }
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes );
+ }
+ break;
+ case kind::REGEXP_INTER:
+ {
+ std::vector< Node > vec_nodes;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ Node tmp = complement( r[i] );
+ vec_nodes.push_back( tmp );
+ }
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
+ }
+ break;
+ case kind::REGEXP_STAR:
+ {
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, d_sigma, d_sigma_star );
+ Node tmp = complement( r[0] );
+ tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, tmp );
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, retNode, tmp );
+ }
+ break;
+ default:
+ //TODO: special sym: sigma, none, all
+ Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in complement of RegExp." << std::endl;
+ //AlwaysAssert( false );
+ //return Node::null();
+ }
+ d_compl_cache[r] = retNode;
+ }
+ Trace("strings-regexp-compl") << "RegExp-Compl returns : " << mkString( retNode ) << std::endl;
+ return retNode;
+}
+
+std::string RegExpOpr::niceChar( Node r ) {
+ if(r.isConst()) {
+ std::string s = r.getConst<CVC4::String>().toString() ;
+ return s == "" ? "{E}" : ( s == " " ? "{ }" : s );
+ } else {
+ return r.toString();
+ }
+}
+std::string RegExpOpr::mkString( Node r ) {
+ std::string retStr;
+ int k = r.getKind();
+ switch( k ) {
+ case kind::STRING_TO_REGEXP:
+ {
+ retStr += niceChar( r[0] );
+ }
+ break;
+ case kind::REGEXP_CONCAT:
+ {
+ retStr += "(";
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if(i != 0) retStr += ".";
+ retStr += mkString( r[i] );
+ }
+ retStr += ")";
+ }
+ break;
+ case kind::REGEXP_OR:
+ {
+ if(r == d_sigma) {
+ retStr += "{A}";
+ } else {
+ retStr += "(";
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if(i != 0) retStr += "|";
+ retStr += mkString( r[i] );
+ }
+ retStr += ")";
+ }
+ }
+ break;
+ case kind::REGEXP_INTER:
+ {
+ retStr += "(";
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if(i != 0) retStr += "&";
+ retStr += mkString( r[i] );
+ }
+ retStr += ")";
+ }
+ break;
+ case kind::REGEXP_STAR:
+ {
+ retStr += mkString( r[0] );
+ retStr += "*";
+ }
+ break;
+ case kind::REGEXP_PLUS:
+ {
+ retStr += mkString( r[0] );
+ retStr += "+";
+ }
+ break;
+ case kind::REGEXP_OPT:
+ {
+ retStr += mkString( r[0] );
+ retStr += "?";
+ }
+ break;
+ case kind::REGEXP_RANGE:
+ {
+ retStr += "[";
+ retStr += niceChar( r[0] );
+ retStr += "-";
+ retStr += niceChar( r[1] );
+ retStr += "]";
+ }
+ break;
+ default:
+ //TODO: special sym: sigma, none, all
+ Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
+ //AlwaysAssert( false );
+ //return Node::null();
+ }
+
+ return retStr;
+}
+
+}/* CVC4::theory::strings namespace */
+}/* CVC4::theory namespace */
+}/* CVC4 namespace */
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h new file mode 100644 index 000000000..5f0eab2e0 --- /dev/null +++ b/src/theory/strings/regexp_operation.h @@ -0,0 +1,65 @@ +/********************* */
+/*! \file regexp_operation.h
+ ** \verbatim
+ ** Original author: Tianyi Liang
+ ** Major contributors: Tianyi Liang, Andrew Reynolds
+ ** Minor contributors (to current version): none
+ ** This file is part of the CVC4 prototype.
+ ** Copyright (c) 2013-2013 New York University and The University of Iowa
+ ** See the file COPYING in the top-level source directory for licensing
+ ** information.\endverbatim
+ **
+ ** \brief Regular Expresion Operations
+ **
+ ** Regular Expresion Operations
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef __CVC4__THEORY__STRINGS__REGEXP__OPERATION_H
+#define __CVC4__THEORY__STRINGS__REGEXP__OPERATION_H
+
+#include <vector>
+#include "util/hash.h"
+#include "theory/theory.h"
+#include "theory/rewriter.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+class RegExpOpr {
+ typedef std::pair< Node, CVC4::String > PairDvStr;
+private:
+ Node d_emptyString;
+ char d_char_start;
+ char d_char_end;
+ Node d_sigma;
+ Node d_sigma_star;
+
+ std::map< Node, Node > d_compl_cache;
+ std::map< Node, int > d_delta_cache;
+ std::map< PairDvStr, Node > d_dv_cache;
+ std::map< Node, bool > d_cstre_cache;
+ //bool checkStarPlus( Node t );
+ //void simplifyRegExp( Node s, Node r, std::vector< Node > &ret, std::vector< Node > &nn );
+ //Node simplify( Node t, std::vector< Node > &new_nodes );
+ std::string niceChar( Node r );
+public:
+ RegExpOpr();
+ bool checkConstRegExp( Node r );
+ //void simplify(std::vector< Node > &vec_node);
+ Node mkAllExceptOne( char c );
+ Node complement( Node r );
+ int delta( Node r );
+ Node derivativeSingle( Node r, CVC4::String c );
+ void firstChar( Node r );
+ bool follow( Node r, CVC4::String c, std::vector< char > &vec_chars );
+ std::string mkString( Node r );
+};
+
+}/* CVC4::theory::strings namespace */
+}/* CVC4::theory namespace */
+}/* CVC4 namespace */
+
+#endif /* __CVC4__THEORY__STRINGS__REGEXP__OPERATION_H */
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index a50c295da..ab6ff9d68 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -60,9 +60,6 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu d_true = NodeManager::currentNM()->mkConst( true ); d_false = NodeManager::currentNM()->mkConst( false ); - //option - //d_regexp_unroll_depth = options::stringRegExpUnrollDepth(); - //d_fmf = options::stringFMF(); } TheoryStrings::~TheoryStrings() { @@ -394,7 +391,27 @@ void TheoryStrings::check(Effort e) { //must record string in regular expressions if ( atom.getKind() == kind::STRING_IN_REGEXP ) { //if(fact[0].getKind() != kind::CONST_STRING) { - d_reg_exp_mem.push_back( assertion ); + //if(polarity) { + d_reg_exp_mem.push_back( assertion ); + /*} else { + Node r = Rewriter::rewrite( atom[1] ); + r = d_regexp_opr.complement( r ); + r = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, atom[0], r ); + std::vector< Node > vec_r; + vec_r.push_back( r ); + + StringsPreprocess spp; + spp.simplify( vec_r ); + for( unsigned i=1; i<vec_r.size(); i++ ){ + if(vec_r[i].getKind() == kind::STRING_IN_REGEXP) { + d_reg_exp_mem.push_back( vec_r[i] ); + } else if(vec_r[i].getKind() == kind::EQUAL) { + d_equalityEngine.assertEquality(vec_r[i], true, vec_r[i]); + } else { + Assert(false); + } + } + }*/ //} }else if (atom.getKind() == kind::EQUAL) { d_equalityEngine.assertEquality(atom, polarity, fact); @@ -1869,7 +1886,7 @@ bool TheoryStrings::unrollStar( Node atom ) { Node r = atom[1]; int depth = d_reg_exp_unroll_depth.find( atom )==d_reg_exp_unroll_depth.end() ? 0 : d_reg_exp_unroll_depth[atom]; if( depth <= options::stringRegExpUnrollDepth() ) { - Trace("strings-regex") << "Strings::regex: Unroll " << atom << " for " << ( depth + 1 ) << " times." << std::endl; + Trace("strings-regexp") << "Strings::regexp: Unroll " << atom << " for " << ( depth + 1 ) << " times." << std::endl; d_reg_exp_unroll[atom] = true; //add lemma? Node xeqe = x.eqNode( d_emptyString ); @@ -1930,7 +1947,7 @@ bool TheoryStrings::unrollStar( Node atom ) { sendLemma( ant, lem, "Unroll" ); return true; }else{ - Trace("strings-regex") << "Strings::regex: Stop unrolling " << atom << " the max (" << depth << ") is reached." << std::endl; + Trace("strings-regexp") << "Strings::regexp: Stop unrolling " << atom << " the max (" << depth << ") is reached." << std::endl; return false; } } @@ -1941,7 +1958,7 @@ bool TheoryStrings::checkMemberships() { for( unsigned i=0; i<d_reg_exp_mem.size(); i++ ){ //check regular expression membership Node assertion = d_reg_exp_mem[i]; - Trace("strings-regex") << "We have regular expression assertion : " << assertion << std::endl; + Trace("strings-regexp") << "We have regular expression assertion : " << assertion << std::endl; Node atom = assertion.getKind()==kind::NOT ? assertion[0] : assertion; bool polarity = assertion.getKind()!=kind::NOT; if( polarity ){ @@ -1952,21 +1969,49 @@ bool TheoryStrings::checkMemberships() { //TODO Assert( r.getKind()==kind::REGEXP_STAR ); if( !areEqual( x, d_emptyString ) ){ - if( unrollStar( atom ) ){ + //if(splitRegExp( x, r, atom )) { + // addedLemma = true; + //} else + if( unrollStar( atom ) ) { addedLemma = true; - }else{ - Trace("strings-regex") << "RegEx is incomplete due to " << assertion << ", depth = " << options::stringRegExpUnrollDepth() << std::endl; + } else { + Trace("strings-regexp") << "RegExp is incomplete due to " << assertion << ", depth = " << options::stringRegExpUnrollDepth() << std::endl; is_unk = true; } }else{ - Trace("strings-regex") << "...is satisfied." << std::endl; + Trace("strings-regexp") << "...is satisfied." << std::endl; } }else{ - Trace("strings-regex") << "...Already unrolled." << std::endl; + Trace("strings-regexp") << "...Already unrolled." << std::endl; } }else{ //TODO: negative membership - Trace("strings-regex") << "RegEx is incomplete due to " << assertion << "." << std::endl; + //Node r = Rewriter::rewrite( atom[1] ); + //r = d_regexp_opr.complement( r ); + //Trace("strings-regexp-test") << "Compl( " << d_regexp_opr.mkString( atom[1] ) << " ) is " << d_regexp_opr.mkString( r ) << std::endl; + //Trace("strings-regexp-test") << "Delta( " << d_regexp_opr.mkString( atom[1] ) << " ) is " << d_regexp_opr.delta( atom[1] ) << std::endl; + //Trace("strings-regexp-test") << "Delta( " << d_regexp_opr.mkString( r ) << " ) is " << d_regexp_opr.delta( r ) << std::endl; + //Trace("strings-regexp-test") << "Deriv( " << d_regexp_opr.mkString( r ) << ", c='b' ) is " << d_regexp_opr.mkString( d_regexp_opr.derivativeSingle( r, ::CVC4::String("b") ) ) << std::endl; + //Trace("strings-regexp-test") << "FHC( " << d_regexp_opr.mkString( r ) <<" ) is " << std::endl; + //d_regexp_opr.firstChar( r ); + //r = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, atom[0], r ); + /* + std::vector< Node > vec_r; + vec_r.push_back( r ); + + StringsPreprocess spp; + spp.simplify( vec_r ); + for( unsigned i=1; i<vec_r.size(); i++ ){ + if(vec_r[i].getKind() == kind::STRING_IN_REGEXP) { + d_reg_exp_mem.push_back( vec_r[i] ); + } else if(vec_r[i].getKind() == kind::EQUAL) { + d_equalityEngine.assertEquality(vec_r[i], true, vec_r[i]); + } else { + Assert(false); + } + } + */ + Trace("strings-regexp") << "RegEx is incomplete due to " << assertion << "." << std::endl; is_unk = true; } } @@ -1975,13 +2020,71 @@ bool TheoryStrings::checkMemberships() { return true; }else{ if( is_unk ){ - Trace("strings-regex") << "SET INCOMPLETE" << std::endl; + Trace("strings-regexp") << "SET INCOMPLETE" << std::endl; d_out->setIncomplete(); } return false; } } +CVC4::String TheoryStrings::getHeadConst( Node x ) { + if( x.isConst() && x != d_emptyString ) { + return x.getConst< String >(); + } else if( x.getKind() == kind::STRING_CONCAT ) { + if( x[0].isConst() && x[0] != d_emptyString ) { + return x.getConst< String >(); + } else { + return d_emptyString.getConst< String >(); + } + } else { + return d_emptyString.getConst< String >(); + } +} + +bool TheoryStrings::splitRegExp( Node x, Node r, Node ant ) { + x = Rewriter::rewrite( x ); + if(x == d_emptyString) { + //if(d_regexp_opr.delta() == 1) { + //} + return false; + } else { + CVC4::String s = getHeadConst( x ); + if( !s.isEmptyString() && d_regexp_opr.checkConstRegExp( r ) ) { + Node conc = Node::null(); + Node dc = r; + bool flag = true; + for(unsigned i=0; i<s.size(); ++i) { + CVC4::String c = s.substr(i, 1); + dc = d_regexp_opr.derivativeSingle(dc, c); + if(dc.isNull()) { + // CONFLICT + flag = false; + break; + } + } + // send lemma + if(flag) { + Node left = Node::null(); + if(x.isConst()) { + left = d_emptyString; + if(d_regexp_opr.delta(dc)) { + //TODO yes + } else { + // TODO conflict + } + } else { + //TODO find x rest + conc = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, dc ); + } + } + sendLemma(ant, conc, "RegExp Const Split"); + } else { + return false; + } + } + return false; +} + Node TheoryStrings::getNextDecisionRequest() { if(options::stringFMF() && !d_conflict) { //Trace("strings-fmf-debug") << "Strings::FMF: Assertion Level = " << d_valuation.getAssertionLevel() << std::endl; diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 8f21888a2..1291fc94e 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -22,6 +22,7 @@ #include "theory/theory.h" #include "theory/uf/equality_engine.h" #include "theory/strings/theory_strings_preprocess.h" +#include "theory/strings/regexp_operation.h" #include "context/cdchunk_list.h" @@ -252,6 +253,12 @@ protected: void seperateByLength( std::vector< Node >& n, std::vector< std::vector< Node > >& col, std::vector< Node >& lts ); void printConcat( std::vector< Node >& n, const char * c ); + // Regular Expression +private: + RegExpOpr d_regexp_opr; + CVC4::String getHeadConst( Node x ); + bool splitRegExp( Node x, Node r, Node ant ); + private: // Finite Model Finding //bool d_fmf; |