summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTianyi Liang <tianyi-liang@uiowa.edu>2013-11-06 10:43:06 -0600
committerTianyi Liang <tianyi-liang@uiowa.edu>2013-11-06 10:59:42 -0600
commite79fe51d91c329a92b9141a65305ebce5c108c21 (patch)
treeeebd164e2177adaeeafc75d81de6139a3ff27d44
parentad0f78965f23b0994cac6a210650697b9a20cceb (diff)
add seperate regular expression files
-rw-r--r--src/theory/strings/Makefile.am3
-rw-r--r--src/theory/strings/regexp_operation.cpp656
-rw-r--r--src/theory/strings/regexp_operation.h65
-rw-r--r--src/theory/strings/theory_strings.cpp131
-rw-r--r--src/theory/strings/theory_strings.h7
5 files changed, 847 insertions, 15 deletions
diff --git a/src/theory/strings/Makefile.am b/src/theory/strings/Makefile.am
index 38efa33f3..f7a6fa0a2 100644
--- a/src/theory/strings/Makefile.am
+++ b/src/theory/strings/Makefile.am
@@ -13,7 +13,8 @@ libstrings_la_SOURCES = \
theory_strings_type_rules.h \
type_enumerator.h \
theory_strings_preprocess.h \
- theory_strings_preprocess.cpp
+ theory_strings_preprocess.cpp \
+ regexp_operation.cpp
EXTRA_DIST = \
kinds
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
new file mode 100644
index 000000000..0a7701862
--- /dev/null
+++ b/src/theory/strings/regexp_operation.cpp
@@ -0,0 +1,656 @@
+/********************* */
+/*! \file regexp_operation.CPP
+ ** \verbatim
+ ** Original author: Tianyi Liang
+ ** Major contributors: Tianyi Liang, Andrew Reynolds
+ ** Minor contributors (to current version): none
+ ** This file is part of the CVC4 prototype.
+ ** Copyright (c) 2013-2013 New York University and The University of Iowa
+ ** See the file COPYING in the top-level source directory for licensing
+ ** information.\endverbatim
+ **
+ ** \brief Regular Expresion Operations
+ **
+ ** Regular Expresion Operations
+ **/
+
+#include "theory/strings/regexp_operation.h"
+#include "expr/kind.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+RegExpOpr::RegExpOpr() {
+ d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
+ // All Charactors = all printable ones 32-126
+ d_char_start = 'a';//' ';
+ d_char_end = 'c';//'~';
+ d_sigma = mkAllExceptOne( '\0' );
+ d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
+}
+
+bool RegExpOpr::checkConstRegExp( Node r ) {
+ bool ret = true;
+ if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
+ ret = d_cstre_cache[r];
+ } else {
+ if(r.getKind() == kind::STRING_TO_REGEXP) {
+ Node tmp = Rewriter::rewrite( r[0] );
+ ret = tmp.isConst();
+ } else {
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if(!checkConstRegExp(r[i])) {
+ ret = false; break;
+ }
+ }
+ }
+ d_cstre_cache[r] = ret;
+ }
+ return ret;
+}
+
+// 0-unknown, 1-yes, 2-no
+int RegExpOpr::delta( Node r ) {
+ Trace("strings-regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
+ int ret = 0;
+ if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
+ ret = d_delta_cache[r];
+ } else {
+ int k = r.getKind();
+ switch( k ) {
+ case kind::STRING_TO_REGEXP:
+ {
+ if(r[0].isConst()) {
+ if(r[0] == d_emptyString) {
+ ret = 1;
+ } else {
+ ret = 2;
+ }
+ } else {
+ ret = 0;
+ }
+ }
+ break;
+ case kind::REGEXP_CONCAT:
+ {
+ bool flag = false;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ int tmp = delta( r[i] );
+ if(tmp == 2) {
+ ret = 2;
+ break;
+ } else if(tmp == 0) {
+ flag = true;
+ }
+ }
+ if(!flag && ret != 2) {
+ ret = 1;
+ }
+ }
+ break;
+ case kind::REGEXP_OR:
+ {
+ bool flag = false;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ int tmp = delta( r[i] );
+ if(tmp == 1) {
+ ret = 1;
+ break;
+ } else if(tmp == 0) {
+ flag = true;
+ }
+ }
+ if(!flag && ret != 1) {
+ ret = 2;
+ }
+ }
+ break;
+ case kind::REGEXP_INTER:
+ {
+ bool flag = true;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ int tmp = delta( r[i] );
+ if(tmp == 2) {
+ ret = 2; flag=false;
+ break;
+ } else if(tmp == 0) {
+ flag=false;
+ break;
+ }
+ }
+ if(flag) {
+ ret = 1;
+ }
+ }
+ break;
+ case kind::REGEXP_STAR:
+ {
+ ret = 1;
+ }
+ break;
+ case kind::REGEXP_PLUS:
+ {
+ ret = delta( r[0] );
+ }
+ break;
+ case kind::REGEXP_OPT:
+ {
+ ret = 1;
+ }
+ break;
+ case kind::REGEXP_RANGE:
+ {
+ ret = 2;
+ }
+ break;
+ default:
+ //TODO: special sym: sigma, none, all
+ Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
+ //AlwaysAssert( false );
+ //return Node::null();
+ }
+ d_delta_cache[r] = ret;
+ }
+ Trace("strings-regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
+ return ret;
+}
+
+//null - no solution
+Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
+ Assert( c.size() < 2 );
+ Trace("strings-regexp-derivative") << "RegExp-derivative starts with " << mkString( r ) << ", c=" << c << std::endl;
+ Node retNode = Node::null();
+ PairDvStr dv = std::make_pair( r, c );
+ if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
+ retNode = d_dv_cache[dv];
+ } else if( c.isEmptyString() ){
+ int tmp = delta( r );
+ if(tmp == 0) {
+ retNode = Node::null();
+ // TODO variable
+ } else if(tmp == 1) {
+ retNode = r;
+ } else {
+ retNode = Node::null();
+ }
+ } else {
+ int k = r.getKind();
+ switch( k ) {
+ case kind::STRING_TO_REGEXP:
+ {
+ if(r[0].isConst()) {
+ if(r[0] == d_emptyString) {
+ retNode = Node::null();
+ } else {
+ if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
+ retNode = r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
+ NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
+ } else {
+ retNode = Node::null();
+ }
+ }
+ } else {
+ retNode = Node::null();
+ // TODO variable
+ }
+ }
+ break;
+ case kind::REGEXP_CONCAT:
+ {
+ std::vector< Node > vec_nodes;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ Node dc = derivativeSingle(r[i], c);
+ if(!dc.isNull()) {
+ std::vector< Node > vec_nodes2;
+ if(dc != d_emptyString) {
+ vec_nodes2.push_back( dc );
+ }
+ for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
+ if(r[j] != d_emptyString) {
+ vec_nodes2.push_back( r[j] );
+ }
+ }
+ Node tmp = vec_nodes2.size()==0 ? d_emptyString :
+ ( vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 ) );
+ if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
+ vec_nodes.push_back( tmp );
+ }
+ }
+
+ if( delta( r[i] ) != 1 ) {
+ break;
+ }
+ }
+ retNode = vec_nodes.size() == 0 ? Node::null() :
+ ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ) );
+ }
+ break;
+ case kind::REGEXP_OR:
+ {
+ std::vector< Node > vec_nodes;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ Node dc = derivativeSingle(r[i], c);
+ if(!dc.isNull()) {
+ if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
+ vec_nodes.push_back( dc );
+ }
+ }
+
+ }
+ retNode = vec_nodes.size() == 0 ? Node::null() :
+ ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ) );
+ }
+ break;
+ case kind::REGEXP_INTER:
+ {
+ bool flag = true;
+ bool flag_sg = false;
+ std::vector< Node > vec_nodes;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ Node dc = derivativeSingle(r[i], c);
+ if(!dc.isNull()) {
+ if(dc == d_sigma_star) {
+ flag_sg = true;
+ } else {
+ if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
+ vec_nodes.push_back( dc );
+ }
+ }
+ } else {
+ flag = false;
+ break;
+ }
+ }
+ if(flag) {
+ if(vec_nodes.size() == 0 && flag_sg) {
+ retNode = d_sigma_star;
+ } else {
+ retNode = vec_nodes.size() == 0 ? Node::null() :
+ ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
+ }
+ } else {
+ retNode = Node::null();
+ }
+ }
+ break;
+ case kind::REGEXP_STAR:
+ {
+ Node dc = derivativeSingle(r[0], c);
+ if(!dc.isNull()) {
+ retNode = dc==d_emptyString ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
+ } else {
+ retNode = Node::null();
+ }
+ }
+ break;
+ case kind::REGEXP_PLUS:
+ {
+ Node dc = derivativeSingle(r[0], c);
+ if(!dc.isNull()) {
+ retNode = dc==d_emptyString ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
+ } else {
+ retNode = Node::null();
+ }
+ }
+ break;
+ case kind::REGEXP_OPT:
+ {
+ return derivativeSingle(r[0], c);
+ }
+ break;
+ case kind::REGEXP_RANGE:
+ {
+ char ch = c.getFirstChar();
+ if (ch >= r[0].getConst< CVC4::String >().getFirstChar() && ch <= r[1].getConst< CVC4::String >().getFirstChar() ) {
+ return d_emptyString;
+ } else {
+ return Node::null();
+ }
+ }
+ break;
+ default:
+ //TODO: special sym: sigma, none, all
+ Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
+ //AlwaysAssert( false );
+ //return Node::null();
+ }
+ d_dv_cache[dv] = retNode;
+ }
+ Trace("strings-regexp-derivative") << "RegExp-derivative returns : " << ( retNode.isNull() ? "NULL" : mkString( retNode ) ) << std::endl;
+ return retNode;
+}
+
+void RegExpOpr::firstChar( Node r ) {
+ Trace("strings-regexp-firstchar") << "Head characters: ";
+ for(char ch = d_char_start; ch <= d_char_end; ++ch) {
+ CVC4::String c(ch);
+ Node dc = derivativeSingle(r, ch);
+ if(!dc.isNull()) {
+ Trace("strings-regexp-firstchar") << c << " (" << mkString(dc) << ")" << std::endl;
+ }
+ }
+ Trace("strings-regexp-firstchar") << std::endl;
+}
+
+bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {
+ int k = r.getKind();
+ switch( k ) {
+ case kind::STRING_TO_REGEXP:
+ {
+ if(r[0].isConst()) {
+ if(r[0] != d_emptyString) {
+ char t1 = r[0].getConst< CVC4::String >().getFirstChar();
+ if(c.isEmptyString()) {
+ vec_chars.push_back( t1 );
+ return true;
+ } else {
+ char t2 = c.getFirstChar();
+ if(t1 != t2) {
+ return false;
+ } else {
+ if(c.size() >= 2) {
+ vec_chars.push_back( c.substr(1,1).getFirstChar() );
+ } else {
+ vec_chars.push_back( '\0' );
+ }
+ return true;
+ }
+ }
+ } else {
+ return false;
+ }
+ } else {
+ return false;
+ }
+ }
+ break;
+ case kind::REGEXP_CONCAT:
+ {
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if( follow(r[i], c, vec_chars) ) {
+ if(vec_chars[vec_chars.size() - 1] == '\0') {
+ vec_chars.pop_back();
+ c = d_emptyString.getConst< CVC4::String >();
+ }
+ } else {
+ return false;
+ }
+ }
+ vec_chars.push_back( '\0' );
+ return true;
+ }
+ break;
+ case kind::REGEXP_OR:
+ {
+ bool flag = false;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if( follow(r[i], c, vec_chars) ) {
+ flag=true;
+ }
+ }
+ return flag;
+ }
+ break;
+ case kind::REGEXP_INTER:
+ {
+ std::vector< char > vt2;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ std::vector< char > v_tmp;
+ if( !follow(r[i], c, v_tmp) ) {
+ return false;
+ }
+ std::vector< char > vt3(vt2);
+ vt2.clear();
+ std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );
+ if(vt2.size() == 0) {
+ return false;
+ }
+ }
+ vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );
+ return true;
+ }
+ break;
+ case kind::REGEXP_STAR:
+ {
+ if(follow(r[0], c, vec_chars)) {
+ if(vec_chars[vec_chars.size() - 1] == '\0') {
+ if(c.isEmptyString()) {
+ return true;
+ } else {
+ vec_chars.pop_back();
+ c = d_emptyString.getConst< CVC4::String >();
+ return follow(r[0], c, vec_chars);
+ }
+ } else {
+ return true;
+ }
+ } else {
+ vec_chars.push_back( '\0' );
+ return true;
+ }
+ }
+ break;
+ /*
+ case kind::REGEXP_PLUS:
+ {
+ ret = delta( r[0] );
+ }
+ break;
+ case kind::REGEXP_OPT:
+ {
+ ret = 1;
+ }
+ break;
+ case kind::REGEXP_RANGE:
+ {
+ ret = 2;
+ }
+ break;*/
+ default:
+ //TODO: special sym: sigma, none, all
+ Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
+ //AlwaysAssert( false );
+ //return Node::null();
+ return false;
+ }
+}
+
+Node RegExpOpr::mkAllExceptOne( char exp_c ) {
+ std::vector< Node > vec_nodes;
+ for(char c=d_char_start; c<=d_char_end; ++c) {
+ if(c != exp_c ) {
+ Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );
+ vec_nodes.push_back( n );
+ }
+ }
+ return NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
+}
+
+Node RegExpOpr::complement( Node r ) {
+ Trace("strings-regexp-compl") << "RegExp-Compl starts with " << mkString( r ) << std::endl;
+ Node retNode = r;
+ if( d_compl_cache.find( r ) != d_compl_cache.end() ) {
+ retNode = d_compl_cache[r];
+ } else {
+ int k = r.getKind();
+ switch( k ) {
+ case kind::STRING_TO_REGEXP:
+ {
+ if(r[0].isConst()) {
+ if(r[0] == d_emptyString) {
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, d_sigma, d_sigma_star );
+ } else {
+ std::vector< Node > vec_nodes;
+ vec_nodes.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) );
+ CVC4::String s = r[0].getConst<String>();
+ for(unsigned i=0; i<s.size(); ++i) {
+ char c = s.substr(i, 1).getFirstChar();
+ Node tmp = mkAllExceptOne( c );
+ if(i != 0 ) {
+ Node tmph = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
+ NodeManager::currentNM()->mkConst( s.substr(0, i) ) );
+ tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmph, tmp );
+ }
+ tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmp, d_sigma_star );
+ vec_nodes.push_back( tmp );
+ }
+ Node tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, d_sigma, d_sigma_star );
+ vec_nodes.push_back( tmp );
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
+ }
+ } else {
+ Trace("strings-error") << "Unsupported string variable: " << r[0] << " in complement of RegExp." << std::endl;
+ //AlwaysAssert( false );
+ //return Node::null();
+ }
+ }
+ break;
+ case kind::REGEXP_CONCAT:
+ {
+ std::vector< Node > vec_nodes;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ Node tmp = complement( r[i] );
+ for(unsigned j=0; j<i; ++j) {
+ tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r[j], tmp );
+ }
+ if(i != r.getNumChildren() - 1) {
+ tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmp,
+ NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma ) );
+ }
+ vec_nodes.push_back( tmp );
+ }
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
+ }
+ break;
+ case kind::REGEXP_OR:
+ {
+ std::vector< Node > vec_nodes;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ Node tmp = complement( r[i] );
+ vec_nodes.push_back( tmp );
+ }
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes );
+ }
+ break;
+ case kind::REGEXP_INTER:
+ {
+ std::vector< Node > vec_nodes;
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ Node tmp = complement( r[i] );
+ vec_nodes.push_back( tmp );
+ }
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
+ }
+ break;
+ case kind::REGEXP_STAR:
+ {
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, d_sigma, d_sigma_star );
+ Node tmp = complement( r[0] );
+ tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, tmp );
+ retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, retNode, tmp );
+ }
+ break;
+ default:
+ //TODO: special sym: sigma, none, all
+ Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in complement of RegExp." << std::endl;
+ //AlwaysAssert( false );
+ //return Node::null();
+ }
+ d_compl_cache[r] = retNode;
+ }
+ Trace("strings-regexp-compl") << "RegExp-Compl returns : " << mkString( retNode ) << std::endl;
+ return retNode;
+}
+
+std::string RegExpOpr::niceChar( Node r ) {
+ if(r.isConst()) {
+ std::string s = r.getConst<CVC4::String>().toString() ;
+ return s == "" ? "{E}" : ( s == " " ? "{ }" : s );
+ } else {
+ return r.toString();
+ }
+}
+std::string RegExpOpr::mkString( Node r ) {
+ std::string retStr;
+ int k = r.getKind();
+ switch( k ) {
+ case kind::STRING_TO_REGEXP:
+ {
+ retStr += niceChar( r[0] );
+ }
+ break;
+ case kind::REGEXP_CONCAT:
+ {
+ retStr += "(";
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if(i != 0) retStr += ".";
+ retStr += mkString( r[i] );
+ }
+ retStr += ")";
+ }
+ break;
+ case kind::REGEXP_OR:
+ {
+ if(r == d_sigma) {
+ retStr += "{A}";
+ } else {
+ retStr += "(";
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if(i != 0) retStr += "|";
+ retStr += mkString( r[i] );
+ }
+ retStr += ")";
+ }
+ }
+ break;
+ case kind::REGEXP_INTER:
+ {
+ retStr += "(";
+ for(unsigned i=0; i<r.getNumChildren(); ++i) {
+ if(i != 0) retStr += "&";
+ retStr += mkString( r[i] );
+ }
+ retStr += ")";
+ }
+ break;
+ case kind::REGEXP_STAR:
+ {
+ retStr += mkString( r[0] );
+ retStr += "*";
+ }
+ break;
+ case kind::REGEXP_PLUS:
+ {
+ retStr += mkString( r[0] );
+ retStr += "+";
+ }
+ break;
+ case kind::REGEXP_OPT:
+ {
+ retStr += mkString( r[0] );
+ retStr += "?";
+ }
+ break;
+ case kind::REGEXP_RANGE:
+ {
+ retStr += "[";
+ retStr += niceChar( r[0] );
+ retStr += "-";
+ retStr += niceChar( r[1] );
+ retStr += "]";
+ }
+ break;
+ default:
+ //TODO: special sym: sigma, none, all
+ Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
+ //AlwaysAssert( false );
+ //return Node::null();
+ }
+
+ return retStr;
+}
+
+}/* CVC4::theory::strings namespace */
+}/* CVC4::theory namespace */
+}/* CVC4 namespace */
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h
new file mode 100644
index 000000000..5f0eab2e0
--- /dev/null
+++ b/src/theory/strings/regexp_operation.h
@@ -0,0 +1,65 @@
+/********************* */
+/*! \file regexp_operation.h
+ ** \verbatim
+ ** Original author: Tianyi Liang
+ ** Major contributors: Tianyi Liang, Andrew Reynolds
+ ** Minor contributors (to current version): none
+ ** This file is part of the CVC4 prototype.
+ ** Copyright (c) 2013-2013 New York University and The University of Iowa
+ ** See the file COPYING in the top-level source directory for licensing
+ ** information.\endverbatim
+ **
+ ** \brief Regular Expresion Operations
+ **
+ ** Regular Expresion Operations
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef __CVC4__THEORY__STRINGS__REGEXP__OPERATION_H
+#define __CVC4__THEORY__STRINGS__REGEXP__OPERATION_H
+
+#include <vector>
+#include "util/hash.h"
+#include "theory/theory.h"
+#include "theory/rewriter.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+class RegExpOpr {
+ typedef std::pair< Node, CVC4::String > PairDvStr;
+private:
+ Node d_emptyString;
+ char d_char_start;
+ char d_char_end;
+ Node d_sigma;
+ Node d_sigma_star;
+
+ std::map< Node, Node > d_compl_cache;
+ std::map< Node, int > d_delta_cache;
+ std::map< PairDvStr, Node > d_dv_cache;
+ std::map< Node, bool > d_cstre_cache;
+ //bool checkStarPlus( Node t );
+ //void simplifyRegExp( Node s, Node r, std::vector< Node > &ret, std::vector< Node > &nn );
+ //Node simplify( Node t, std::vector< Node > &new_nodes );
+ std::string niceChar( Node r );
+public:
+ RegExpOpr();
+ bool checkConstRegExp( Node r );
+ //void simplify(std::vector< Node > &vec_node);
+ Node mkAllExceptOne( char c );
+ Node complement( Node r );
+ int delta( Node r );
+ Node derivativeSingle( Node r, CVC4::String c );
+ void firstChar( Node r );
+ bool follow( Node r, CVC4::String c, std::vector< char > &vec_chars );
+ std::string mkString( Node r );
+};
+
+}/* CVC4::theory::strings namespace */
+}/* CVC4::theory namespace */
+}/* CVC4 namespace */
+
+#endif /* __CVC4__THEORY__STRINGS__REGEXP__OPERATION_H */
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index a50c295da..ab6ff9d68 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -60,9 +60,6 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu
d_true = NodeManager::currentNM()->mkConst( true );
d_false = NodeManager::currentNM()->mkConst( false );
- //option
- //d_regexp_unroll_depth = options::stringRegExpUnrollDepth();
- //d_fmf = options::stringFMF();
}
TheoryStrings::~TheoryStrings() {
@@ -394,7 +391,27 @@ void TheoryStrings::check(Effort e) {
//must record string in regular expressions
if ( atom.getKind() == kind::STRING_IN_REGEXP ) {
//if(fact[0].getKind() != kind::CONST_STRING) {
- d_reg_exp_mem.push_back( assertion );
+ //if(polarity) {
+ d_reg_exp_mem.push_back( assertion );
+ /*} else {
+ Node r = Rewriter::rewrite( atom[1] );
+ r = d_regexp_opr.complement( r );
+ r = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, atom[0], r );
+ std::vector< Node > vec_r;
+ vec_r.push_back( r );
+
+ StringsPreprocess spp;
+ spp.simplify( vec_r );
+ for( unsigned i=1; i<vec_r.size(); i++ ){
+ if(vec_r[i].getKind() == kind::STRING_IN_REGEXP) {
+ d_reg_exp_mem.push_back( vec_r[i] );
+ } else if(vec_r[i].getKind() == kind::EQUAL) {
+ d_equalityEngine.assertEquality(vec_r[i], true, vec_r[i]);
+ } else {
+ Assert(false);
+ }
+ }
+ }*/
//}
}else if (atom.getKind() == kind::EQUAL) {
d_equalityEngine.assertEquality(atom, polarity, fact);
@@ -1869,7 +1886,7 @@ bool TheoryStrings::unrollStar( Node atom ) {
Node r = atom[1];
int depth = d_reg_exp_unroll_depth.find( atom )==d_reg_exp_unroll_depth.end() ? 0 : d_reg_exp_unroll_depth[atom];
if( depth <= options::stringRegExpUnrollDepth() ) {
- Trace("strings-regex") << "Strings::regex: Unroll " << atom << " for " << ( depth + 1 ) << " times." << std::endl;
+ Trace("strings-regexp") << "Strings::regexp: Unroll " << atom << " for " << ( depth + 1 ) << " times." << std::endl;
d_reg_exp_unroll[atom] = true;
//add lemma?
Node xeqe = x.eqNode( d_emptyString );
@@ -1930,7 +1947,7 @@ bool TheoryStrings::unrollStar( Node atom ) {
sendLemma( ant, lem, "Unroll" );
return true;
}else{
- Trace("strings-regex") << "Strings::regex: Stop unrolling " << atom << " the max (" << depth << ") is reached." << std::endl;
+ Trace("strings-regexp") << "Strings::regexp: Stop unrolling " << atom << " the max (" << depth << ") is reached." << std::endl;
return false;
}
}
@@ -1941,7 +1958,7 @@ bool TheoryStrings::checkMemberships() {
for( unsigned i=0; i<d_reg_exp_mem.size(); i++ ){
//check regular expression membership
Node assertion = d_reg_exp_mem[i];
- Trace("strings-regex") << "We have regular expression assertion : " << assertion << std::endl;
+ Trace("strings-regexp") << "We have regular expression assertion : " << assertion << std::endl;
Node atom = assertion.getKind()==kind::NOT ? assertion[0] : assertion;
bool polarity = assertion.getKind()!=kind::NOT;
if( polarity ){
@@ -1952,21 +1969,49 @@ bool TheoryStrings::checkMemberships() {
//TODO
Assert( r.getKind()==kind::REGEXP_STAR );
if( !areEqual( x, d_emptyString ) ){
- if( unrollStar( atom ) ){
+ //if(splitRegExp( x, r, atom )) {
+ // addedLemma = true;
+ //} else
+ if( unrollStar( atom ) ) {
addedLemma = true;
- }else{
- Trace("strings-regex") << "RegEx is incomplete due to " << assertion << ", depth = " << options::stringRegExpUnrollDepth() << std::endl;
+ } else {
+ Trace("strings-regexp") << "RegExp is incomplete due to " << assertion << ", depth = " << options::stringRegExpUnrollDepth() << std::endl;
is_unk = true;
}
}else{
- Trace("strings-regex") << "...is satisfied." << std::endl;
+ Trace("strings-regexp") << "...is satisfied." << std::endl;
}
}else{
- Trace("strings-regex") << "...Already unrolled." << std::endl;
+ Trace("strings-regexp") << "...Already unrolled." << std::endl;
}
}else{
//TODO: negative membership
- Trace("strings-regex") << "RegEx is incomplete due to " << assertion << "." << std::endl;
+ //Node r = Rewriter::rewrite( atom[1] );
+ //r = d_regexp_opr.complement( r );
+ //Trace("strings-regexp-test") << "Compl( " << d_regexp_opr.mkString( atom[1] ) << " ) is " << d_regexp_opr.mkString( r ) << std::endl;
+ //Trace("strings-regexp-test") << "Delta( " << d_regexp_opr.mkString( atom[1] ) << " ) is " << d_regexp_opr.delta( atom[1] ) << std::endl;
+ //Trace("strings-regexp-test") << "Delta( " << d_regexp_opr.mkString( r ) << " ) is " << d_regexp_opr.delta( r ) << std::endl;
+ //Trace("strings-regexp-test") << "Deriv( " << d_regexp_opr.mkString( r ) << ", c='b' ) is " << d_regexp_opr.mkString( d_regexp_opr.derivativeSingle( r, ::CVC4::String("b") ) ) << std::endl;
+ //Trace("strings-regexp-test") << "FHC( " << d_regexp_opr.mkString( r ) <<" ) is " << std::endl;
+ //d_regexp_opr.firstChar( r );
+ //r = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, atom[0], r );
+ /*
+ std::vector< Node > vec_r;
+ vec_r.push_back( r );
+
+ StringsPreprocess spp;
+ spp.simplify( vec_r );
+ for( unsigned i=1; i<vec_r.size(); i++ ){
+ if(vec_r[i].getKind() == kind::STRING_IN_REGEXP) {
+ d_reg_exp_mem.push_back( vec_r[i] );
+ } else if(vec_r[i].getKind() == kind::EQUAL) {
+ d_equalityEngine.assertEquality(vec_r[i], true, vec_r[i]);
+ } else {
+ Assert(false);
+ }
+ }
+ */
+ Trace("strings-regexp") << "RegEx is incomplete due to " << assertion << "." << std::endl;
is_unk = true;
}
}
@@ -1975,13 +2020,71 @@ bool TheoryStrings::checkMemberships() {
return true;
}else{
if( is_unk ){
- Trace("strings-regex") << "SET INCOMPLETE" << std::endl;
+ Trace("strings-regexp") << "SET INCOMPLETE" << std::endl;
d_out->setIncomplete();
}
return false;
}
}
+CVC4::String TheoryStrings::getHeadConst( Node x ) {
+ if( x.isConst() && x != d_emptyString ) {
+ return x.getConst< String >();
+ } else if( x.getKind() == kind::STRING_CONCAT ) {
+ if( x[0].isConst() && x[0] != d_emptyString ) {
+ return x.getConst< String >();
+ } else {
+ return d_emptyString.getConst< String >();
+ }
+ } else {
+ return d_emptyString.getConst< String >();
+ }
+}
+
+bool TheoryStrings::splitRegExp( Node x, Node r, Node ant ) {
+ x = Rewriter::rewrite( x );
+ if(x == d_emptyString) {
+ //if(d_regexp_opr.delta() == 1) {
+ //}
+ return false;
+ } else {
+ CVC4::String s = getHeadConst( x );
+ if( !s.isEmptyString() && d_regexp_opr.checkConstRegExp( r ) ) {
+ Node conc = Node::null();
+ Node dc = r;
+ bool flag = true;
+ for(unsigned i=0; i<s.size(); ++i) {
+ CVC4::String c = s.substr(i, 1);
+ dc = d_regexp_opr.derivativeSingle(dc, c);
+ if(dc.isNull()) {
+ // CONFLICT
+ flag = false;
+ break;
+ }
+ }
+ // send lemma
+ if(flag) {
+ Node left = Node::null();
+ if(x.isConst()) {
+ left = d_emptyString;
+ if(d_regexp_opr.delta(dc)) {
+ //TODO yes
+ } else {
+ // TODO conflict
+ }
+ } else {
+ //TODO find x rest
+ conc = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, dc );
+ }
+ }
+ sendLemma(ant, conc, "RegExp Const Split");
+ } else {
+ return false;
+ }
+ }
+ return false;
+}
+
Node TheoryStrings::getNextDecisionRequest() {
if(options::stringFMF() && !d_conflict) {
//Trace("strings-fmf-debug") << "Strings::FMF: Assertion Level = " << d_valuation.getAssertionLevel() << std::endl;
diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h
index 8f21888a2..1291fc94e 100644
--- a/src/theory/strings/theory_strings.h
+++ b/src/theory/strings/theory_strings.h
@@ -22,6 +22,7 @@
#include "theory/theory.h"
#include "theory/uf/equality_engine.h"
#include "theory/strings/theory_strings_preprocess.h"
+#include "theory/strings/regexp_operation.h"
#include "context/cdchunk_list.h"
@@ -252,6 +253,12 @@ protected:
void seperateByLength( std::vector< Node >& n, std::vector< std::vector< Node > >& col, std::vector< Node >& lts );
void printConcat( std::vector< Node >& n, const char * c );
+ // Regular Expression
+private:
+ RegExpOpr d_regexp_opr;
+ CVC4::String getHeadConst( Node x );
+ bool splitRegExp( Node x, Node r, Node ant );
+
private:
// Finite Model Finding
//bool d_fmf;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback