From d618de29daae18365d97555dc0a870f34c8eead1 Mon Sep 17 00:00:00 2001 From: Tianyi Liang Date: Wed, 6 Nov 2013 10:43:06 -0600 Subject: add seperate regular expression files --- src/theory/strings/Makefile.am | 3 +- src/theory/strings/regexp_operation.cpp | 656 ++++++++++++++++++++++++++++++++ src/theory/strings/regexp_operation.h | 65 ++++ src/theory/strings/theory_strings.cpp | 131 ++++++- src/theory/strings/theory_strings.h | 7 + 5 files changed, 847 insertions(+), 15 deletions(-) create mode 100644 src/theory/strings/regexp_operation.cpp create mode 100644 src/theory/strings/regexp_operation.h diff --git a/src/theory/strings/Makefile.am b/src/theory/strings/Makefile.am index 38efa33f3..f7a6fa0a2 100644 --- a/src/theory/strings/Makefile.am +++ b/src/theory/strings/Makefile.am @@ -13,7 +13,8 @@ libstrings_la_SOURCES = \ theory_strings_type_rules.h \ type_enumerator.h \ theory_strings_preprocess.h \ - theory_strings_preprocess.cpp + theory_strings_preprocess.cpp \ + regexp_operation.cpp EXTRA_DIST = \ kinds diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp new file mode 100644 index 000000000..0a7701862 --- /dev/null +++ b/src/theory/strings/regexp_operation.cpp @@ -0,0 +1,656 @@ +/********************* */ +/*! \file regexp_operation.CPP + ** \verbatim + ** Original author: Tianyi Liang + ** Major contributors: Tianyi Liang, Andrew Reynolds + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2013-2013 New York University and The University of Iowa + ** See the file COPYING in the top-level source directory for licensing + ** information.\endverbatim + ** + ** \brief Regular Expresion Operations + ** + ** Regular Expresion Operations + **/ + +#include "theory/strings/regexp_operation.h" +#include "expr/kind.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +RegExpOpr::RegExpOpr() { + d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); + // All Charactors = all printable ones 32-126 + d_char_start = 'a';//' '; + d_char_end = 'c';//'~'; + d_sigma = mkAllExceptOne( '\0' ); + d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma ); +} + +bool RegExpOpr::checkConstRegExp( Node r ) { + bool ret = true; + if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) { + ret = d_cstre_cache[r]; + } else { + if(r.getKind() == kind::STRING_TO_REGEXP) { + Node tmp = Rewriter::rewrite( r[0] ); + ret = tmp.isConst(); + } else { + for(unsigned i=0; i &vec_chars ) { + int k = r.getKind(); + switch( k ) { + case kind::STRING_TO_REGEXP: + { + if(r[0].isConst()) { + if(r[0] != d_emptyString) { + char t1 = r[0].getConst< CVC4::String >().getFirstChar(); + if(c.isEmptyString()) { + vec_chars.push_back( t1 ); + return true; + } else { + char t2 = c.getFirstChar(); + if(t1 != t2) { + return false; + } else { + if(c.size() >= 2) { + vec_chars.push_back( c.substr(1,1).getFirstChar() ); + } else { + vec_chars.push_back( '\0' ); + } + return true; + } + } + } else { + return false; + } + } else { + return false; + } + } + break; + case kind::REGEXP_CONCAT: + { + for(unsigned i=0; i(); + } + } else { + return false; + } + } + vec_chars.push_back( '\0' ); + return true; + } + break; + case kind::REGEXP_OR: + { + bool flag = false; + for(unsigned i=0; i vt2; + for(unsigned i=0; i v_tmp; + if( !follow(r[i], c, v_tmp) ) { + return false; + } + std::vector< char > vt3(vt2); + vt2.clear(); + std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() ); + if(vt2.size() == 0) { + return false; + } + } + vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() ); + return true; + } + break; + case kind::REGEXP_STAR: + { + if(follow(r[0], c, vec_chars)) { + if(vec_chars[vec_chars.size() - 1] == '\0') { + if(c.isEmptyString()) { + return true; + } else { + vec_chars.pop_back(); + c = d_emptyString.getConst< CVC4::String >(); + return follow(r[0], c, vec_chars); + } + } else { + return true; + } + } else { + vec_chars.push_back( '\0' ); + return true; + } + } + break; + /* + case kind::REGEXP_PLUS: + { + ret = delta( r[0] ); + } + break; + case kind::REGEXP_OPT: + { + ret = 1; + } + break; + case kind::REGEXP_RANGE: + { + ret = 2; + } + break;*/ + default: + //TODO: special sym: sigma, none, all + Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl; + //AlwaysAssert( false ); + //return Node::null(); + return false; + } +} + +Node RegExpOpr::mkAllExceptOne( char exp_c ) { + std::vector< Node > vec_nodes; + for(char c=d_char_start; c<=d_char_end; ++c) { + if(c != exp_c ) { + Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) ); + vec_nodes.push_back( n ); + } + } + return NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ); +} + +Node RegExpOpr::complement( Node r ) { + Trace("strings-regexp-compl") << "RegExp-Compl starts with " << mkString( r ) << std::endl; + Node retNode = r; + if( d_compl_cache.find( r ) != d_compl_cache.end() ) { + retNode = d_compl_cache[r]; + } else { + int k = r.getKind(); + switch( k ) { + case kind::STRING_TO_REGEXP: + { + if(r[0].isConst()) { + if(r[0] == d_emptyString) { + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, d_sigma, d_sigma_star ); + } else { + std::vector< Node > vec_nodes; + vec_nodes.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ); + CVC4::String s = r[0].getConst(); + for(unsigned i=0; imkNode( kind::STRING_TO_REGEXP, + NodeManager::currentNM()->mkConst( s.substr(0, i) ) ); + tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmph, tmp ); + } + tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmp, d_sigma_star ); + vec_nodes.push_back( tmp ); + } + Node tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, d_sigma, d_sigma_star ); + vec_nodes.push_back( tmp ); + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ); + } + } else { + Trace("strings-error") << "Unsupported string variable: " << r[0] << " in complement of RegExp." << std::endl; + //AlwaysAssert( false ); + //return Node::null(); + } + } + break; + case kind::REGEXP_CONCAT: + { + std::vector< Node > vec_nodes; + for(unsigned i=0; imkNode( kind::REGEXP_CONCAT, r[j], tmp ); + } + if(i != r.getNumChildren() - 1) { + tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmp, + NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma ) ); + } + vec_nodes.push_back( tmp ); + } + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ); + } + break; + case kind::REGEXP_OR: + { + std::vector< Node > vec_nodes; + for(unsigned i=0; imkNode( kind::REGEXP_INTER, vec_nodes ); + } + break; + case kind::REGEXP_INTER: + { + std::vector< Node > vec_nodes; + for(unsigned i=0; imkNode( kind::REGEXP_OR, vec_nodes ); + } + break; + case kind::REGEXP_STAR: + { + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, d_sigma, d_sigma_star ); + Node tmp = complement( r[0] ); + tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, tmp ); + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, retNode, tmp ); + } + break; + default: + //TODO: special sym: sigma, none, all + Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in complement of RegExp." << std::endl; + //AlwaysAssert( false ); + //return Node::null(); + } + d_compl_cache[r] = retNode; + } + Trace("strings-regexp-compl") << "RegExp-Compl returns : " << mkString( retNode ) << std::endl; + return retNode; +} + +std::string RegExpOpr::niceChar( Node r ) { + if(r.isConst()) { + std::string s = r.getConst().toString() ; + return s == "" ? "{E}" : ( s == " " ? "{ }" : s ); + } else { + return r.toString(); + } +} +std::string RegExpOpr::mkString( Node r ) { + std::string retStr; + int k = r.getKind(); + switch( k ) { + case kind::STRING_TO_REGEXP: + { + retStr += niceChar( r[0] ); + } + break; + case kind::REGEXP_CONCAT: + { + retStr += "("; + for(unsigned i=0; i +#include "util/hash.h" +#include "theory/theory.h" +#include "theory/rewriter.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +class RegExpOpr { + typedef std::pair< Node, CVC4::String > PairDvStr; +private: + Node d_emptyString; + char d_char_start; + char d_char_end; + Node d_sigma; + Node d_sigma_star; + + std::map< Node, Node > d_compl_cache; + std::map< Node, int > d_delta_cache; + std::map< PairDvStr, Node > d_dv_cache; + std::map< Node, bool > d_cstre_cache; + //bool checkStarPlus( Node t ); + //void simplifyRegExp( Node s, Node r, std::vector< Node > &ret, std::vector< Node > &nn ); + //Node simplify( Node t, std::vector< Node > &new_nodes ); + std::string niceChar( Node r ); +public: + RegExpOpr(); + bool checkConstRegExp( Node r ); + //void simplify(std::vector< Node > &vec_node); + Node mkAllExceptOne( char c ); + Node complement( Node r ); + int delta( Node r ); + Node derivativeSingle( Node r, CVC4::String c ); + void firstChar( Node r ); + bool follow( Node r, CVC4::String c, std::vector< char > &vec_chars ); + std::string mkString( Node r ); +}; + +}/* CVC4::theory::strings namespace */ +}/* CVC4::theory namespace */ +}/* CVC4 namespace */ + +#endif /* __CVC4__THEORY__STRINGS__REGEXP__OPERATION_H */ diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index a50c295da..ab6ff9d68 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -60,9 +60,6 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu d_true = NodeManager::currentNM()->mkConst( true ); d_false = NodeManager::currentNM()->mkConst( false ); - //option - //d_regexp_unroll_depth = options::stringRegExpUnrollDepth(); - //d_fmf = options::stringFMF(); } TheoryStrings::~TheoryStrings() { @@ -394,7 +391,27 @@ void TheoryStrings::check(Effort e) { //must record string in regular expressions if ( atom.getKind() == kind::STRING_IN_REGEXP ) { //if(fact[0].getKind() != kind::CONST_STRING) { - d_reg_exp_mem.push_back( assertion ); + //if(polarity) { + d_reg_exp_mem.push_back( assertion ); + /*} else { + Node r = Rewriter::rewrite( atom[1] ); + r = d_regexp_opr.complement( r ); + r = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, atom[0], r ); + std::vector< Node > vec_r; + vec_r.push_back( r ); + + StringsPreprocess spp; + spp.simplify( vec_r ); + for( unsigned i=1; imkNode( kind::STRING_IN_REGEXP, atom[0], r ); + /* + std::vector< Node > vec_r; + vec_r.push_back( r ); + + StringsPreprocess spp; + spp.simplify( vec_r ); + for( unsigned i=1; isetIncomplete(); } return false; } } +CVC4::String TheoryStrings::getHeadConst( Node x ) { + if( x.isConst() && x != d_emptyString ) { + return x.getConst< String >(); + } else if( x.getKind() == kind::STRING_CONCAT ) { + if( x[0].isConst() && x[0] != d_emptyString ) { + return x.getConst< String >(); + } else { + return d_emptyString.getConst< String >(); + } + } else { + return d_emptyString.getConst< String >(); + } +} + +bool TheoryStrings::splitRegExp( Node x, Node r, Node ant ) { + x = Rewriter::rewrite( x ); + if(x == d_emptyString) { + //if(d_regexp_opr.delta() == 1) { + //} + return false; + } else { + CVC4::String s = getHeadConst( x ); + if( !s.isEmptyString() && d_regexp_opr.checkConstRegExp( r ) ) { + Node conc = Node::null(); + Node dc = r; + bool flag = true; + for(unsigned i=0; imkNode( kind::STRING_IN_REGEXP, x, dc ); + } + } + sendLemma(ant, conc, "RegExp Const Split"); + } else { + return false; + } + } + return false; +} + Node TheoryStrings::getNextDecisionRequest() { if(options::stringFMF() && !d_conflict) { //Trace("strings-fmf-debug") << "Strings::FMF: Assertion Level = " << d_valuation.getAssertionLevel() << std::endl; diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 8f21888a2..1291fc94e 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -22,6 +22,7 @@ #include "theory/theory.h" #include "theory/uf/equality_engine.h" #include "theory/strings/theory_strings_preprocess.h" +#include "theory/strings/regexp_operation.h" #include "context/cdchunk_list.h" @@ -252,6 +253,12 @@ protected: void seperateByLength( std::vector< Node >& n, std::vector< std::vector< Node > >& col, std::vector< Node >& lts ); void printConcat( std::vector< Node >& n, const char * c ); + // Regular Expression +private: + RegExpOpr d_regexp_opr; + CVC4::String getHeadConst( Node x ); + bool splitRegExp( Node x, Node r, Node ant ); + private: // Finite Model Finding //bool d_fmf; -- cgit v1.2.3