/********************* */ /*! \file regexp_operation.cpp ** \verbatim ** Top contributors (to current version): ** Tianyi Liang, Tim King, Andrew Reynolds ** This file is part of the CVC4 project. ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** ** \brief Symbolic Regular Expresion Operations ** ** Symbolic Regular Expresion Operations **/ #include "theory/strings/regexp_operation.h" #include "expr/kind.h" #include "options/strings_options.h" #include "theory/strings/theory_strings_rewriter.h" namespace CVC4 { namespace theory { namespace strings { RegExpOpr::RegExpOpr() : d_emptyString(NodeManager::currentNM()->mkConst(::CVC4::String(""))), d_true(NodeManager::currentNM()->mkConst(true)), d_false(NodeManager::currentNM()->mkConst(false)), d_emptySingleton(NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, d_emptyString)), d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY, std::vector{})), d_zero(NodeManager::currentNM()->mkConst(::CVC4::Rational(0))), d_one(NodeManager::currentNM()->mkConst(::CVC4::Rational(1))), d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA, std::vector{})), d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)) { d_lastchar = TheoryStringsRewriter::getAlphabetCardinality()-1; } RegExpOpr::~RegExpOpr() {} bool RegExpOpr::checkConstRegExp( Node r ) { Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with /" << mkString( r ) << "/" << std::endl; bool ret = true; if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) { ret = d_cstre_cache[r]; } else { if(r.getKind() == kind::STRING_TO_REGEXP) { Node tmp = Rewriter::rewrite( r[0] ); ret = tmp.isConst(); } else { for(unsigned i=0; i vec_nodes; for(unsigned i=0; imkNode(kind::AND, vec_nodes); } } break; } case kind::REGEXP_UNION: { bool flag = false; std::vector< Node > vec_nodes; for(unsigned i=0; imkNode(kind::OR, vec_nodes); } } break; } case kind::REGEXP_INTER: { bool flag = false; std::vector< Node > vec_nodes; for(unsigned i=0; imkNode(kind::AND, vec_nodes); } } break; } case kind::REGEXP_STAR: { ret = 1; break; } case kind::REGEXP_PLUS: { ret = delta( r[0], exp ); break; } case kind::REGEXP_OPT: { ret = 1; break; } case kind::REGEXP_RANGE: { ret = 2; break; } case kind::REGEXP_LOOP: { if(r[1] == d_zero) { ret = 1; } else { ret = delta(r[0], exp); } break; } default: { //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl; Unreachable(); } } if(!exp.isNull()) { exp = Rewriter::rewrite(exp); } std::pair< int, Node > p(ret, exp); d_delta_cache[r] = p; } Trace("regexp-delta") << "RegExp-Delta returns : " << ret << std::endl; return ret; } // 0-unknown, 1-yes, 2-no int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { Assert( c.size() < 2 ); Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl; int ret = 1; retNode = d_emptyRegexp; PairNodeStr dv = std::make_pair( r, c ); if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) { retNode = d_deriv_cache[dv].first; ret = d_deriv_cache[dv].second; } else if( c.isEmptyString() ) { Node expNode; ret = delta( r, expNode ); if(ret == 0) { retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp); } else if(ret == 1) { retNode = r; } std::pair< Node, int > p(retNode, ret); d_deriv_cache[dv] = p; } else { switch( r.getKind() ) { case kind::REGEXP_EMPTY: { ret = 2; break; } case kind::REGEXP_SIGMA: { retNode = d_emptySingleton; break; } case kind::REGEXP_RANGE: { CVC4::String a = r[0].getConst(); CVC4::String b = r[1].getConst(); retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp; break; } case kind::STRING_TO_REGEXP: { Node tmp = Rewriter::rewrite(r[0]); if(tmp.isConst()) { if(tmp == d_emptyString) { ret = 2; } else { if (tmp.getConst().front() == c.front()) { retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) ); } else { ret = 2; } } } else { ret = 0; Node rest; if(tmp.getKind() == kind::STRING_CONCAT) { Node t2 = tmp[0]; if(t2.isConst()) { if (t2.getConst().front() == c.front()) { Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) ); std::vector< Node > vec_nodes; vec_nodes.push_back(n); for(unsigned i=1; imkNode(kind::REGEXP_CONCAT, vec_nodes); ret = 1; } else { ret = 2; } } else { tmp = tmp[0]; std::vector< Node > vec_nodes; for(unsigned i=1; imkNode(kind::REGEXP_CONCAT, vec_nodes); } } if(ret == 0) { Node sk = NodeManager::currentNM()->mkSkolem( "rsp", NodeManager::currentNM()->stringType(), "Split RegExp" ); retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk); if(!rest.isNull()) { retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest)); } Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, NodeManager::currentNM()->mkConst(c), sk)); retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp)); } } break; } case kind::REGEXP_CONCAT: { std::vector< Node > vec_nodes; std::vector< Node > delta_nodes; Node dnode = d_true; for(unsigned i=0; i vec_nodes2; if(dc != d_emptySingleton) { vec_nodes2.push_back( dc ); } for(unsigned j=i+1; jmkNode( kind::REGEXP_CONCAT, vec_nodes2 ); if(dnode != d_true) { tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp)); ret = 0; } if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) { vec_nodes.push_back( tmp ); } } Node exp3; int rt2 = delta( r[i], exp3 ); if( rt2 == 0 ) { dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3)); } else if( rt2 == 2 ) { break; } } retNode = vec_nodes.size() == 0 ? d_emptyRegexp : ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); if(retNode == d_emptyRegexp) { ret = 2; } break; } case kind::REGEXP_UNION: { std::vector< Node > vec_nodes; for(unsigned i=0; imkNode( kind::REGEXP_UNION, vec_nodes ) ); if(retNode == d_emptyRegexp) { ret = 2; } break; } case kind::REGEXP_INTER: { bool flag = true; bool flag_sg = false; std::vector< Node > vec_nodes; for(unsigned i=0; imkNode( kind::REGEXP_INTER, vec_nodes ) ); if(retNode == d_emptyRegexp) { ret = 2; } } } else { retNode = d_emptyRegexp; ret = 2; } break; } case kind::REGEXP_STAR: { Node dc; ret = derivativeS(r[0], c, dc); retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r )); break; } case kind::REGEXP_LOOP: { if(r[1] == r[2] && r[1] == d_zero) { ret = 2; //retNode = d_emptyRegexp; } else { Node dc; ret = derivativeS(r[0], c, dc); if(dc==d_emptyRegexp) { unsigned l = r[1].getConst().getNumerator().toUnsignedInt(); unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], NodeManager::currentNM()->mkConst(CVC4::Rational(l==0? 0 : (l-1))), NodeManager::currentNM()->mkConst(CVC4::Rational(u-1))); retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 ); } else { retNode = d_emptyRegexp; } } break; } default: { //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl; Unreachable(); } } if(retNode != d_emptyRegexp) { retNode = Rewriter::rewrite( retNode ); } std::pair< Node, int > p(retNode, ret); d_deriv_cache[dv] = p; } Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl; return ret; } Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { Assert( c.size() < 2 ); Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl; Node retNode = d_emptyRegexp; PairNodeStr dv = std::make_pair( r, c ); if( d_dv_cache.find( dv ) != d_dv_cache.end() ) { retNode = d_dv_cache[dv]; } else if( c.isEmptyString() ){ Node exp; int tmp = delta( r, exp ); if(tmp == 0) { // TODO variable retNode = d_emptyRegexp; } else if(tmp == 1) { retNode = r; } else { retNode = d_emptyRegexp; } } else { int k = r.getKind(); switch( k ) { case kind::REGEXP_EMPTY: { retNode = d_emptyRegexp; break; } case kind::REGEXP_SIGMA: { retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ); break; } case kind::REGEXP_RANGE: { CVC4::String a = r[0].getConst(); CVC4::String b = r[1].getConst(); retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp; break; } case kind::STRING_TO_REGEXP: { if(r[0].isConst()) { if(r[0] == d_emptyString) { retNode = d_emptyRegexp; } else { if (r[0].getConst().front() == c.front()) { retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) ); } else { retNode = d_emptyRegexp; } } } else { // TODO variable retNode = d_emptyRegexp; } break; } case kind::REGEXP_CONCAT: { Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ); std::vector< Node > vec_nodes; for(unsigned i=0; i vec_nodes2; if(dc != rees) { vec_nodes2.push_back( dc ); } for(unsigned j=i+1; jmkNode( kind::REGEXP_CONCAT, vec_nodes2 ); if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) { vec_nodes.push_back( tmp ); } } Node exp; if( delta( r[i], exp ) != 1 ) { break; } } retNode = vec_nodes.size() == 0 ? d_emptyRegexp : ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); break; } case kind::REGEXP_UNION: { std::vector< Node > vec_nodes; for(unsigned i=0; imkNode( kind::REGEXP_UNION, vec_nodes ) ); break; } case kind::REGEXP_INTER: { bool flag = true; bool flag_sg = false; std::vector< Node > vec_nodes; for(unsigned i=0; imkNode( kind::REGEXP_INTER, vec_nodes ) ); } } else { retNode = d_emptyRegexp; } break; } case kind::REGEXP_STAR: { Node dc = derivativeSingle(r[0], c); if(dc != d_emptyRegexp) { retNode = dc==d_emptySingleton? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ); } else { retNode = d_emptyRegexp; } break; } case kind::REGEXP_LOOP: { if(r[1] == r[2] && r[1] == d_zero) { retNode = d_emptyRegexp; } else { Node dc = derivativeSingle(r[0], c); if(dc != d_emptyRegexp) { unsigned l = r[1].getConst().getNumerator().toUnsignedInt(); unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], NodeManager::currentNM()->mkConst(CVC4::Rational(l==0? 0 : (l-1))), NodeManager::currentNM()->mkConst(CVC4::Rational(u-1))); retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 ); } else { retNode = d_emptyRegexp; } } //Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl; break; } default: { Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl; Unreachable(); } } if(retNode != d_emptyRegexp) { retNode = Rewriter::rewrite( retNode ); } d_dv_cache[dv] = retNode; } Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl; return retNode; } void RegExpOpr::firstChars(Node r, std::set &pcset, SetNodes &pvset) { Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl; std::map, SetNodes> >::const_iterator itr = d_fset_cache.find(r); if(itr != d_fset_cache.end()) { pcset.insert((itr->second).first.begin(), (itr->second).first.end()); pvset.insert((itr->second).second.begin(), (itr->second).second.end()); } else { // cset is code points std::set cset; SetNodes vset; int k = r.getKind(); switch( k ) { case kind::REGEXP_EMPTY: { break; } case kind::REGEXP_SIGMA: { Assert(d_lastchar < std::numeric_limits::max()); for (unsigned i = 0; i <= d_lastchar; i++) { cset.insert(i); } break; } case kind::REGEXP_RANGE: { unsigned a = r[0].getConst().front(); a = String::convertUnsignedIntToCode(a); unsigned b = r[1].getConst().front(); b = String::convertUnsignedIntToCode(b); Assert(a < b); Assert(b < std::numeric_limits::max()); for (unsigned c = a; c <= b; c++) { cset.insert(c); } break; } case kind::STRING_TO_REGEXP: { Node st = Rewriter::rewrite(r[0]); if(st.isConst()) { CVC4::String s = st.getConst< CVC4::String >(); if(s.size() != 0) { unsigned sc = s.front(); sc = String::convertUnsignedIntToCode(sc); cset.insert(sc); } } else if (st.getKind() == kind::STRING_CONCAT) { if(st[0].isConst()) { CVC4::String s = st[0].getConst(); unsigned sc = s.front(); sc = String::convertUnsignedIntToCode(sc); cset.insert(sc); } else { vset.insert( st[0] ); } } else { vset.insert(st); } break; } case kind::REGEXP_CONCAT: { for(unsigned i=0; i, SetNodes> p(cset, vset); d_fset_cache[r] = p; } if(Trace.isOn("regexp-fset")) { Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {"; for (std::set::const_iterator itr = pcset.begin(); itr != pcset.end(); itr++) { if (itr != pcset.begin()) { Trace("regexp-fset") << ","; } Trace("regexp-fset") << (*itr); } Trace("regexp-fset") << "}" << std::endl; } } //simplify void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) { Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl; Assert(t.getKind() == kind::STRING_IN_REGEXP); Node str = Rewriter::rewrite(t[0]); Node re = Rewriter::rewrite(t[1]); if(polarity) { simplifyPRegExp( str, re, new_nodes ); } else { simplifyNRegExp( str, re, new_nodes ); } Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n"; for(unsigned i=0; i &new_nodes ) { std::pair < Node, Node > p(s, r); NodeManager *nm = NodeManager::currentNM(); std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p); if(itr != d_simpl_neg_cache.end()) { new_nodes.push_back( itr->second ); } else { int k = r.getKind(); Node conc; switch( k ) { case kind::REGEXP_EMPTY: { conc = d_true; break; } case kind::REGEXP_SIGMA: { conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate(); break; } case kind::REGEXP_RANGE: { std::vector< Node > vec; unsigned a = r[0].getConst().front(); a = String::convertUnsignedIntToCode(a); unsigned b = r[1].getConst().front(); b = String::convertUnsignedIntToCode(b); for (unsigned c = a; c <= b; c++) { std::vector tmpVec; tmpVec.push_back(String::convertCodeToUnsignedInt(c)); Node tmp = s.eqNode(nm->mkConst(String(tmpVec))).negate(); vec.push_back( tmp ); } conc = vec.size()==1? vec[0] : NodeManager::currentNM()->mkNode(kind::AND, vec); break; } case kind::STRING_TO_REGEXP: { conc = s.eqNode(r[0]).negate(); break; } case kind::REGEXP_CONCAT: { //TODO: rewrite empty Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s); Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero), NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) ); Node s1 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, d_zero, b1)); Node s2 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1))); Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate(); if(r[0].getKind() == kind::STRING_TO_REGEXP) { s1r1 = s1.eqNode(r[0][0]).negate(); } else if(r[0].getKind() == kind::REGEXP_EMPTY) { s1r1 = d_true; } Node r2 = r[1]; if(r.getNumChildren() > 2) { std::vector< Node > nvec; for(unsigned i=1; imkNode(kind::REGEXP_CONCAT, nvec); } r2 = Rewriter::rewrite(r2); Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate(); if(r2.getKind() == kind::STRING_TO_REGEXP) { s2r2 = s2.eqNode(r2[0]).negate(); } else if(r2.getKind() == kind::REGEXP_EMPTY) { s2r2 = d_true; } conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2); conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc); conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc); break; } case kind::REGEXP_UNION: { std::vector< Node > c_and; for(unsigned i=0; imkNode(kind::STRING_IN_REGEXP, s, r[i]).negate()); } } conc = c_and.size() == 0 ? d_true : c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and); break; } case kind::REGEXP_INTER: { bool emptyflag = false; std::vector< Node > c_or; for(unsigned i=0; imkNode(kind::STRING_IN_REGEXP, s, r[i]).negate()); } } if(emptyflag) { conc = d_true; } else { conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or); } break; } case kind::REGEXP_STAR: { if(s == d_emptyString) { conc = d_false; } else if(r[0].getKind() == kind::REGEXP_EMPTY) { conc = s.eqNode(d_emptyString).negate(); } else if(r[0].getKind() == kind::REGEXP_SIGMA) { conc = d_false; } else { Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s); Node sne = s.eqNode(d_emptyString).negate(); Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one), NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) ); //internal Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, d_zero, b1); Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1)); Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate(); Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate(); conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2); conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc); conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc); conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc); } break; } case kind::REGEXP_LOOP: { Assert(r.getNumChildren() == 3); if(r[1] == r[2]) { if(r[1] == d_zero) { conc = s.eqNode(d_emptyString).negate(); } else if(r[1] == d_one) { conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]).negate(); } else { //unroll for now unsigned l = r[1].getConst().getNumerator().toUnsignedInt(); std::vector vec; for(unsigned i=0; imkNode(kind::REGEXP_CONCAT, vec); conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r2).negate(); } } else { Assert(r[1] == d_zero); //unroll for now unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); std::vector vec; vec.push_back(d_emptySingleton); std::vector vec2; for(unsigned i=1; i<=u; i++) { vec2.push_back(r[0]); Node r2 = i==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec); vec.push_back(r2); } Node r3 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec); conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r3).negate(); } break; } default: { Trace("strings-error") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl; Assert( false, "Unsupported Term" ); } } conc = Rewriter::rewrite( conc ); new_nodes.push_back( conc ); d_simpl_neg_cache[p] = conc; } } void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) { std::pair < Node, Node > p(s, r); NodeManager *nm = NodeManager::currentNM(); std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p); if(itr != d_simpl_cache.end()) { new_nodes.push_back( itr->second ); } else { int k = r.getKind(); Node conc; switch( k ) { case kind::REGEXP_EMPTY: { conc = d_false; break; } case kind::REGEXP_SIGMA: { conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)); break; } case kind::REGEXP_RANGE: { conc = s.eqNode( r[0] ); if(r[0] != r[1]) { unsigned a = r[0].getConst().front(); unsigned b = r[1].getConst().front(); a += 1; std::vector anvec; anvec.push_back(a); Node an = nm->mkConst(String(anvec)); Node tmp = a != b ? nm->mkNode(kind::STRING_IN_REGEXP, s, nm->mkNode(kind::REGEXP_RANGE, an, r[1])) : s.eqNode(r[1]); conc = NodeManager::currentNM()->mkNode(kind::OR, conc, tmp); } break; } case kind::STRING_TO_REGEXP: { conc = s.eqNode(r[0]); break; } case kind::REGEXP_CONCAT: { std::vector< Node > nvec; std::vector< Node > cc; bool emptyflag = false; for(unsigned i=0; imkSkolem( "rc", s.getType(), "created for regular expression concat" ); Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]); nvec.push_back(lem); cc.push_back(sk); } } if(emptyflag) { conc = d_false; } else { Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) ); nvec.push_back(lem); conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec); } break; } case kind::REGEXP_UNION: { std::vector< Node > c_or; for(unsigned i=0; imkNode(kind::STRING_IN_REGEXP, s, r[i])); } } conc = c_or.size() == 0 ? d_false : c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or); break; } case kind::REGEXP_INTER: { std::vector< Node > c_and; bool emptyflag = false; for(unsigned i=0; imkNode(kind::STRING_IN_REGEXP, s, r[i])); } } if(emptyflag) { conc = d_false; } else { conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and); } break; } case kind::REGEXP_STAR: { if(s == d_emptyString) { conc = d_true; } else if(r[0].getKind() == kind::REGEXP_EMPTY) { conc = s.eqNode(d_emptyString); } else if(r[0].getKind() == kind::REGEXP_SIGMA) { conc = d_true; } else { Node se = s.eqNode(d_emptyString); Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]); Node sk1 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" ); Node sk2 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" ); Node s1nz = sk1.eqNode(d_emptyString).negate(); Node s2nz = sk2.eqNode(d_emptyString).negate(); Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]); Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r); Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2)); conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs); conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc); } break; } case kind::REGEXP_LOOP: { Assert(r.getNumChildren() == 3); if(r[1] == d_zero) { if(r[2] == d_zero) { conc = s.eqNode( d_emptyString ); } else { //R{0,n} if(s != d_emptyString) { Node sk1 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" ); Node sk2 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" ); Node seq12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2)); Node sk1ne = sk1.eqNode(d_emptyString).negate(); Node sk1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]); unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); Node u1 = NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1)); Node sk2inru = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], d_zero, u1)); conc = NodeManager::currentNM()->mkNode(kind::AND, seq12, sk1ne, sk1inr, sk2inru); conc = NodeManager::currentNM()->mkNode(kind::OR, s.eqNode(d_emptyString), conc); } else { conc = d_true; } } } else { //R^n Node sk1 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" ); Node sk2 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" ); Node seq12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2)); Node sk1ne = sk1.eqNode(d_emptyString).negate(); Node sk1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]); unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); Node u1 = NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1)); Node sk2inru = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], u1, u1)); conc = NodeManager::currentNM()->mkNode(kind::AND, seq12, sk1ne, sk1inr, sk2inru); } break; } default: { Trace("strings-error") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl; Assert( false, "Unsupported Term" ); } } conc = Rewriter::rewrite( conc ); new_nodes.push_back( conc ); d_simpl_cache[p] = conc; } } bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) { for(std::set< PairNodes >::const_iterator itr = s.begin(); itr != s.end(); ++itr) { if((itr->first == n1 && itr->second == n2) || (itr->first == n2 && itr->second == n1)) { return true; } } return false; } bool RegExpOpr::containC2(unsigned cnt, Node n) { if(n.getKind() == kind::REGEXP_RV) { Assert(n[0].getConst() <= Rational(String::maxSize()), "Exceeded UINT32_MAX in RegExpOpr::containC2"); unsigned y = n[0].getConst().getNumerator().toUnsignedInt(); return cnt == y; } else if(n.getKind() == kind::REGEXP_CONCAT) { for( unsigned i=0; i() <= Rational(String::maxSize()), "Exceeded UINT32_MAX in RegExpOpr::convert2"); unsigned y = n[0].getConst().getNumerator().toUnsignedInt(); r1 = d_emptySingleton; if(cnt == y) { r2 = d_emptyRegexp; } else { r2 = n; } } else if(n.getKind() == kind::REGEXP_CONCAT) { bool flag = true; std::vector vr1, vr2; for( unsigned i=0; imkNode(kind::REGEXP_CONCAT, vr1); vr2.push_back(t2); for( unsigned j=i+1; jmkNode(kind::REGEXP_CONCAT, vr2); flag = false; break; } else { vr1.push_back(n[i]); } } if(flag) { r1 = d_emptySingleton; r2 = n; } } else if(n.getKind() == kind::REGEXP_UNION) { std::vector vr1, vr2; for( unsigned i=0; imkNode(kind::REGEXP_UNION, vr1); r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2); } else if(n.getKind() == kind::STRING_TO_REGEXP || n.getKind() == kind::REGEXP_SIGMA || n.getKind() == kind::REGEXP_RANGE) { r1 = d_emptySingleton; r2 = n; } else if(n.getKind() == kind::REGEXP_LOOP) { //TODO:LOOP r1 = d_emptySingleton; r2 = n; //Unreachable(); } else { //is it possible? Unreachable(); } } bool RegExpOpr::testNoRV(Node r) { std::map< Node, bool >::const_iterator itr = d_norv_cache.find(r); if(itr != d_norv_cache.end()) { return itr->second; } else { if(r.getKind() == kind::REGEXP_RV) { return false; } else if(r.getNumChildren() > 1) { for(unsigned int i=0; i cache, unsigned cnt ) { //Assert(checkConstRegExp(r1) && checkConstRegExp(r2)); if(r1 > r2) { TNode tmpNode = r1; r1 = r2; r2 = tmpNode; } Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n "<< mkString(r1) << ",\n " << mkString(r2) << std::endl; //if(Trace.isOn("regexp-debug")) { // Trace("regexp-debug") << "... with cache:\n"; // for(std::map< PairNodes, Node >::const_iterator itr=cache.begin(); // itr!=cache.end();itr++) { // Trace("regexp-debug") << "(" << itr->first.first << "," << itr->first.second << ")->" << itr->second << std::endl; // } //} std::pair < Node, Node > p(r1, r2); std::map < PairNodes, Node >::const_iterator itr = d_inter_cache.find(p); Node rNode; if(itr != d_inter_cache.end()) { rNode = itr->second; } else { Trace("regexp-int-debug") << " ... not in cache" << std::endl; if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) { Trace("regexp-int-debug") << " ... one is empty set" << std::endl; rNode = d_emptyRegexp; } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) { Trace("regexp-int-debug") << " ... one is empty singleton" << std::endl; Node exp; int r = delta((r1 == d_emptySingleton ? r2 : r1), exp); if(r == 0) { //TODO: variable Unreachable(); } else if(r == 1) { rNode = d_emptySingleton; } else { rNode = d_emptyRegexp; } } else if(r1 == r2) { Trace("regexp-int-debug") << " ... equal" << std::endl; rNode = r1; //convert1(cnt, r1); } else { Trace("regexp-int-debug") << " ... normal checking" << std::endl; std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p); if(itrcache != cache.end()) { rNode = itrcache->second; } else { Trace("regexp-int-debug") << " ... normal without cache" << std::endl; std::vector cset; std::set cset1, cset2; std::set< Node > vset1, vset2; firstChars(r1, cset1, vset1); firstChars(r2, cset2, vset2); Trace("regexp-int-debug") << " ... got fset" << std::endl; std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset2.end(), std::inserter(cset, cset.begin())); std::vector< Node > vec_nodes; Node delta_exp; Trace("regexp-int-debug") << " ... try delta" << std::endl; int flag = delta(r1, delta_exp); int flag2 = delta(r2, delta_exp); Trace("regexp-int-debug") << " ... delta1=" << flag << ", delta2=" << flag2 << std::endl; if(flag != 2 && flag2 != 2) { if(flag == 1 && flag2 == 1) { vec_nodes.push_back(d_emptySingleton); } else { //TODO: variable Unreachable(); } } if(Trace.isOn("regexp-int-debug")) { Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {"; for (std::vector::const_iterator itr = cset.begin(); itr != cset.end(); itr++) { //CVC4::String c( *itr ); if(itr != cset.begin()) { Trace("regexp-int-debug") << ", "; } Trace("regexp-int-debug") << ( *itr ); } Trace("regexp-int-debug") << std::endl; } std::map< PairNodes, Node > cacheX; for (std::vector::const_iterator itr = cset.begin(); itr != cset.end(); itr++) { std::vector cvec; cvec.push_back(String::convertCodeToUnsignedInt(*itr)); String c(cvec); Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl; Node r1l = derivativeSingle(r1, c); Node r2l = derivativeSingle(r2, c); Trace("regexp-int-debug") << " ... got partial(r1,c) = " << mkString(r1l) << std::endl; Trace("regexp-int-debug") << " ... got partial(r2,c) = " << mkString(r2l) << std::endl; Node rt; if(r1l > r2l) { Node tnode = r1l; r1l = r2l; r2l = tnode; } PairNodes pp(r1l, r2l); std::map< PairNodes, Node >::const_iterator itr2 = cacheX.find(pp); if(itr2 != cacheX.end()) { rt = itr2->second; } else { std::map< PairNodes, Node > cache2(cache); cache2[ p ] = NodeManager::currentNM()->mkNode(kind::REGEXP_RV, NodeManager::currentNM()->mkConst(CVC4::Rational(cnt))); rt = intersectInternal(r1l, r2l, cache2, cnt+1); cacheX[ pp ] = rt; } rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) ); Trace("regexp-int-debug") << " ... got p(r1,c) && p(r2,c) = " << mkString(rt) << std::endl; vec_nodes.push_back(rt); } rNode = Rewriter::rewrite( vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes) ); rNode = convert1(cnt, rNode); rNode = Rewriter::rewrite( rNode ); } } Trace("regexp-int-debug") << " ... try testing no RV of " << mkString(rNode) << std::endl; if(testNoRV(rNode)) { d_inter_cache[p] = rNode; } } Trace("regexp-int") << "End(" << cnt << ") of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl; return rNode; } Node RegExpOpr::removeIntersection(Node r) { Assert( checkConstRegExp(r) ); std::map < Node, Node >::const_iterator itr = d_rm_inter_cache.find(r); Node retNode; if(itr != d_rm_inter_cache.end()) { retNode = itr->second; } else { switch(r.getKind()) { case kind::REGEXP_EMPTY: { retNode = r; break; } case kind::REGEXP_SIGMA: { retNode = r; break; } case kind::REGEXP_RANGE: { retNode = r; break; } case kind::STRING_TO_REGEXP: { retNode = r; break; } case kind::REGEXP_CONCAT: { std::vector< Node > vec_nodes; for(unsigned i=0; imkNode(kind::REGEXP_CONCAT, vec_nodes) ); break; } case kind::REGEXP_UNION: { std::vector< Node > vec_nodes; for(unsigned i=0; imkNode(kind::REGEXP_UNION, vec_nodes) ); break; } case kind::REGEXP_INTER: { retNode = removeIntersection( r[0] ); for(unsigned i=1; imkNode(kind::REGEXP_STAR, retNode) ); break; } case kind::REGEXP_LOOP: { retNode = removeIntersection( r[0] ); retNode = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, retNode, r[1], r[2]) ); break; } default: { Unreachable(); } } d_rm_inter_cache[r] = retNode; } Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r) << " ) = " << mkString(retNode) << std::endl; return retNode; } Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) { if(checkConstRegExp(r1) && checkConstRegExp(r2)) { Node rr1 = removeIntersection(r1); Node rr2 = removeIntersection(r2); std::map< PairNodes, Node > cache; Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1) << ",\n\t"<< mkString(r2) << ")" << std::endl; Node retNode = intersectInternal(rr1, rr2, cache, 1); Trace("regexp-intersect") << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t"<< mkString(r2) << ") =\n\t" << mkString(retNode) << std::endl; return retNode; } else { spflag = true; return Node::null(); } } //printing std::string RegExpOpr::niceChar(Node r) { if(r.isConst()) { std::string s = r.getConst().toString() ; return s == "." ? "\\." : s; } else { std::string ss = "$" + r.toString(); return ss; } } std::string RegExpOpr::mkString( Node r ) { std::string retStr; if(r.isNull()) { retStr = "\\E"; } else { int k = r.getKind(); switch( k ) { case kind::REGEXP_EMPTY: { retStr += "\\E"; break; } case kind::REGEXP_SIGMA: { retStr += "."; break; } case kind::STRING_TO_REGEXP: { std::string tmp( niceChar( r[0] ) ); retStr += tmp.size()==1? tmp : "(" + tmp + ")"; break; } case kind::REGEXP_CONCAT: { retStr += "("; for(unsigned i=0; i().toString(); retStr += ","; if(r.getNumChildren() == 3) { retStr += r[2].getConst().toString(); } retStr += "}"; break; } case kind::REGEXP_RV: { retStr += "<"; retStr += r[0].getConst().getNumerator().toString(); retStr += ">"; break; } default: Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl; //Assert( false ); //return Node::null(); } } return retStr; } }/* CVC4::theory::strings namespace */ }/* CVC4::theory namespace */ }/* CVC4 namespace */