diff options
author | Andrew Reynolds <andrew.j.reynolds@gmail.com> | 2020-02-26 13:54:41 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-02-26 13:54:41 -0600 |
commit | b320aa323923822a7702997bbca05e8512da55a4 (patch) | |
tree | 62ed510b54bbfef908296a264002052efc4fff98 /src/theory/strings | |
parent | f26ea8026e94252e4f1418be473d10a5f957b988 (diff) |
Basic support for regular expression complement (#3437)
Fixes #3408 .
Adds basic rewriter, parsing, type checking and implements the required cases in regexp_operation.cpp. It also adds some missing documentation in regexp_operation.h
Diffstat (limited to 'src/theory/strings')
-rw-r--r-- | src/theory/strings/kinds | 2 | ||||
-rw-r--r-- | src/theory/strings/regexp_operation.cpp | 74 | ||||
-rw-r--r-- | src/theory/strings/regexp_operation.h | 21 | ||||
-rw-r--r-- | src/theory/strings/theory_strings_rewriter.cpp | 13 |
4 files changed, 93 insertions, 17 deletions
diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index aa1e2627a..052b75302 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -68,6 +68,7 @@ operator REGEXP_PLUS 1 "regexp +" operator REGEXP_OPT 1 "regexp ?" operator REGEXP_RANGE 2 "regexp range" operator REGEXP_LOOP 2:3 "regexp loop" +operator REGEXP_COMPLEMENT 1 "regexp complement" operator REGEXP_EMPTY 0 "regexp empty" operator REGEXP_SIGMA 0 "regexp all characters" @@ -85,6 +86,7 @@ typerule REGEXP_PLUS "SimpleTypeRule<RRegExp, ARegExp>" typerule REGEXP_OPT "SimpleTypeRule<RRegExp, ARegExp>" typerule REGEXP_RANGE ::CVC4::theory::strings::RegExpRangeTypeRule typerule REGEXP_LOOP "SimpleTypeRule<RRegExp, ARegExp, AInteger, AOptional<AInteger>>" +typerule REGEXP_COMPLEMENT "SimpleTypeRule<RRegExp, ARegExp>" typerule STRING_TO_REGEXP "SimpleTypeRule<RRegExp, AString>" diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index 1b2de0eb5..048dc88b6 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -68,9 +68,9 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r) visit.pop_back(); it = d_constCache.find(cur); + Kind ck = cur.getKind(); if (it == d_constCache.end()) { - Kind ck = cur.getKind(); if (ck == STRING_TO_REGEXP) { Node tmp = Rewriter::rewrite(cur[0]); @@ -104,7 +104,7 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r) } else if (it->second == RE_C_UNKNOWN) { - RegExpConstType ret = RE_C_CONRETE_CONSTANT; + RegExpConstType ret = ck == REGEXP_COMPLEMENT ? RE_C_CONSTANT : RE_C_CONRETE_CONSTANT; for (const Node& cn : cur) { it = d_constCache.find(cn); @@ -126,7 +126,8 @@ bool RegExpOpr::isRegExpKind(Kind k) return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT - || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV; + || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV + || k == REGEXP_COMPLEMENT; } // 0-unknown, 1-yes, 2-no @@ -264,6 +265,14 @@ int RegExpOpr::delta( Node r, Node &exp ) { } break; } + case kind::REGEXP_COMPLEMENT: + { + int tmp = delta(r[0], exp); + // flip the result if known + tmp = tmp == 0 ? 0 : (3 - tmp); + exp = exp.isNull() ? exp : exp.negate(); + break; + } default: { Assert(!isRegExpKind(k)); break; @@ -504,6 +513,12 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { } break; } + case kind::REGEXP_COMPLEMENT: + { + // don't know result + return 0; + break; + } default: { Assert(!isRegExpKind(r.getKind())); return 0; @@ -679,6 +694,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { //Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl; break; } + case kind::REGEXP_COMPLEMENT: default: { Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl; Unreachable(); @@ -786,12 +802,13 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset) break; } case kind::REGEXP_SIGMA: + case kind::REGEXP_COMPLEMENT: default: { // we do not expect to call this function on regular expressions that // aren't a standard regular expression kind. However, if we do, then // the following code is conservative and says that the current // regular expression can begin with any character. - Assert(k == REGEXP_SIGMA); + Assert(isRegExpKind(k)); // can start with any character Assert(d_lastchar < std::numeric_limits<unsigned>::max()); for (unsigned i = 0; i <= d_lastchar; i++) @@ -1046,6 +1063,12 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes } break; } + case kind::REGEXP_COMPLEMENT: + { + // ~( s in complement(R) ) ---> s in R + conc = nm->mkNode(STRING_IN_REGEXP, s, r[0]); + break; + } default: { Assert(!isRegExpKind(k)); break; @@ -1240,6 +1263,12 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes } break; } + case kind::REGEXP_COMPLEMENT: + { + // s in complement(R) ---> ~( s in R ) + conc = nm->mkNode(STRING_IN_REGEXP, s, r[0]).negate(); + break; + } default: { Assert(!isRegExpKind(k)); break; @@ -1305,10 +1334,14 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) { if(n == d_emptyRegexp) { r1 = d_emptyRegexp; r2 = d_emptyRegexp; + return; } else if(n == d_emptySingleton) { r1 = d_emptySingleton; r2 = d_emptySingleton; - } else if(n.getKind() == kind::REGEXP_RV) { + } + Kind nk = n.getKind(); + if (nk == REGEXP_RV) + { Assert(n[0].getConst<Rational>() <= Rational(String::maxSize())) << "Exceeded UINT32_MAX in RegExpOpr::convert2"; unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt(); @@ -1318,7 +1351,9 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) { } else { r2 = n; } - } else if(n.getKind() == kind::REGEXP_CONCAT) { + } + else if (nk == REGEXP_CONCAT) + { bool flag = true; std::vector<Node> vr1, vr2; for( unsigned i=0; i<n.getNumChildren(); i++ ) { @@ -1344,7 +1379,9 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) { r1 = d_emptySingleton; r2 = n; } - } else if(n.getKind() == kind::REGEXP_UNION) { + } + else if (nk == REGEXP_UNION) + { std::vector<Node> vr1, vr2; for( unsigned i=0; i<n.getNumChildren(); i++ ) { Node t1, t2; @@ -1354,15 +1391,16 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) { } r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1); r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2); - } else if(n.getKind() == kind::STRING_TO_REGEXP || n.getKind() == kind::REGEXP_SIGMA || n.getKind() == kind::REGEXP_RANGE) { - r1 = d_emptySingleton; - r2 = n; - } else if(n.getKind() == kind::REGEXP_LOOP) { - //TODO:LOOP + } + else if (nk == STRING_TO_REGEXP || nk == REGEXP_SIGMA || nk == REGEXP_RANGE + || nk == REGEXP_COMPLEMENT || nk == REGEXP_LOOP) + { + // this leaves n unchanged r1 = d_emptySingleton; r2 = n; - //Unreachable(); - } else { + } + else + { //is it possible? Unreachable(); } @@ -1541,6 +1579,7 @@ Node RegExpOpr::removeIntersection(Node r) { case REGEXP_CONCAT: case REGEXP_UNION: case REGEXP_STAR: + case REGEXP_COMPLEMENT: { NodeBuilder<> nb(rk); for (const Node& rc : r) @@ -1696,6 +1735,13 @@ std::string RegExpOpr::mkString( Node r ) { retStr += ">"; break; } + case REGEXP_COMPLEMENT: + { + retStr += "^("; + retStr += mkString(r[0]); + retStr += ")"; + break; + } default: { std::stringstream ss; diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index 91d5df744..b9dbedba5 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -41,10 +41,13 @@ namespace strings { */ enum RegExpConstType { - // the regular expression doesn't contain variables or re.allchar or re.range + // the regular expression doesn't contain variables or re.comp, + // re.allchar or re.range (call these three operators "non-concrete + // operators"). Notice that re.comp is a non-concrete operator + // since it can be seen as indirectly defined in terms of re.allchar. RE_C_CONRETE_CONSTANT, // the regular expression doesn't contain variables, but may contain - // re.allchar or re.range + // re.comp, re.allchar or re.range RE_C_CONSTANT, // the regular expression may contain variables RE_C_VARIABLE, @@ -122,6 +125,20 @@ class RegExpOpr { /** is k a native operator whose return type is a regular expression? */ static bool isRegExpKind(Kind k); void simplify(Node t, std::vector< Node > &new_nodes, bool polarity); + /** + * This method returns 1 if the empty string is in r, 2 if the empty string + * is not in r, or 0 if it is unknown whether the empty string is in r. + * TODO (project #2): refactor the return value of this function. + * + * If this method returns 0, then exp is updated to an explanation that + * would imply that the empty string is in r. + * + * For example, + * - delta( (re.inter (str.to.re x) (re.* "A")) ) returns 0 and sets exp to + * x = "", + * - delta( (re.++ (str.to.re "A") R) ) returns 2, + * - delta( (re.union (re.* "A") R) ) returns 1. + */ int delta( Node r, Node &exp ); int derivativeS( Node r, CVC4::String c, Node &retNode ); Node derivativeSingle( Node r, CVC4::String c ); diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 339d11dd2..9cd4c1ecc 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -1329,6 +1329,11 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i } } } + case REGEXP_COMPLEMENT: + { + return !testConstStringInRegExp(s, index_start, r[0]); + break; + } default: { Assert(!RegExpOpr::isRegExpKind(k)); return false; @@ -1469,7 +1474,13 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) { retNode = nm->mkNode(AND, nm->mkNode(LEQ, nm->mkNode(STRING_CODE, r[0]), xcode), nm->mkNode(LEQ, xcode, nm->mkNode(STRING_CODE, r[1]))); - }else if(x != node[0] || r != node[1]) { + } + else if (r.getKind() == REGEXP_COMPLEMENT) + { + retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate(); + } + else if (x != node[0] || r != node[1]) + { retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, r ); } |