summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2020-02-26 13:54:41 -0600
committerGitHub <noreply@github.com>2020-02-26 13:54:41 -0600
commitb320aa323923822a7702997bbca05e8512da55a4 (patch)
tree62ed510b54bbfef908296a264002052efc4fff98 /src
parentf26ea8026e94252e4f1418be473d10a5f957b988 (diff)
Basic support for regular expression complement (#3437)
Fixes #3408 . Adds basic rewriter, parsing, type checking and implements the required cases in regexp_operation.cpp. It also adds some missing documentation in regexp_operation.h
Diffstat (limited to 'src')
-rw-r--r--src/api/cvc4cpp.cpp2
-rw-r--r--src/api/cvc4cppkind.h8
-rw-r--r--src/parser/cvc/Cvc.g3
-rw-r--r--src/parser/smt2/smt2.cpp1
-rw-r--r--src/printer/smt2/smt2_printer.cpp2
-rw-r--r--src/theory/strings/kinds2
-rw-r--r--src/theory/strings/regexp_operation.cpp74
-rw-r--r--src/theory/strings/regexp_operation.h21
-rw-r--r--src/theory/strings/theory_strings_rewriter.cpp13
9 files changed, 109 insertions, 17 deletions
diff --git a/src/api/cvc4cpp.cpp b/src/api/cvc4cpp.cpp
index 622d48ac9..4e5f4604e 100644
--- a/src/api/cvc4cpp.cpp
+++ b/src/api/cvc4cpp.cpp
@@ -277,6 +277,7 @@ const static std::unordered_map<Kind, CVC4::Kind, KindHashFunction> s_kinds{
{REGEXP_LOOP, CVC4::Kind::REGEXP_LOOP},
{REGEXP_EMPTY, CVC4::Kind::REGEXP_EMPTY},
{REGEXP_SIGMA, CVC4::Kind::REGEXP_SIGMA},
+ {REGEXP_COMPLEMENT, CVC4::Kind::REGEXP_COMPLEMENT},
/* Quantifiers --------------------------------------------------------- */
{FORALL, CVC4::Kind::FORALL},
{EXISTS, CVC4::Kind::EXISTS},
@@ -540,6 +541,7 @@ const static std::unordered_map<CVC4::Kind, Kind, CVC4::kind::KindHashFunction>
{CVC4::Kind::REGEXP_LOOP, REGEXP_LOOP},
{CVC4::Kind::REGEXP_EMPTY, REGEXP_EMPTY},
{CVC4::Kind::REGEXP_SIGMA, REGEXP_SIGMA},
+ {CVC4::Kind::REGEXP_COMPLEMENT, REGEXP_COMPLEMENT},
/* Quantifiers ----------------------------------------------------- */
{CVC4::Kind::FORALL, FORALL},
{CVC4::Kind::EXISTS, EXISTS},
diff --git a/src/api/cvc4cppkind.h b/src/api/cvc4cppkind.h
index 591ff9b1e..ca5696537 100644
--- a/src/api/cvc4cppkind.h
+++ b/src/api/cvc4cppkind.h
@@ -2172,6 +2172,14 @@ enum CVC4_PUBLIC Kind : int32_t
* mkTerm(Kind kind)
*/
REGEXP_SIGMA,
+ /**
+ * Regexp complement.
+ * Parameters: 1
+ * -[1]: Term of sort RegExp
+ * Create with:
+ * mkTerm(Kind kind, Term child1)
+ */
+ REGEXP_COMPLEMENT,
#if 0
/* regexp rv (internal use only) */
REGEXP_RV,
diff --git a/src/parser/cvc/Cvc.g b/src/parser/cvc/Cvc.g
index 3fde52bbe..33ca7bcf2 100644
--- a/src/parser/cvc/Cvc.g
+++ b/src/parser/cvc/Cvc.g
@@ -240,6 +240,7 @@ tokens {
REGEXP_LOOP_TOK = 'RE_LOOP';
REGEXP_EMPTY_TOK = 'RE_EMPTY';
REGEXP_SIGMA_TOK = 'RE_SIGMA';
+ REGEXP_COMPLEMENT_TOK = 'RE_COMPLEMENT';
SETS_CARD_TOK = 'CARD';
@@ -2045,6 +2046,8 @@ stringTerm[CVC4::Expr& f]
{ f = MK_EXPR(CVC4::kind::REGEXP_RANGE, f, f2); }
| REGEXP_LOOP_TOK LPAREN formula[f] COMMA formula[f2] COMMA formula[f3] RPAREN
{ f = MK_EXPR(CVC4::kind::REGEXP_LOOP, f, f2, f3); }
+ | REGEXP_COMPLEMENT_TOK LPAREN formula[f] RPAREN
+ { f = MK_EXPR(CVC4::kind::REGEXP_COMPLEMENT, f); }
| REGEXP_EMPTY_TOK
{ f = MK_EXPR(CVC4::kind::REGEXP_EMPTY, std::vector<Expr>()); }
| REGEXP_SIGMA_TOK
diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp
index 42111f581..8faeab491 100644
--- a/src/parser/smt2/smt2.cpp
+++ b/src/parser/smt2/smt2.cpp
@@ -200,6 +200,7 @@ void Smt2::addStringOperators() {
addOperator(kind::REGEXP_OPT, "re.opt");
addOperator(kind::REGEXP_RANGE, "re.range");
addOperator(kind::REGEXP_LOOP, "re.loop");
+ addOperator(kind::REGEXP_COMPLEMENT, "re.comp");
addOperator(kind::STRING_LT, "str.<");
addOperator(kind::STRING_LEQ, "str.<=");
}
diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp
index 07422cd8b..e9a4d0a83 100644
--- a/src/printer/smt2/smt2_printer.cpp
+++ b/src/printer/smt2/smt2_printer.cpp
@@ -665,6 +665,7 @@ void Smt2Printer::toStream(std::ostream& out,
case kind::REGEXP_OPT:
case kind::REGEXP_RANGE:
case kind::REGEXP_LOOP:
+ case kind::REGEXP_COMPLEMENT:
case kind::REGEXP_EMPTY:
case kind::REGEXP_SIGMA: out << smtKindString(k, d_variant) << " "; break;
@@ -1236,6 +1237,7 @@ static string smtKindString(Kind k, Variant v)
case kind::REGEXP_OPT: return "re.opt";
case kind::REGEXP_RANGE: return "re.range";
case kind::REGEXP_LOOP: return "re.loop";
+ case kind::REGEXP_COMPLEMENT: return "re.comp";
//sep theory
case kind::SEP_STAR: return "sep";
diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds
index aa1e2627a..052b75302 100644
--- a/src/theory/strings/kinds
+++ b/src/theory/strings/kinds
@@ -68,6 +68,7 @@ operator REGEXP_PLUS 1 "regexp +"
operator REGEXP_OPT 1 "regexp ?"
operator REGEXP_RANGE 2 "regexp range"
operator REGEXP_LOOP 2:3 "regexp loop"
+operator REGEXP_COMPLEMENT 1 "regexp complement"
operator REGEXP_EMPTY 0 "regexp empty"
operator REGEXP_SIGMA 0 "regexp all characters"
@@ -85,6 +86,7 @@ typerule REGEXP_PLUS "SimpleTypeRule<RRegExp, ARegExp>"
typerule REGEXP_OPT "SimpleTypeRule<RRegExp, ARegExp>"
typerule REGEXP_RANGE ::CVC4::theory::strings::RegExpRangeTypeRule
typerule REGEXP_LOOP "SimpleTypeRule<RRegExp, ARegExp, AInteger, AOptional<AInteger>>"
+typerule REGEXP_COMPLEMENT "SimpleTypeRule<RRegExp, ARegExp>"
typerule STRING_TO_REGEXP "SimpleTypeRule<RRegExp, AString>"
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
index 1b2de0eb5..048dc88b6 100644
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -68,9 +68,9 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r)
visit.pop_back();
it = d_constCache.find(cur);
+ Kind ck = cur.getKind();
if (it == d_constCache.end())
{
- Kind ck = cur.getKind();
if (ck == STRING_TO_REGEXP)
{
Node tmp = Rewriter::rewrite(cur[0]);
@@ -104,7 +104,7 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r)
}
else if (it->second == RE_C_UNKNOWN)
{
- RegExpConstType ret = RE_C_CONRETE_CONSTANT;
+ RegExpConstType ret = ck == REGEXP_COMPLEMENT ? RE_C_CONSTANT : RE_C_CONRETE_CONSTANT;
for (const Node& cn : cur)
{
it = d_constCache.find(cn);
@@ -126,7 +126,8 @@ bool RegExpOpr::isRegExpKind(Kind k)
return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
|| k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
|| k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
- || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV;
+ || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
+ || k == REGEXP_COMPLEMENT;
}
// 0-unknown, 1-yes, 2-no
@@ -264,6 +265,14 @@ int RegExpOpr::delta( Node r, Node &exp ) {
}
break;
}
+ case kind::REGEXP_COMPLEMENT:
+ {
+ int tmp = delta(r[0], exp);
+ // flip the result if known
+ tmp = tmp == 0 ? 0 : (3 - tmp);
+ exp = exp.isNull() ? exp : exp.negate();
+ break;
+ }
default: {
Assert(!isRegExpKind(k));
break;
@@ -504,6 +513,12 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
}
break;
}
+ case kind::REGEXP_COMPLEMENT:
+ {
+ // don't know result
+ return 0;
+ break;
+ }
default: {
Assert(!isRegExpKind(r.getKind()));
return 0;
@@ -679,6 +694,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
//Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl;
break;
}
+ case kind::REGEXP_COMPLEMENT:
default: {
Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
Unreachable();
@@ -786,12 +802,13 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
break;
}
case kind::REGEXP_SIGMA:
+ case kind::REGEXP_COMPLEMENT:
default: {
// we do not expect to call this function on regular expressions that
// aren't a standard regular expression kind. However, if we do, then
// the following code is conservative and says that the current
// regular expression can begin with any character.
- Assert(k == REGEXP_SIGMA);
+ Assert(isRegExpKind(k));
// can start with any character
Assert(d_lastchar < std::numeric_limits<unsigned>::max());
for (unsigned i = 0; i <= d_lastchar; i++)
@@ -1046,6 +1063,12 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
}
break;
}
+ case kind::REGEXP_COMPLEMENT:
+ {
+ // ~( s in complement(R) ) ---> s in R
+ conc = nm->mkNode(STRING_IN_REGEXP, s, r[0]);
+ break;
+ }
default: {
Assert(!isRegExpKind(k));
break;
@@ -1240,6 +1263,12 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes
}
break;
}
+ case kind::REGEXP_COMPLEMENT:
+ {
+ // s in complement(R) ---> ~( s in R )
+ conc = nm->mkNode(STRING_IN_REGEXP, s, r[0]).negate();
+ break;
+ }
default: {
Assert(!isRegExpKind(k));
break;
@@ -1305,10 +1334,14 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
if(n == d_emptyRegexp) {
r1 = d_emptyRegexp;
r2 = d_emptyRegexp;
+ return;
} else if(n == d_emptySingleton) {
r1 = d_emptySingleton;
r2 = d_emptySingleton;
- } else if(n.getKind() == kind::REGEXP_RV) {
+ }
+ Kind nk = n.getKind();
+ if (nk == REGEXP_RV)
+ {
Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()))
<< "Exceeded UINT32_MAX in RegExpOpr::convert2";
unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
@@ -1318,7 +1351,9 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
} else {
r2 = n;
}
- } else if(n.getKind() == kind::REGEXP_CONCAT) {
+ }
+ else if (nk == REGEXP_CONCAT)
+ {
bool flag = true;
std::vector<Node> vr1, vr2;
for( unsigned i=0; i<n.getNumChildren(); i++ ) {
@@ -1344,7 +1379,9 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
r1 = d_emptySingleton;
r2 = n;
}
- } else if(n.getKind() == kind::REGEXP_UNION) {
+ }
+ else if (nk == REGEXP_UNION)
+ {
std::vector<Node> vr1, vr2;
for( unsigned i=0; i<n.getNumChildren(); i++ ) {
Node t1, t2;
@@ -1354,15 +1391,16 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
}
r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1);
r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2);
- } else if(n.getKind() == kind::STRING_TO_REGEXP || n.getKind() == kind::REGEXP_SIGMA || n.getKind() == kind::REGEXP_RANGE) {
- r1 = d_emptySingleton;
- r2 = n;
- } else if(n.getKind() == kind::REGEXP_LOOP) {
- //TODO:LOOP
+ }
+ else if (nk == STRING_TO_REGEXP || nk == REGEXP_SIGMA || nk == REGEXP_RANGE
+ || nk == REGEXP_COMPLEMENT || nk == REGEXP_LOOP)
+ {
+ // this leaves n unchanged
r1 = d_emptySingleton;
r2 = n;
- //Unreachable();
- } else {
+ }
+ else
+ {
//is it possible?
Unreachable();
}
@@ -1541,6 +1579,7 @@ Node RegExpOpr::removeIntersection(Node r) {
case REGEXP_CONCAT:
case REGEXP_UNION:
case REGEXP_STAR:
+ case REGEXP_COMPLEMENT:
{
NodeBuilder<> nb(rk);
for (const Node& rc : r)
@@ -1696,6 +1735,13 @@ std::string RegExpOpr::mkString( Node r ) {
retStr += ">";
break;
}
+ case REGEXP_COMPLEMENT:
+ {
+ retStr += "^(";
+ retStr += mkString(r[0]);
+ retStr += ")";
+ break;
+ }
default:
{
std::stringstream ss;
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h
index 91d5df744..b9dbedba5 100644
--- a/src/theory/strings/regexp_operation.h
+++ b/src/theory/strings/regexp_operation.h
@@ -41,10 +41,13 @@ namespace strings {
*/
enum RegExpConstType
{
- // the regular expression doesn't contain variables or re.allchar or re.range
+ // the regular expression doesn't contain variables or re.comp,
+ // re.allchar or re.range (call these three operators "non-concrete
+ // operators"). Notice that re.comp is a non-concrete operator
+ // since it can be seen as indirectly defined in terms of re.allchar.
RE_C_CONRETE_CONSTANT,
// the regular expression doesn't contain variables, but may contain
- // re.allchar or re.range
+ // re.comp, re.allchar or re.range
RE_C_CONSTANT,
// the regular expression may contain variables
RE_C_VARIABLE,
@@ -122,6 +125,20 @@ class RegExpOpr {
/** is k a native operator whose return type is a regular expression? */
static bool isRegExpKind(Kind k);
void simplify(Node t, std::vector< Node > &new_nodes, bool polarity);
+ /**
+ * This method returns 1 if the empty string is in r, 2 if the empty string
+ * is not in r, or 0 if it is unknown whether the empty string is in r.
+ * TODO (project #2): refactor the return value of this function.
+ *
+ * If this method returns 0, then exp is updated to an explanation that
+ * would imply that the empty string is in r.
+ *
+ * For example,
+ * - delta( (re.inter (str.to.re x) (re.* "A")) ) returns 0 and sets exp to
+ * x = "",
+ * - delta( (re.++ (str.to.re "A") R) ) returns 2,
+ * - delta( (re.union (re.* "A") R) ) returns 1.
+ */
int delta( Node r, Node &exp );
int derivativeS( Node r, CVC4::String c, Node &retNode );
Node derivativeSingle( Node r, CVC4::String c );
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp
index 339d11dd2..9cd4c1ecc 100644
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -1329,6 +1329,11 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i
}
}
}
+ case REGEXP_COMPLEMENT:
+ {
+ return !testConstStringInRegExp(s, index_start, r[0]);
+ break;
+ }
default: {
Assert(!RegExpOpr::isRegExpKind(k));
return false;
@@ -1469,7 +1474,13 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) {
retNode = nm->mkNode(AND,
nm->mkNode(LEQ, nm->mkNode(STRING_CODE, r[0]), xcode),
nm->mkNode(LEQ, xcode, nm->mkNode(STRING_CODE, r[1])));
- }else if(x != node[0] || r != node[1]) {
+ }
+ else if (r.getKind() == REGEXP_COMPLEMENT)
+ {
+ retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate();
+ }
+ else if (x != node[0] || r != node[1])
+ {
retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, r );
}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback