summaryrefslogtreecommitdiff
path: root/src/theory/strings/regexp_operation.cpp
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2019-10-23 18:15:28 -0500
committerAndres Noetzli <andres.noetzli@gmail.com>2019-10-23 16:15:28 -0700
commit64cef995a521ac7211b9e3ed95c85deb186ff352 (patch)
tree927d14ff521127f37192afcfe8b737f15d54e061 /src/theory/strings/regexp_operation.cpp
parent0d4d9bf3f31687d9cf48b0c45ab420b06ff099f7 (diff)
Fixes for SyGuS + regular expressions (#3313)
This commit fixes numerous issues involving the combination of SyGuS and regular expressions. Combining SyGuS and regular expressions may involve constructing regular expressions that are neither variables nor builtin regular expression operators. The code was not robust for this case, either throwing spurious assertion failures or having incorrect behavior.
Diffstat (limited to 'src/theory/strings/regexp_operation.cpp')
-rw-r--r--src/theory/strings/regexp_operation.cpp105
1 files changed, 72 insertions, 33 deletions
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
index d2505f4f4..b20e2f3ad 100644
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -48,6 +48,7 @@ RegExpOpr::RegExpOpr()
RegExpOpr::~RegExpOpr() {}
bool RegExpOpr::checkConstRegExp( Node r ) {
+ Assert(r.getType().isRegExp());
Trace("strings-regexp-cstre")
<< "RegExpOpr::checkConstRegExp /" << mkString(r) << "/" << std::endl;
RegExpConstType rct = getRegExpConstType(r);
@@ -56,6 +57,7 @@ bool RegExpOpr::checkConstRegExp( Node r ) {
RegExpConstType RegExpOpr::getRegExpConstType(Node r)
{
+ Assert(r.getType().isRegExp());
std::unordered_map<Node, RegExpConstType, NodeHashFunction>::iterator it;
std::vector<TNode> visit;
TNode cur;
@@ -79,11 +81,25 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r)
{
d_constCache[cur] = RE_C_CONSTANT;
}
+ else if (!isRegExpKind(ck))
+ {
+ // non-regular expression applications, e.g. function applications
+ // with regular expression return type are treated as variables.
+ d_constCache[cur] = RE_C_VARIABLE;
+ }
else
{
d_constCache[cur] = RE_C_UNKNOWN;
visit.push_back(cur);
- visit.insert(visit.end(), cur.begin(), cur.end());
+ if (ck == REGEXP_LOOP)
+ {
+ // only add the first child of loop
+ visit.push_back(cur[0]);
+ }
+ else
+ {
+ visit.insert(visit.end(), cur.begin(), cur.end());
+ }
}
}
else if (it->second == RE_C_UNKNOWN)
@@ -105,6 +121,14 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r)
return d_constCache[r];
}
+bool RegExpOpr::isRegExpKind(Kind k)
+{
+ return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
+ || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
+ || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
+ || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV;
+}
+
// 0-unknown, 1-yes, 2-no
int RegExpOpr::delta( Node r, Node &exp ) {
Trace("regexp-delta") << "RegExp-Delta starts with /" << mkString( r ) << "/" << std::endl;
@@ -113,7 +137,7 @@ int RegExpOpr::delta( Node r, Node &exp ) {
ret = d_delta_cache[r].first;
exp = d_delta_cache[r].second;
} else {
- int k = r.getKind();
+ Kind k = r.getKind();
switch( k ) {
case kind::REGEXP_EMPTY: {
ret = 2;
@@ -241,8 +265,8 @@ int RegExpOpr::delta( Node r, Node &exp ) {
break;
}
default: {
- //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
- Unreachable();
+ Assert(!isRegExpKind(k));
+ break;
}
}
if(!exp.isNull()) {
@@ -481,8 +505,9 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
break;
}
default: {
- //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
- Unreachable();
+ Assert(!isRegExpKind(r.getKind()));
+ return 0;
+ break;
}
}
if(retNode != d_emptyRegexp) {
@@ -515,7 +540,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
retNode = d_emptyRegexp;
}
} else {
- int k = r.getKind();
+ Kind k = r.getKind();
switch( k ) {
case kind::REGEXP_EMPTY: {
retNode = d_emptyRegexp;
@@ -657,6 +682,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
default: {
Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
Unreachable();
+ break;
}
}
if(retNode != d_emptyRegexp) {
@@ -680,19 +706,11 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
// cset is code points
std::set<unsigned> cset;
SetNodes vset;
- int k = r.getKind();
+ Kind k = r.getKind();
switch( k ) {
case kind::REGEXP_EMPTY: {
break;
}
- case kind::REGEXP_SIGMA: {
- Assert(d_lastchar < std::numeric_limits<unsigned>::max());
- for (unsigned i = 0; i <= d_lastchar; i++)
- {
- cset.insert(i);
- }
- break;
- }
case kind::REGEXP_RANGE: {
unsigned a = r[0].getConst<String>().front();
a = String::convertUnsignedIntToCode(a);
@@ -767,9 +785,20 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
firstChars(r[0], cset, vset);
break;
}
+ case kind::REGEXP_SIGMA:
default: {
- Trace("regexp-error") << "Unsupported term: " << r << " in firstChars." << std::endl;
- Unreachable();
+ // we do not expect to call this function on regular expressions that
+ // aren't a standard regular expression kind. However, if we do, then
+ // the following code is conservative and says that the current
+ // regular expression can begin with any character.
+ Assert(k == REGEXP_SIGMA);
+ // can start with any character
+ Assert(d_lastchar < std::numeric_limits<unsigned>::max());
+ for (unsigned i = 0; i <= d_lastchar; i++)
+ {
+ cset.insert(i);
+ }
+ break;
}
}
pcset.insert(cset.begin(), cset.end());
@@ -817,7 +846,7 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
if(itr != d_simpl_neg_cache.end()) {
new_nodes.push_back( itr->second );
} else {
- int k = r.getKind();
+ Kind k = r.getKind();
Node conc;
switch( k ) {
case kind::REGEXP_EMPTY: {
@@ -1018,13 +1047,16 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
break;
}
default: {
- Trace("strings-error") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
- Assert( false, "Unsupported Term" );
+ Assert(!isRegExpKind(k));
+ break;
}
}
- conc = Rewriter::rewrite( conc );
- new_nodes.push_back( conc );
- d_simpl_neg_cache[p] = conc;
+ if (!conc.isNull())
+ {
+ conc = Rewriter::rewrite(conc);
+ new_nodes.push_back(conc);
+ d_simpl_neg_cache[p] = conc;
+ }
}
}
void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
@@ -1034,7 +1066,7 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes
if(itr != d_simpl_cache.end()) {
new_nodes.push_back( itr->second );
} else {
- int k = r.getKind();
+ Kind k = r.getKind();
Node conc;
switch( k ) {
case kind::REGEXP_EMPTY: {
@@ -1191,13 +1223,16 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes
break;
}
default: {
- Trace("strings-error") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
- Assert( false, "Unsupported Term" );
+ Assert(!isRegExpKind(k));
+ break;
}
}
- conc = Rewriter::rewrite( conc );
- new_nodes.push_back( conc );
- d_simpl_cache[p] = conc;
+ if (!conc.isNull())
+ {
+ conc = Rewriter::rewrite(conc);
+ new_nodes.push_back(conc);
+ d_simpl_cache[p] = conc;
+ }
}
}
@@ -1644,9 +1679,13 @@ std::string RegExpOpr::mkString( Node r ) {
break;
}
default:
- Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
- //Assert( false );
- //return Node::null();
+ {
+ std::stringstream ss;
+ ss << r;
+ retStr = ss.str();
+ Assert(!isRegExpKind(r.getKind()));
+ break;
+ }
}
}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback