summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/theory/strings/regexp_elim.cpp82
-rw-r--r--src/theory/strings/theory_strings_rewriter.cpp53
-rw-r--r--src/theory/strings/theory_strings_rewriter.h6
3 files changed, 139 insertions, 2 deletions
diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp
index 0310e4620..a0d806c52 100644
--- a/src/theory/strings/regexp_elim.cpp
+++ b/src/theory/strings/regexp_elim.cpp
@@ -50,11 +50,89 @@ Node RegExpElimination::eliminateConcat(Node atom)
Node x = atom[0];
Node lenx = nm->mkNode(STRING_LENGTH, x);
Node re = atom[1];
+ std::vector<Node> children;
+ TheoryStringsRewriter::getConcat(re, children);
+
+ // If it can be reduced to memberships in fixed length regular expressions.
+ // This includes concatenations where at most one child is of the form
+ // (re.* re.allchar), which we abbreviate _* below, and all other children
+ // have a fixed length.
+ // The intuition why this is a "non-aggressive" rewrite is that membership
+ // into fixed length regular expressions are easy to handle.
+ bool hasFixedLength = true;
+ // the index of _* in re
+ unsigned pivotIndex = 0;
+ bool hasPivotIndex = false;
+ std::vector<Node> childLengths;
+ std::vector<Node> childLengthsPostPivot;
+ for (unsigned i = 0, size = children.size(); i < size; i++)
+ {
+ Node c = children[i];
+ Node fl = TheoryStringsRewriter::getFixedLengthForRegexp(c);
+ if (fl.isNull())
+ {
+ if (!hasPivotIndex && c.getKind() == REGEXP_STAR
+ && c[0].getKind() == REGEXP_SIGMA)
+ {
+ hasPivotIndex = true;
+ pivotIndex = i;
+ // set to zero for the sum below
+ fl = d_zero;
+ }
+ else
+ {
+ hasFixedLength = false;
+ break;
+ }
+ }
+ childLengths.push_back(fl);
+ if (hasPivotIndex)
+ {
+ childLengthsPostPivot.push_back(fl);
+ }
+ }
+ if (hasFixedLength)
+ {
+ Assert(re.getNumChildren() == children.size());
+ Node sum = nm->mkNode(PLUS, childLengths);
+ std::vector<Node> conc;
+ conc.push_back(nm->mkNode(hasPivotIndex ? GEQ : EQUAL, lenx, sum));
+ Node currEnd = d_zero;
+ for (unsigned i = 0, size = childLengths.size(); i < size; i++)
+ {
+ if (hasPivotIndex && i == pivotIndex)
+ {
+ Node ppSum = childLengthsPostPivot.size() == 1
+ ? childLengthsPostPivot[0]
+ : nm->mkNode(PLUS, childLengthsPostPivot);
+ currEnd = nm->mkNode(MINUS, lenx, ppSum);
+ }
+ else
+ {
+ Node curr = nm->mkNode(STRING_SUBSTR, x, currEnd, childLengths[i]);
+ Node currMem = nm->mkNode(STRING_IN_REGEXP, curr, re[i]);
+ conc.push_back(currMem);
+ currEnd = nm->mkNode(PLUS, currEnd, childLengths[i]);
+ currEnd = Rewriter::rewrite(currEnd);
+ }
+ }
+ Node res = nm->mkNode(AND, conc);
+ // For example:
+ // x in re.++(re.union(re.range("A", "J"), re.range("N", "Z")), "AB") -->
+ // len( x ) = 3 ^
+ // substr(x,0,1) in re.union(re.range("A", "J"), re.range("N", "Z")) ^
+ // substr(x,1,2) in "AB"
+ // An example with a pivot index:
+ // x in re.++( "AB" ++ _* ++ "C" ) -->
+ // len( x ) >= 3 ^
+ // substr( x, 0, 2 ) in "AB" ^
+ // substr( x, len( x ) - 1, 1 ) in "C"
+ return returnElim(atom, res, "concat-fixed-len");
+ }
+
// memberships of the form x in re.++ * s1 * ... * sn *, where * are
// any number of repetitions (exact or indefinite) of re.allchar.
Trace("re-elim-debug") << "Try re concat with gaps " << atom << std::endl;
- std::vector<Node> children;
- TheoryStringsRewriter::getConcat(re, children);
bool onlySigmasAndConsts = true;
std::vector<Node> sep_children;
std::vector<unsigned> gap_minsize;
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp
index e8a11e62e..c38a5e838 100644
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -4538,6 +4538,59 @@ Node TheoryStringsRewriter::getConstantArithBound(Node a, bool isLower)
return ret;
}
+Node TheoryStringsRewriter::getFixedLengthForRegexp(Node n)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ if (n.getKind() == STRING_TO_REGEXP)
+ {
+ Node ret = nm->mkNode(STRING_LENGTH, n[0]);
+ ret = Rewriter::rewrite(ret);
+ if (ret.isConst())
+ {
+ return ret;
+ }
+ }
+ else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE)
+ {
+ return nm->mkConst(Rational(1));
+ }
+ else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER)
+ {
+ Node ret;
+ for (const Node& nc : n)
+ {
+ Node flc = getFixedLengthForRegexp(nc);
+ if (flc.isNull() || (!ret.isNull() && ret != flc))
+ {
+ return Node::null();
+ }
+ else if (ret.isNull())
+ {
+ // first time
+ ret = flc;
+ }
+ }
+ return ret;
+ }
+ else if (n.getKind() == REGEXP_CONCAT)
+ {
+ NodeBuilder<> nb(PLUS);
+ for (const Node& nc : n)
+ {
+ Node flc = getFixedLengthForRegexp(nc);
+ if (flc.isNull())
+ {
+ return flc;
+ }
+ nb << flc;
+ }
+ Node ret = nb.constructNode();
+ ret = Rewriter::rewrite(ret);
+ return ret;
+ }
+ return Node::null();
+}
+
bool TheoryStringsRewriter::checkEntailArithInternal(Node a)
{
Assert(Rewriter::rewrite(a) == a);
diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h
index 2c38ce8dc..3d71423ab 100644
--- a/src/theory/strings/theory_strings_rewriter.h
+++ b/src/theory/strings/theory_strings_rewriter.h
@@ -566,6 +566,12 @@ class TheoryStringsRewriter {
* checkEntailArith( a, strict ) = true.
*/
static Node getConstantArithBound(Node a, bool isLower = true);
+ /** get length for regular expression
+ *
+ * Given regular expression n, if this method returns a non-null value c, then
+ * x in n entails len( x ) = c.
+ */
+ static Node getFixedLengthForRegexp(Node n);
/** decompose substr chain
*
* If s is substr( ... substr( base, x1, y1 ) ..., xn, yn ), then this
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback