summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndres Noetzli <andres.noetzli@gmail.com>2018-06-15 12:37:36 -0700
committerAndres Noetzli <andres.noetzli@gmail.com>2018-06-15 12:37:36 -0700
commit74dc5dafba5f4b316944c12371836102accc904d (patch)
tree4b3e6fd531fc3901fb9187dc29b09397222f8f8c
parentea24eec6b7914550c84cb09c569b7fc80304d8e7 (diff)
String rewriter: Add length preserving rewrite
This commit adds methods to the string rewriter to transform a string into a normalized string with the same length (but not the same content necessarily). One of the use cases is normalizing the string before the start index in an indexof operation. For example: ``` str.indexof(str.++("ABCD", x), y, 3) ---> str.indexof(str.++("AAAD", x), y, 3) ``` In addition to the helper methods, this commit adds the indexof rewrite above.
-rw-r--r--src/theory/strings/theory_strings_rewriter.cpp91
-rw-r--r--src/theory/strings/theory_strings_rewriter.h16
-rw-r--r--test/unit/theory/theory_strings_rewriter_white.h77
3 files changed, 184 insertions, 0 deletions
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp
index 4ab81da7c..e69222df4 100644
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -19,10 +19,13 @@
#include <stdint.h>
#include <algorithm>
+#include "expr/node_builder.h"
#include "options/strings_options.h"
#include "smt/logic_exception.h"
#include "theory/arith/arith_msum.h"
#include "theory/theory.h"
+#include "util/integer.h"
+#include "util/rational.h"
using namespace std;
using namespace CVC4;
@@ -2015,6 +2018,30 @@ Node TheoryStringsRewriter::rewriteIndexof( Node node ) {
return returnRewrite(node, negone, "idof-nctn");
}
}
+ else
+ {
+ Node new_len = node[2];
+ std::vector<Node> nr;
+ if (stripSymbolicLength(children0, nr, 1, new_len))
+ {
+ // Normalize the string before the start index.
+ //
+ // For example:
+ // str.indexof(str.++("ABCD", x), y, 3) --->
+ // str.indexof(str.++("AAAD", x), y, 3)
+ Node normNr = lengthPreserveRewrite(mkConcat(kind::STRING_CONCAT, nr));
+ if (normNr != mkConcat(kind::STRING_CONCAT, nr))
+ {
+ std::vector<Node> normNrChildren;
+ getConcat(normNr, normNrChildren);
+ std::vector<Node> children(normNrChildren);
+ children.insert(children.end(), children0.begin(), children0.end());
+ Node nn = mkConcat(kind::STRING_CONCAT, children);
+ Node res = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]);
+ return returnRewrite(node, res, "idof-norm-prefix");
+ }
+ }
+ }
if (node[2].isConst() && node[2].getConst<Rational>().sgn()==0)
{
@@ -3035,6 +3062,70 @@ bool TheoryStringsRewriter::stripConstantEndpoints(std::vector<Node>& n1,
return changed;
}
+Node TheoryStringsRewriter::canonicalStrForSymbolicLength(Node len)
+{
+ NodeManager* nm = NodeManager::currentNM();
+
+ Node res;
+ if (len.getKind() == kind::CONST_RATIONAL)
+ {
+ // c -> "A" repeated c times
+ Rational ratLen = len.getConst<Rational>();
+ Assert(ratLen.getDenominator() == 1);
+ Integer intLen = ratLen.getNumerator();
+ res = nm->mkConst(String(std::string(intLen.getUnsignedInt(), 'A')));
+ }
+ else if (len.getKind() == kind::PLUS)
+ {
+ // x + y -> norm(x) + norm(y)
+ NodeBuilder<> concatBuilder(kind::STRING_CONCAT);
+ for (const auto& n : len)
+ {
+ Node sn = canonicalStrForSymbolicLength(n);
+ if (sn.isNull())
+ {
+ return Node::null();
+ }
+ std::vector<Node> snChildren;
+ getConcat(sn, snChildren);
+ concatBuilder.append(snChildren);
+ }
+ res = concatBuilder.constructNode();
+ }
+ else if (len.getKind() == kind::MULT && len.getNumChildren() == 2
+ && len[0].isConst())
+ {
+ // c * x -> norm(x) repeated c times
+ Rational ratReps = len[0].getConst<Rational>();
+ Assert(ratReps.getDenominator() == 1);
+ Integer intReps = ratReps.getNumerator();
+
+ Node nRep = canonicalStrForSymbolicLength(len[1]);
+ std::vector<Node> nRepChildren;
+ getConcat(nRep, nRepChildren);
+ NodeBuilder<> concatBuilder(kind::STRING_CONCAT);
+ for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++)
+ {
+ concatBuilder.append(nRepChildren);
+ }
+ res = concatBuilder.constructNode();
+ }
+ else if (len.getKind() == kind::STRING_LENGTH)
+ {
+ // len(x) -> x
+ res = len[0];
+ }
+ return res;
+}
+
+Node TheoryStringsRewriter::lengthPreserveRewrite(Node n)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
+ Node res = canonicalStrForSymbolicLength(len);
+ return res.isNull() ? n : res;
+}
+
bool TheoryStringsRewriter::checkEntailNonEmpty(Node a)
{
Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h
index 4c78a1945..59842dd53 100644
--- a/src/theory/strings/theory_strings_rewriter.h
+++ b/src/theory/strings/theory_strings_rewriter.h
@@ -377,6 +377,22 @@ class TheoryStringsRewriter {
std::vector<Node>& nb,
std::vector<Node>& ne,
int dir = 0);
+
+ /**
+ * Given a symbolic length n, returns the canonical string for that length.
+ * For example if n is constant, this function returns a string consisting of
+ * "A" repeated n times. Returns the null node if no such string exists.
+ */
+ static Node canonicalStrForSymbolicLength(Node n);
+
+ /** length preserving rewrite
+ *
+ * Given input n, this returns a string n' whose length is equivalent to n.
+ * We apply certain normalizations to n', such as replacing all constants
+ * that are not relevant to length by "A".
+ */
+ static Node lengthPreserveRewrite(Node n);
+
/** entail non-empty
*
* Checks whether string a is entailed to be non-empty. Is equivalent to
diff --git a/test/unit/theory/theory_strings_rewriter_white.h b/test/unit/theory/theory_strings_rewriter_white.h
index acee9754b..d5422f999 100644
--- a/test/unit/theory/theory_strings_rewriter_white.h
+++ b/test/unit/theory/theory_strings_rewriter_white.h
@@ -245,4 +245,81 @@ class TheoryStringsRewriterWhite : public CxxTest::TestSuite
Node res_a_repl_substr = Rewriter::rewrite(a_repl_substr);
TS_ASSERT_EQUALS(res_repl_substr_a, res_a_repl_substr);
}
+
+ void testLengthPreserveRewrite()
+ {
+ TypeNode intType = d_nm->integerType();
+ TypeNode strType = d_nm->stringType();
+
+ Node empty = d_nm->mkConst(::CVC4::String(""));
+ Node abcd = d_nm->mkConst(::CVC4::String("ABCD"));
+ Node f = d_nm->mkConst(::CVC4::String("F"));
+ Node gh = d_nm->mkConst(::CVC4::String("GH"));
+ Node ij = d_nm->mkConst(::CVC4::String("IJ"));
+
+ Node i = d_nm->mkVar("i", intType);
+ Node s = d_nm->mkVar("s", strType);
+ Node x = d_nm->mkVar("x", strType);
+ Node y = d_nm->mkVar("y", strType);
+
+ // Same length preserving rewrite for:
+ //
+ // (str.++ "ABCD" (str.++ x x))
+ //
+ // (str.++ "GH" (str.repl "GH" "IJ") "IJ")
+ Node concat1 = d_nm->mkNode(
+ kind::STRING_CONCAT, abcd, d_nm->mkNode(kind::STRING_CONCAT, x, x));
+ Node concat2 = d_nm->mkNode(kind::STRING_CONCAT,
+ gh,
+ x,
+ d_nm->mkNode(kind::STRING_STRREPL, x, gh, ij),
+ ij);
+ Node res_concat1 = TheoryStringsRewriter::lengthPreserveRewrite(concat1);
+ Node res_concat2 = TheoryStringsRewriter::lengthPreserveRewrite(concat2);
+ TS_ASSERT_EQUALS(res_concat1, res_concat2);
+ }
+
+ void testRewriteIndexOf()
+ {
+ TypeNode strType = d_nm->stringType();
+
+ Node a = d_nm->mkConst(::CVC4::String("A"));
+ Node abcd = d_nm->mkConst(::CVC4::String("ABCD"));
+ Node aaad = d_nm->mkConst(::CVC4::String("AAAD"));
+ Node b = d_nm->mkConst(::CVC4::String("B"));
+ Node x = d_nm->mkVar("x", strType);
+ Node y = d_nm->mkVar("y", strType);
+ Node one = d_nm->mkConst(Rational(2));
+ Node three = d_nm->mkConst(Rational(3));
+
+ // Same normal form for:
+ //
+ // (str.to.int (str.indexof "A" x 1))
+ //
+ // (str.to.int (str.indexof "B" x 1))
+ Node a_idof_x = d_nm->mkNode(kind::STRING_STRIDOF, a, x, one);
+ Node itos_a_idof_x = d_nm->mkNode(kind::STRING_ITOS, a_idof_x);
+ Node b_idof_x = d_nm->mkNode(kind::STRING_STRIDOF, b, x, one);
+ Node itos_b_idof_x = d_nm->mkNode(kind::STRING_ITOS, b_idof_x);
+ Node res_itos_a_idof_x = Rewriter::rewrite(itos_a_idof_x);
+ Node res_itos_b_idof_x = Rewriter::rewrite(itos_b_idof_x);
+ TS_ASSERT_EQUALS(res_itos_a_idof_x, res_itos_b_idof_x);
+
+ // Same normal form for:
+ //
+ // (str.indexof (str.++ "ABCD" x) y 3)
+ //
+ // (str.indexof (str.++ "AAAD" x) y 3)
+ Node idof_abcd = d_nm->mkNode(kind::STRING_STRIDOF,
+ d_nm->mkNode(kind::STRING_CONCAT, abcd, x),
+ y,
+ three);
+ Node idof_aaad = d_nm->mkNode(kind::STRING_STRIDOF,
+ d_nm->mkNode(kind::STRING_CONCAT, aaad, x),
+ y,
+ three);
+ Node res_idof_abcd = Rewriter::rewrite(idof_abcd);
+ Node res_idof_aaad = Rewriter::rewrite(idof_aaad);
+ TS_ASSERT_EQUALS(res_idof_abcd, res_idof_aaad);
+ }
};
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback