From 74dc5dafba5f4b316944c12371836102accc904d Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Fri, 15 Jun 2018 12:37:36 -0700 Subject: String rewriter: Add length preserving rewrite This commit adds methods to the string rewriter to transform a string into a normalized string with the same length (but not the same content necessarily). One of the use cases is normalizing the string before the start index in an indexof operation. For example: ``` str.indexof(str.++("ABCD", x), y, 3) ---> str.indexof(str.++("AAAD", x), y, 3) ``` In addition to the helper methods, this commit adds the indexof rewrite above. --- src/theory/strings/theory_strings_rewriter.cpp | 91 ++++++++++++++++++++++++ src/theory/strings/theory_strings_rewriter.h | 16 +++++ test/unit/theory/theory_strings_rewriter_white.h | 77 ++++++++++++++++++++ 3 files changed, 184 insertions(+) diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 4ab81da7c..e69222df4 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -19,10 +19,13 @@ #include #include +#include "expr/node_builder.h" #include "options/strings_options.h" #include "smt/logic_exception.h" #include "theory/arith/arith_msum.h" #include "theory/theory.h" +#include "util/integer.h" +#include "util/rational.h" using namespace std; using namespace CVC4; @@ -2015,6 +2018,30 @@ Node TheoryStringsRewriter::rewriteIndexof( Node node ) { return returnRewrite(node, negone, "idof-nctn"); } } + else + { + Node new_len = node[2]; + std::vector nr; + if (stripSymbolicLength(children0, nr, 1, new_len)) + { + // Normalize the string before the start index. + // + // For example: + // str.indexof(str.++("ABCD", x), y, 3) ---> + // str.indexof(str.++("AAAD", x), y, 3) + Node normNr = lengthPreserveRewrite(mkConcat(kind::STRING_CONCAT, nr)); + if (normNr != mkConcat(kind::STRING_CONCAT, nr)) + { + std::vector normNrChildren; + getConcat(normNr, normNrChildren); + std::vector children(normNrChildren); + children.insert(children.end(), children0.begin(), children0.end()); + Node nn = mkConcat(kind::STRING_CONCAT, children); + Node res = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]); + return returnRewrite(node, res, "idof-norm-prefix"); + } + } + } if (node[2].isConst() && node[2].getConst().sgn()==0) { @@ -3035,6 +3062,70 @@ bool TheoryStringsRewriter::stripConstantEndpoints(std::vector& n1, return changed; } +Node TheoryStringsRewriter::canonicalStrForSymbolicLength(Node len) +{ + NodeManager* nm = NodeManager::currentNM(); + + Node res; + if (len.getKind() == kind::CONST_RATIONAL) + { + // c -> "A" repeated c times + Rational ratLen = len.getConst(); + Assert(ratLen.getDenominator() == 1); + Integer intLen = ratLen.getNumerator(); + res = nm->mkConst(String(std::string(intLen.getUnsignedInt(), 'A'))); + } + else if (len.getKind() == kind::PLUS) + { + // x + y -> norm(x) + norm(y) + NodeBuilder<> concatBuilder(kind::STRING_CONCAT); + for (const auto& n : len) + { + Node sn = canonicalStrForSymbolicLength(n); + if (sn.isNull()) + { + return Node::null(); + } + std::vector snChildren; + getConcat(sn, snChildren); + concatBuilder.append(snChildren); + } + res = concatBuilder.constructNode(); + } + else if (len.getKind() == kind::MULT && len.getNumChildren() == 2 + && len[0].isConst()) + { + // c * x -> norm(x) repeated c times + Rational ratReps = len[0].getConst(); + Assert(ratReps.getDenominator() == 1); + Integer intReps = ratReps.getNumerator(); + + Node nRep = canonicalStrForSymbolicLength(len[1]); + std::vector nRepChildren; + getConcat(nRep, nRepChildren); + NodeBuilder<> concatBuilder(kind::STRING_CONCAT); + for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++) + { + concatBuilder.append(nRepChildren); + } + res = concatBuilder.constructNode(); + } + else if (len.getKind() == kind::STRING_LENGTH) + { + // len(x) -> x + res = len[0]; + } + return res; +} + +Node TheoryStringsRewriter::lengthPreserveRewrite(Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n)); + Node res = canonicalStrForSymbolicLength(len); + return res.isNull() ? n : res; +} + bool TheoryStringsRewriter::checkEntailNonEmpty(Node a) { Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a); diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index 4c78a1945..59842dd53 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -377,6 +377,22 @@ class TheoryStringsRewriter { std::vector& nb, std::vector& ne, int dir = 0); + + /** + * Given a symbolic length n, returns the canonical string for that length. + * For example if n is constant, this function returns a string consisting of + * "A" repeated n times. Returns the null node if no such string exists. + */ + static Node canonicalStrForSymbolicLength(Node n); + + /** length preserving rewrite + * + * Given input n, this returns a string n' whose length is equivalent to n. + * We apply certain normalizations to n', such as replacing all constants + * that are not relevant to length by "A". + */ + static Node lengthPreserveRewrite(Node n); + /** entail non-empty * * Checks whether string a is entailed to be non-empty. Is equivalent to diff --git a/test/unit/theory/theory_strings_rewriter_white.h b/test/unit/theory/theory_strings_rewriter_white.h index acee9754b..d5422f999 100644 --- a/test/unit/theory/theory_strings_rewriter_white.h +++ b/test/unit/theory/theory_strings_rewriter_white.h @@ -245,4 +245,81 @@ class TheoryStringsRewriterWhite : public CxxTest::TestSuite Node res_a_repl_substr = Rewriter::rewrite(a_repl_substr); TS_ASSERT_EQUALS(res_repl_substr_a, res_a_repl_substr); } + + void testLengthPreserveRewrite() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node empty = d_nm->mkConst(::CVC4::String("")); + Node abcd = d_nm->mkConst(::CVC4::String("ABCD")); + Node f = d_nm->mkConst(::CVC4::String("F")); + Node gh = d_nm->mkConst(::CVC4::String("GH")); + Node ij = d_nm->mkConst(::CVC4::String("IJ")); + + Node i = d_nm->mkVar("i", intType); + Node s = d_nm->mkVar("s", strType); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + + // Same length preserving rewrite for: + // + // (str.++ "ABCD" (str.++ x x)) + // + // (str.++ "GH" (str.repl "GH" "IJ") "IJ") + Node concat1 = d_nm->mkNode( + kind::STRING_CONCAT, abcd, d_nm->mkNode(kind::STRING_CONCAT, x, x)); + Node concat2 = d_nm->mkNode(kind::STRING_CONCAT, + gh, + x, + d_nm->mkNode(kind::STRING_STRREPL, x, gh, ij), + ij); + Node res_concat1 = TheoryStringsRewriter::lengthPreserveRewrite(concat1); + Node res_concat2 = TheoryStringsRewriter::lengthPreserveRewrite(concat2); + TS_ASSERT_EQUALS(res_concat1, res_concat2); + } + + void testRewriteIndexOf() + { + TypeNode strType = d_nm->stringType(); + + Node a = d_nm->mkConst(::CVC4::String("A")); + Node abcd = d_nm->mkConst(::CVC4::String("ABCD")); + Node aaad = d_nm->mkConst(::CVC4::String("AAAD")); + Node b = d_nm->mkConst(::CVC4::String("B")); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + Node one = d_nm->mkConst(Rational(2)); + Node three = d_nm->mkConst(Rational(3)); + + // Same normal form for: + // + // (str.to.int (str.indexof "A" x 1)) + // + // (str.to.int (str.indexof "B" x 1)) + Node a_idof_x = d_nm->mkNode(kind::STRING_STRIDOF, a, x, one); + Node itos_a_idof_x = d_nm->mkNode(kind::STRING_ITOS, a_idof_x); + Node b_idof_x = d_nm->mkNode(kind::STRING_STRIDOF, b, x, one); + Node itos_b_idof_x = d_nm->mkNode(kind::STRING_ITOS, b_idof_x); + Node res_itos_a_idof_x = Rewriter::rewrite(itos_a_idof_x); + Node res_itos_b_idof_x = Rewriter::rewrite(itos_b_idof_x); + TS_ASSERT_EQUALS(res_itos_a_idof_x, res_itos_b_idof_x); + + // Same normal form for: + // + // (str.indexof (str.++ "ABCD" x) y 3) + // + // (str.indexof (str.++ "AAAD" x) y 3) + Node idof_abcd = d_nm->mkNode(kind::STRING_STRIDOF, + d_nm->mkNode(kind::STRING_CONCAT, abcd, x), + y, + three); + Node idof_aaad = d_nm->mkNode(kind::STRING_STRIDOF, + d_nm->mkNode(kind::STRING_CONCAT, aaad, x), + y, + three); + Node res_idof_abcd = Rewriter::rewrite(idof_abcd); + Node res_idof_aaad = Rewriter::rewrite(idof_aaad); + TS_ASSERT_EQUALS(res_idof_abcd, res_idof_aaad); + } }; -- cgit v1.2.3