From 299192695da6700273a7d9edb78411b1fb957fd0 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Tue, 16 Jul 2019 18:12:15 -0400 Subject: Add support for str.tolower and str.toupper (#3092) --- src/parser/smt2/smt2.cpp | 5 +++ src/printer/smt2/smt2_printer.cpp | 4 +++ src/theory/strings/kinds | 4 +++ src/theory/strings/theory_strings.cpp | 9 +++-- src/theory/strings/theory_strings_preprocess.cpp | 44 +++++++++++++++++++++++ src/theory/strings/theory_strings_rewriter.cpp | 40 +++++++++++++++++++++ src/theory/strings/theory_strings_rewriter.h | 7 ++++ src/theory/strings/theory_strings_type_rules.h | 21 +++++++++++ test/regress/CMakeLists.txt | 2 ++ test/regress/regress0/strings/tolower-simple.smt2 | 11 ++++++ test/regress/regress1/strings/tolower-find.smt2 | 13 +++++++ 11 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 test/regress/regress0/strings/tolower-simple.smt2 create mode 100644 test/regress/regress1/strings/tolower-find.smt2 diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp index 54dfa51c9..af374274b 100644 --- a/src/parser/smt2/smt2.cpp +++ b/src/parser/smt2/smt2.cpp @@ -169,6 +169,11 @@ void Smt2::addStringOperators() { addOperator(kind::STRING_STRIDOF, "str.indexof" ); addOperator(kind::STRING_STRREPL, "str.replace" ); addOperator(kind::STRING_STRREPLALL, "str.replaceall"); + if (!strictModeEnabled()) + { + addOperator(kind::STRING_TOLOWER, "str.tolower"); + addOperator(kind::STRING_TOUPPER, "str.toupper"); + } addOperator(kind::STRING_PREFIX, "str.prefixof" ); addOperator(kind::STRING_SUFFIX, "str.suffixof" ); // at the moment, we only use this syntax for smt2.6.1 diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp index 66d36fe4c..9b34b1d7c 100644 --- a/src/printer/smt2/smt2_printer.cpp +++ b/src/printer/smt2/smt2_printer.cpp @@ -609,6 +609,8 @@ void Smt2Printer::toStream(std::ostream& out, case kind::STRING_STRIDOF: case kind::STRING_STRREPL: case kind::STRING_STRREPLALL: + case kind::STRING_TOLOWER: + case kind::STRING_TOUPPER: case kind::STRING_PREFIX: case kind::STRING_SUFFIX: case kind::STRING_LEQ: @@ -1152,6 +1154,8 @@ static string smtKindString(Kind k, Variant v) case kind::STRING_STRIDOF: return "str.indexof" ; case kind::STRING_STRREPL: return "str.replace" ; case kind::STRING_STRREPLALL: return "str.replaceall"; + case kind::STRING_TOLOWER: return "str.tolower"; + case kind::STRING_TOUPPER: return "str.toupper"; case kind::STRING_PREFIX: return "str.prefixof" ; case kind::STRING_SUFFIX: return "str.suffixof" ; case kind::STRING_LEQ: return "str.<="; diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index df6085422..715ea8f50 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -28,6 +28,8 @@ operator STRING_SUFFIX 2 "string suffixof" operator STRING_ITOS 1 "integer to string" operator STRING_STOI 1 "string to integer (total function)" operator STRING_CODE 1 "string to code, returns the code of the first character of the string if it has length one, -1 otherwise" +operator STRING_TOLOWER 1 "string to lowercase conversion" +operator STRING_TOUPPER 1 "string to uppercase conversion" sort STRING_TYPE \ Cardinality::INTEGERS \ @@ -100,6 +102,8 @@ typerule STRING_SUFFIX ::CVC4::theory::strings::StringSuffixOfTypeRule typerule STRING_ITOS ::CVC4::theory::strings::StringIntToStrTypeRule typerule STRING_STOI ::CVC4::theory::strings::StringStrToIntTypeRule typerule STRING_CODE ::CVC4::theory::strings::StringStrToIntTypeRule +typerule STRING_TOUPPER ::CVC4::theory::strings::StringStrToStrTypeRule +typerule STRING_TOLOWER ::CVC4::theory::strings::StringStrToStrTypeRule typerule STRING_IN_REGEXP ::CVC4::theory::strings::StringInRegExpTypeRule diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index ac6c0c102..435d1d8c7 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -140,6 +140,8 @@ TheoryStrings::TheoryStrings(context::Context* c, getExtTheory()->addFunctionKind(kind::STRING_IN_REGEXP); getExtTheory()->addFunctionKind(kind::STRING_LEQ); getExtTheory()->addFunctionKind(kind::STRING_CODE); + getExtTheory()->addFunctionKind(kind::STRING_TOLOWER); + getExtTheory()->addFunctionKind(kind::STRING_TOUPPER); // The kinds we are treating as function application in congruence d_equalityEngine.addFunctionKind(kind::STRING_LENGTH); @@ -156,6 +158,8 @@ TheoryStrings::TheoryStrings(context::Context* c, d_equalityEngine.addFunctionKind(kind::STRING_STRIDOF); d_equalityEngine.addFunctionKind(kind::STRING_STRREPL); d_equalityEngine.addFunctionKind(kind::STRING_STRREPLALL); + d_equalityEngine.addFunctionKind(kind::STRING_TOLOWER); + d_equalityEngine.addFunctionKind(kind::STRING_TOUPPER); d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) ); d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); @@ -527,7 +531,8 @@ bool TheoryStrings::doReduction(int effort, Node n, bool& isCd) NodeManager* nm = NodeManager::currentNM(); Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI || k == STRING_STRREPL - || k == STRING_STRREPLALL || k == STRING_LEQ); + || k == STRING_STRREPLALL || k == STRING_LEQ || k == STRING_TOLOWER + || k == STRING_TOUPPER); std::vector new_nodes; Node res = d_preproc.simplify(n, new_nodes); Assert(res != n); @@ -854,7 +859,7 @@ void TheoryStrings::preRegisterTerm(TNode n) { if (k == kind::STRING_STRIDOF || k == kind::STRING_ITOS || k == kind::STRING_STOI || k == kind::STRING_STRREPL || k == kind::STRING_STRREPLALL || k == kind::STRING_STRCTN - || k == STRING_LEQ) + || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER) { std::stringstream ss; ss << "Term of kind " << k diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 6ceeff6f2..e3634f84f 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -486,6 +486,50 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { // Thus, replaceall( x, y, z ) = rpaw retNode = rpaw; + } + else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER) + { + Node x = t[0]; + Node r = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); + + Node lent = nm->mkNode(STRING_LENGTH, t); + Node lenr = nm->mkNode(STRING_LENGTH, r); + Node eqLenA = lent.eqNode(lenr); + + Node i = nm->mkBoundVar(nm->integerType()); + Node bvi = nm->mkNode(BOUND_VAR_LIST, i); + + Node ci = nm->mkNode(STRING_CODE, nm->mkNode(STRING_SUBSTR, x, i, d_one)); + Node ri = nm->mkNode(STRING_CODE, nm->mkNode(STRING_SUBSTR, r, i, d_one)); + + Node lb = nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? 97 : 65)); + Node ub = nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? 122 : 90)); + Node offset = + nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? -32 : 32)); + + Node res = nm->mkNode( + ITE, + nm->mkNode(AND, nm->mkNode(LEQ, lb, ci), nm->mkNode(LEQ, ci, ub)), + nm->mkNode(PLUS, ci, offset), + ci); + + Node bound = + nm->mkNode(AND, nm->mkNode(LEQ, d_zero, i), nm->mkNode(LEQ, i, lenr)); + Node rangeA = + nm->mkNode(FORALL, bvi, nm->mkNode(OR, bound.negate(), ri.eqNode(res))); + + // upper 65 ... 90 + // lower 97 ... 122 + // assert: + // len(r) = len(x) ^ + // forall i. 0 <= i < len(r) => + // str.code( str.substr(r,i,1) ) = ite( 97 <= ci <= 122, ci-32, ci) + // where ci = str.code( str.substr(x,i,1) ) + Node assert = nm->mkNode(AND, eqLenA, rangeA); + new_nodes.push_back(assert); + + // Thus, toLower( x ) = r + retNode = r; } else if( t.getKind() == kind::STRING_STRCTN ){ Node x = t[0]; Node s = t[1]; diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index cb5508fb7..37e7d004f 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -1484,6 +1484,10 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { { retNode = rewriteReplaceAll(node); } + else if (nk == STRING_TOLOWER || nk == STRING_TOUPPER) + { + retNode = rewriteStrConvert(node); + } else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX) { retNode = rewritePrefixSuffix(node); @@ -2974,6 +2978,42 @@ Node TheoryStringsRewriter::rewriteReplaceInternal(Node node) return Node::null(); } +Node TheoryStringsRewriter::rewriteStrConvert(Node node) +{ + Kind nk = node.getKind(); + Assert(nk == STRING_TOLOWER || nk == STRING_TOUPPER); + if (node[0].isConst()) + { + std::vector nvec = node[0].getConst().getVec(); + for (unsigned i = 0, nvsize = nvec.size(); i < nvsize; i++) + { + unsigned newChar = CVC4::String::convertUnsignedIntToCode(nvec[i]); + // transform it + // upper 65 ... 90 + // lower 97 ... 122 + if (nk == STRING_TOUPPER) + { + if (newChar >= 97 && newChar <= 122) + { + newChar = newChar - 32; + } + } + else if (nk == STRING_TOLOWER) + { + if (newChar >= 65 && newChar <= 90) + { + newChar = newChar + 32; + } + } + newChar = CVC4::String::convertCodeToUnsignedInt(newChar); + nvec[i] = newChar; + } + Node retNode = NodeManager::currentNM()->mkConst(String(nvec)); + return returnRewrite(node, retNode, "str-conv-const"); + } + return node; +} + Node TheoryStringsRewriter::rewriteStringLeq(Node n) { Assert(n.getKind() == kind::STRING_LEQ); diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index c273ef40e..ee92828de 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -226,6 +226,13 @@ class TheoryStringsRewriter { * str.replaceall. If it returns a non-null ret, then node rewrites to ret. */ static Node rewriteReplaceInternal(Node node); + /** rewrite string convert + * + * This is the entry point for post-rewriting terms n of the form + * str.tolower( s ) and str.toupper( s ) + * Returns the rewritten form of node. + */ + static Node rewriteStrConvert(Node node); /** rewrite string less than or equal * This is the entry point for post-rewriting terms n of the form * str.<=( t, s ) diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h index 91de8ac01..de77315fc 100644 --- a/src/theory/strings/theory_strings_type_rules.h +++ b/src/theory/strings/theory_strings_type_rules.h @@ -240,6 +240,27 @@ public: } }; +class StringStrToStrTypeRule +{ + public: + inline static TypeNode computeType(NodeManager* nodeManager, + TNode n, + bool check) + { + if (check) + { + TypeNode t = n[0].getType(check); + if (!t.isString()) + { + std::stringstream ss; + ss << "expecting a string term in argument of " << n.getKind(); + throw TypeCheckingExceptionPrivate(n, ss.str()); + } + } + return nodeManager->stringType(); + } +}; + class RegExpConcatTypeRule { public: inline static TypeNode computeType(NodeManager* nodeManager, TNode n, bool check) diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index b6fa29e28..eb78c1611 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -865,6 +865,7 @@ set(regress_0_tests regress0/strings/strings-native-simple.cvc regress0/strings/strip-endpoint-itos.smt2 regress0/strings/substr-rewrites.smt2 + regress0/strings/tolower-simple.smt2 regress0/strings/type001.smt2 regress0/strings/unsound-0908.smt2 regress0/strings/unsound-repl-rewrite.smt2 @@ -1607,6 +1608,7 @@ set(regress_1_tests regress1/strings/strings-lt-simple.smt2 regress1/strings/strip-endpt-sound.smt2 regress1/strings/substr001.smt2 + regress1/strings/tolower-find.smt2 regress1/strings/timeout-no-resp.smt2 regress1/strings/type002.smt2 regress1/strings/type003.smt2 diff --git a/test/regress/regress0/strings/tolower-simple.smt2 b/test/regress/regress0/strings/tolower-simple.smt2 new file mode 100644 index 000000000..9d2273f8d --- /dev/null +++ b/test/regress/regress0/strings/tolower-simple.smt2 @@ -0,0 +1,11 @@ +(set-logic ALL_SUPPORTED) +(set-option :strings-exp true) +(set-info :status sat) +(declare-const x String) +(declare-const y String) +(declare-const z String) + +(assert (= (str.tolower "aBCDef") x)) +(assert (= x (str.++ y "c" z))) + +(check-sat) diff --git a/test/regress/regress1/strings/tolower-find.smt2 b/test/regress/regress1/strings/tolower-find.smt2 new file mode 100644 index 000000000..ff27a285a --- /dev/null +++ b/test/regress/regress1/strings/tolower-find.smt2 @@ -0,0 +1,13 @@ +(set-logic ALL_SUPPORTED) +(set-option :strings-exp true) +(set-info :status sat) +(declare-const x String) +(declare-const y String) + +(assert (= (str.tolower x) "abcde")) +(assert (= (str.tolower y) "abcde")) +(assert (not (= x "abcde"))) +(assert (not (= y "abcde"))) +(assert (not (= x y))) + +(check-sat) -- cgit v1.2.3