From f7a77c4c14af25466e7ce31455a9636e0f8234e3 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Thu, 22 Feb 2018 14:08:13 -0600 Subject: Minor improvements to string rewriter (#1572) --- src/theory/strings/theory_strings.cpp | 337 +++++++++++++------------ src/theory/strings/theory_strings_rewriter.cpp | 183 ++++++++++---- src/theory/strings/theory_strings_rewriter.h | 6 + test/regress/regress1/strings/Makefile.am | 3 +- test/regress/regress1/strings/rew-020618.smt2 | 18 ++ 5 files changed, 346 insertions(+), 201 deletions(-) create mode 100644 test/regress/regress1/strings/rew-020618.smt2 diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 30a5f0fbc..6df2a1fdf 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -2769,170 +2769,193 @@ bool TheoryStrings::processLoop( std::vector< std::vector< Node > > &normal_form if( options::stringAbortLoop() ){ Message() << "Looping word equation encountered." << std::endl; exit( 1 ); - }else{ - Node conc; - Trace("strings-loop") << "Detected possible loop for " << normal_forms[loop_n_index][loop_index] << std::endl; - Trace("strings-loop") << " ... (X)= " << normal_forms[other_n_index][index] << std::endl; - - Trace("strings-loop") << " ... T(Y.Z)= "; - std::vector< Node > vec_t; - for(int lp=index; lp().tailcmp( r.getConst(), c ) ) { - if( c>=0) { - s_zy = NodeManager::currentNM()->mkConst( s_zy.getConst().substr(0, c) ); - r = d_emptyString; - vec_r.clear(); - Trace("strings-loop") << "Strings::Loop: Refactor S(Z.Y)= " << s_zy << ", c=" << c << std::endl; - flag = false; - } - } - if( flag ){ - Trace("strings-loop") << "Strings::Loop: tails are different." << std::endl; - sendInference( info.d_ant, conc, "Loop Conflict", true ); - return false; + } + NodeManager* nm = NodeManager::currentNM(); + Node conc; + Trace("strings-loop") << "Detected possible loop for " + << normal_forms[loop_n_index][loop_index] << std::endl; + Trace("strings-loop") << " ... (X)= " << normal_forms[other_n_index][index] + << std::endl; + + Trace("strings-loop") << " ... T(Y.Z)= "; + std::vector& veci = normal_forms[loop_n_index]; + std::vector vec_t(veci.begin() + index, veci.begin() + loop_index); + Node t_yz = mkConcat(vec_t); + Trace("strings-loop") << " (" << t_yz << ")" << std::endl; + Trace("strings-loop") << " ... S(Z.Y)= "; + std::vector& vecoi = normal_forms[other_n_index]; + std::vector vec_s(vecoi.begin() + index + 1, vecoi.end()); + Node s_zy = mkConcat(vec_s); + Trace("strings-loop") << s_zy << std::endl; + Trace("strings-loop") << " ... R= "; + std::vector vec_r(veci.begin() + loop_index + 1, veci.end()); + Node r = mkConcat(vec_r); + Trace("strings-loop") << r << std::endl; + + if (s_zy.isConst() && r.isConst() && r != d_emptyString) + { + int c; + bool flag = true; + if (s_zy.getConst().tailcmp(r.getConst(), c)) + { + if (c >= 0) + { + s_zy = nm->mkConst(s_zy.getConst().substr(0, c)); + r = d_emptyString; + vec_r.clear(); + Trace("strings-loop") << "Strings::Loop: Refactor S(Z.Y)= " << s_zy + << ", c=" << c << std::endl; + flag = false; } } - - //require that x is non-empty - Node split_eq; - if( !areDisequal( normal_forms[loop_n_index][loop_index], d_emptyString ) ){ - //try to make normal_forms[loop_n_index][loop_index] equal to empty to avoid loop - split_eq = normal_forms[loop_n_index][loop_index].eqNode( d_emptyString ); - }else if( !areDisequal( t_yz, d_emptyString ) && t_yz.getKind()!=kind::CONST_STRING ) { - //try to make normal_forms[loop_n_index][loop_index] equal to empty to avoid loop - split_eq = t_yz.eqNode( d_emptyString ); + if (flag) + { + Trace("strings-loop") << "Strings::Loop: tails are different." + << std::endl; + sendInference(info.d_ant, conc, "Loop Conflict", true); + return false; } - if( !split_eq.isNull() ){ - info.d_conc = NodeManager::currentNM()->mkNode( kind::OR, split_eq, split_eq.negate() ); - info.d_id = 4; - return true; - }else{ - //need to break - info.d_ant.push_back( normal_forms[loop_n_index][loop_index].eqNode( d_emptyString ).negate() ); - if( t_yz.getKind()!=kind::CONST_STRING ) { - info.d_ant.push_back( t_yz.eqNode( d_emptyString ).negate() ); - } - Node ant = mkExplain( info.d_ant ); - info.d_ant.clear(); - info.d_antn.push_back( ant ); - - Node str_in_re; - if( s_zy == t_yz && - r == d_emptyString && - s_zy.isConst() && - s_zy.getConst().isRepeated() - ) { - Node rep_c = NodeManager::currentNM()->mkConst( s_zy.getConst().substr(0, 1) ); - Trace("strings-loop") << "Special case (X)=" << normal_forms[other_n_index][index] << " " << std::endl; - Trace("strings-loop") << "... (C)=" << rep_c << " " << std::endl; - //special case - str_in_re = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, normal_forms[other_n_index][index], - NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, - NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, rep_c ) ) ); - conc = str_in_re; - } else if(t_yz.isConst()) { - Trace("strings-loop") << "Strings::Loop: Const Normal Breaking." << std::endl; - CVC4::String s = t_yz.getConst< CVC4::String >(); - unsigned size = s.size(); - std::vector< Node > vconc; - for(unsigned len=1; len<=size; len++) { - Node y = NodeManager::currentNM()->mkConst(s.substr(0, len)); - Node z = NodeManager::currentNM()->mkConst(s.substr(len, size - len)); - Node restr = s_zy; - Node cc; - if(r != d_emptyString) { - std::vector< Node > v2(vec_r); - v2.insert(v2.begin(), y); - v2.insert(v2.begin(), z); - restr = mkConcat( z, y ); - cc = Rewriter::rewrite(s_zy.eqNode( mkConcat( v2 ) )); - } else { - cc = Rewriter::rewrite(s_zy.eqNode( mkConcat( z, y) )); - } - if(cc == d_false) { - continue; - } - Node conc2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, normal_forms[other_n_index][index], - NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, - NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, y), - NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, - NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, restr)))); - cc = cc==d_true ? conc2 : NodeManager::currentNM()->mkNode( kind::AND, cc, conc2 ); - d_regexp_ant[conc2] = ant; - vconc.push_back(cc); - } - conc = vconc.size()==0 ? Node::null() : vconc.size()==1 ? vconc[0] : NodeManager::currentNM()->mkNode(kind::OR, vconc); - } else { - Trace("strings-loop") << "Strings::Loop: Normal Loop Breaking." << std::endl; - //right - Node sk_w= mkSkolemS( "w_loop" ); - Node sk_y= mkSkolemS( "y_loop", 1 ); - Node sk_z= mkSkolemS( "z_loop" ); - //t1 * ... * tn = y * z - Node conc1 = t_yz.eqNode( mkConcat( sk_y, sk_z ) ); - // s1 * ... * sk = z * y * r - vec_r.insert(vec_r.begin(), sk_y); - vec_r.insert(vec_r.begin(), sk_z); - Node conc2 = s_zy.eqNode( mkConcat( vec_r ) ); - Node conc3 = normal_forms[other_n_index][index].eqNode( mkConcat( sk_y, sk_w ) ); - Node restr = r == d_emptyString ? s_zy : mkConcat( sk_z, sk_y ); - str_in_re = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, sk_w, - NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, - NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, restr ) ) ); - - std::vector< Node > vec_conc; - vec_conc.push_back(conc1); vec_conc.push_back(conc2); vec_conc.push_back(conc3); - vec_conc.push_back(str_in_re); - //vec_conc.push_back(sk_y.eqNode(d_emptyString).negate());//by mkskolems - conc = NodeManager::currentNM()->mkNode( kind::AND, vec_conc ); - } // normal case - - //set its antecedant to ant, to say when it is relevant - if(!str_in_re.isNull()) { - d_regexp_ant[str_in_re] = ant; - } - //we will be done - if( options::stringProcessLoop() ){ - info.d_conc = conc; - info.d_id = 8; - info.d_nf_pair[0] = normal_form_src[i]; - info.d_nf_pair[1] = normal_form_src[j]; + } + + Node split_eq; + for (unsigned r = 0; r < 2; r++) + { + Node t = r == 0 ? normal_forms[loop_n_index][loop_index] : t_yz; + split_eq = t.eqNode(d_emptyString); + Node split_eqr = Rewriter::rewrite(split_eq); + // the equality could rewrite to false + if (!split_eqr.isConst()) + { + if (!areDisequal(t, d_emptyString)) + { + // try to make t equal to empty to avoid loop + info.d_conc = nm->mkNode(kind::OR, split_eq, split_eq.negate()); + info.d_id = 4; return true; - }else{ - d_out->setIncomplete(); } + else + { + info.d_ant.push_back(split_eq.negate()); + } + } + else + { + Assert(!split_eqr.getConst()); } } + + Node ant = mkExplain(info.d_ant); + info.d_ant.clear(); + info.d_antn.push_back(ant); + + Node str_in_re; + if (s_zy == t_yz && r == d_emptyString && s_zy.isConst() + && s_zy.getConst().isRepeated()) + { + Node rep_c = nm->mkConst(s_zy.getConst().substr(0, 1)); + Trace("strings-loop") << "Special case (X)=" + << normal_forms[other_n_index][index] << " " + << std::endl; + Trace("strings-loop") << "... (C)=" << rep_c << " " << std::endl; + // special case + str_in_re = + nm->mkNode(kind::STRING_IN_REGEXP, + normal_forms[other_n_index][index], + nm->mkNode(kind::REGEXP_STAR, + nm->mkNode(kind::STRING_TO_REGEXP, rep_c))); + conc = str_in_re; + } + else if (t_yz.isConst()) + { + Trace("strings-loop") << "Strings::Loop: Const Normal Breaking." + << std::endl; + CVC4::String s = t_yz.getConst(); + unsigned size = s.size(); + std::vector vconc; + for (unsigned len = 1; len <= size; len++) + { + Node y = nm->mkConst(s.substr(0, len)); + Node z = nm->mkConst(s.substr(len, size - len)); + Node restr = s_zy; + Node cc; + if (r != d_emptyString) + { + std::vector v2(vec_r); + v2.insert(v2.begin(), y); + v2.insert(v2.begin(), z); + restr = mkConcat(z, y); + cc = Rewriter::rewrite(s_zy.eqNode(mkConcat(v2))); + } + else + { + cc = Rewriter::rewrite(s_zy.eqNode(mkConcat(z, y))); + } + if (cc == d_false) + { + continue; + } + Node conc2 = nm->mkNode( + kind::STRING_IN_REGEXP, + normal_forms[other_n_index][index], + nm->mkNode(kind::REGEXP_CONCAT, + nm->mkNode(kind::STRING_TO_REGEXP, y), + nm->mkNode(kind::REGEXP_STAR, + nm->mkNode(kind::STRING_TO_REGEXP, restr)))); + cc = cc == d_true ? conc2 : nm->mkNode(kind::AND, cc, conc2); + d_regexp_ant[conc2] = ant; + vconc.push_back(cc); + } + conc = vconc.size() == 0 ? Node::null() : vconc.size() == 1 + ? vconc[0] + : nm->mkNode(kind::OR, vconc); + } + else + { + Trace("strings-loop") << "Strings::Loop: Normal Loop Breaking." + << std::endl; + // right + Node sk_w = mkSkolemS("w_loop"); + Node sk_y = mkSkolemS("y_loop", 1); + Node sk_z = mkSkolemS("z_loop"); + // t1 * ... * tn = y * z + Node conc1 = t_yz.eqNode(mkConcat(sk_y, sk_z)); + // s1 * ... * sk = z * y * r + vec_r.insert(vec_r.begin(), sk_y); + vec_r.insert(vec_r.begin(), sk_z); + Node conc2 = s_zy.eqNode(mkConcat(vec_r)); + Node conc3 = + normal_forms[other_n_index][index].eqNode(mkConcat(sk_y, sk_w)); + Node restr = r == d_emptyString ? s_zy : mkConcat(sk_z, sk_y); + str_in_re = + nm->mkNode(kind::STRING_IN_REGEXP, + sk_w, + nm->mkNode(kind::REGEXP_STAR, + nm->mkNode(kind::STRING_TO_REGEXP, restr))); + + std::vector vec_conc; + vec_conc.push_back(conc1); + vec_conc.push_back(conc2); + vec_conc.push_back(conc3); + vec_conc.push_back(str_in_re); + // vec_conc.push_back(sk_y.eqNode(d_emptyString).negate());//by mkskolems + conc = nm->mkNode(kind::AND, vec_conc); + } // normal case + + // set its antecedant to ant, to say when it is relevant + if (!str_in_re.isNull()) + { + d_regexp_ant[str_in_re] = ant; + } + // we will be done + if (options::stringProcessLoop()) + { + info.d_conc = conc; + info.d_id = 8; + info.d_nf_pair[0] = normal_form_src[i]; + info.d_nf_pair[1] = normal_form_src[j]; + return true; + } + d_out->setIncomplete(); return false; } diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index f79922a53..54a5b4dcb 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -207,6 +207,7 @@ Node TheoryStringsRewriter::rewriteEquality(Node node) { return NodeManager::currentNM()->mkConst(false); } + // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false ) for (unsigned r = 0; r < 2; r++) { @@ -236,6 +237,17 @@ Node TheoryStringsRewriter::rewriteEquality(Node node) } } + // ( len( s ) != len( t ) ) => ( s == t ---> false ) + // This covers cases like str.++( x, x ) == "a" ---> false + Node len0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); + Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); + Node len_eq = len0.eqNode(len1); + len_eq = Rewriter::rewrite(len_eq); + if (len_eq.isConst() && !len_eq.getConst()) + { + return returnRewrite(node, len_eq, "eq-len-deq"); + } + std::vector c[2]; for (unsigned i = 0; i < 2; i++) { @@ -1139,42 +1151,11 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { retNode = rewriteIndexof( node ); }else if( node.getKind() == kind::STRING_STRREPL ){ retNode = rewriteReplace( node ); - }else if( node.getKind() == kind::STRING_PREFIX ){ - if( node[0].isConst() && node[1].isConst() ){ - CVC4::String s = node[1].getConst(); - CVC4::String t = node[0].getConst(); - retNode = NodeManager::currentNM()->mkConst( false ); - if(s.size() >= t.size()) { - if(t == s.substr(0, t.size())) { - retNode = NodeManager::currentNM()->mkConst( true ); - } - } - } else { - Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); - Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); - retNode = NodeManager::currentNM()->mkNode(kind::AND, - NodeManager::currentNM()->mkNode(kind::GEQ, lent, lens), - node[0].eqNode(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, node[1], - NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) ), lens))); - } - }else if( node.getKind() == kind::STRING_SUFFIX ){ - if(node[0].isConst() && node[1].isConst()) { - CVC4::String s = node[1].getConst(); - CVC4::String t = node[0].getConst(); - retNode = NodeManager::currentNM()->mkConst( false ); - if(s.size() >= t.size()) { - if(t == s.substr(s.size() - t.size(), t.size())) { - retNode = NodeManager::currentNM()->mkConst( true ); - } - } - } else { - Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); - Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); - retNode = NodeManager::currentNM()->mkNode(kind::AND, - NodeManager::currentNM()->mkNode(kind::GEQ, lent, lens), - node[0].eqNode(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, node[1], - NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens), lens))); - } + } + else if (node.getKind() == kind::STRING_PREFIX + || node.getKind() == kind::STRING_SUFFIX) + { + retNode = rewritePrefixSuffix(node); }else if(node.getKind() == kind::STRING_ITOS) { if(node[0].isConst()) { if( node[0].getConst().sgn()==-1 ){ @@ -1620,7 +1601,7 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { Node ret = NodeManager::currentNM()->mkConst(true); return returnRewrite(node, ret, "ctn-eq"); } - else if (node[0].isConst()) + if (node[0].isConst()) { CVC4::String s = node[0].getConst(); if (node[1].isConst()) @@ -1632,9 +1613,19 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { }else{ if (s.size() == 0) { - Node ret = - NodeManager::currentNM()->mkNode(kind::EQUAL, node[0], node[1]); - return returnRewrite(node, ret, "ctn-emptystr"); + Node len1 = + NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); + if (checkEntailArith(len1, true)) + { + // we handle the false case here since the rewrite for equality + // uses this function, hence we want to conclude false if possible. + // len(x)>0 => contains( "", x ) ---> false + Node ret = NodeManager::currentNM()->mkConst(false); + return returnRewrite(node, ret, "ctn-lhs-emptystr"); + } + // contains( "", x ) ---> ( "" = x ) + Node ret = node[0].eqNode(node[1]); + return returnRewrite(node, ret, "ctn-lhs-emptystr-eq"); } else if (node[1].getKind() == kind::STRING_CONCAT) { @@ -1647,6 +1638,16 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { } } } + if (node[1].isConst()) + { + CVC4::String t = node[1].getConst(); + if (t.size() == 0) + { + // contains( x, "" ) ---> true + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(node, ret, "ctn-rhs-emptystr"); + } + } std::vector nc1; getConcat(node[0], nc1); std::vector nc2; @@ -1926,11 +1927,20 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { if( node[1]==node[2] ){ return returnRewrite(node, node[0], "rpl-id"); } - else if (node[0].isConst() && node[0].getConst().isEmptyString()) + if (node[0].isConst()) { - return returnRewrite(node, node[0], "rpl-empty"); + CVC4::String s = node[0].getConst(); + if (s.isEmptyString()) + { + return returnRewrite(node, node[0], "rpl-empty"); + } + if (node[0] == node[2] && s.size() == 1) + { + // str.replace( "A", x, "A" ) -> "A" + return returnRewrite(node, node[0], "rpl-char-id"); + } } - else if (node[1].isConst() && node[1].getConst().isEmptyString()) + if (node[1].isConst() && node[1].getConst().isEmptyString()) { return returnRewrite(node, node[0], "rpl-rpl-empty"); } @@ -1979,6 +1989,17 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { } } + if (node[0] == node[2]) + { + // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x + Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); + Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); + if (checkEntailArith(l1, l0)) + { + return returnRewrite(node, node[0], "rpl-rpl-len-id"); + } + } + std::vector children1; getConcat(node[1], children1); @@ -2080,6 +2101,82 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { return node; } +Node TheoryStringsRewriter::rewritePrefixSuffix(Node n) +{ + Assert(n.getKind() == kind::STRING_PREFIX + || n.getKind() == kind::STRING_SUFFIX); + bool isPrefix = n.getKind() == kind::STRING_PREFIX; + if (n[0] == n[1]) + { + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(n, ret, "suf/prefix-eq"); + } + if (n[0].isConst()) + { + CVC4::String t = n[0].getConst(); + if (t.isEmptyString()) + { + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(n, ret, "suf/prefix-empty-const"); + } + } + if (n[1].isConst()) + { + CVC4::String s = n[1].getConst(); + if (s.isEmptyString()) + { + Assert(!n[0].isConst()); + Node ret = n[0].eqNode(n[1]); + return returnRewrite(n, ret, "suf/prefix-empty"); + } + else if (n[0].isConst()) + { + Node ret = NodeManager::currentNM()->mkConst(false); + CVC4::String t = n[0].getConst(); + if (s.size() >= t.size()) + { + if ((isPrefix && t == s.prefix(t.size())) + || (!isPrefix && t == s.suffix(t.size()))) + { + ret = NodeManager::currentNM()->mkConst(true); + } + } + return returnRewrite(n, ret, "suf/prefix-const"); + } + else if (s.size() == 1) + { + // (str.prefix x "A") and (str.suffix x "A") are equivalent to + // (str.contains "A" x ) + Node ret = + NodeManager::currentNM()->mkNode(kind::STRING_STRCTN, n[1], n[0]); + return returnRewrite(n, ret, "suf/prefix-ctn"); + } + } + Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[0]); + Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[1]); + Node val; + if (isPrefix) + { + val = NodeManager::currentNM()->mkConst(::CVC4::Rational(0)); + } + else + { + val = NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens); + } + // general reduction to equality + substr + Node retNode = n[0].eqNode( + NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens)); + // add length constraint if it cannot be shown by simple entailment check + if (!checkEntailArith(lent, lens)) + { + retNode = NodeManager::currentNM()->mkNode( + kind::AND, + retNode, + NodeManager::currentNM()->mkNode(kind::GEQ, lent, lens)); + } + return retNode; +} + void TheoryStringsRewriter::getConcat( Node n, std::vector< Node >& c ) { if( n.getKind()==kind::STRING_CONCAT || n.getKind()==kind::REGEXP_CONCAT ){ for( unsigned i=0; i