summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/parser/smt2/smt2.cpp1
-rw-r--r--src/printer/smt2/smt2_printer.cpp1
-rw-r--r--src/theory/strings/kinds2
-rw-r--r--src/theory/strings/theory_strings.cpp374
-rw-r--r--src/theory/strings/theory_strings.h38
-rw-r--r--src/theory/strings/theory_strings_rewriter.cpp28
-rw-r--r--src/theory/strings/theory_strings_rewriter.h6
-rw-r--r--src/theory/strings/theory_strings_type_rules.h4
-rw-r--r--src/util/regexp.cpp26
-rw-r--r--src/util/regexp.h48
-rw-r--r--test/regress/Makefile.tests5
-rw-r--r--test/regress/regress1/strings/code-sequence.smt214
-rw-r--r--test/regress/regress1/strings/str-code-sat.smt224
-rw-r--r--test/regress/regress1/strings/str-code-unsat-2.smt26
-rw-r--r--test/regress/regress1/strings/str-code-unsat-3.smt221
-rw-r--r--test/regress/regress1/strings/str-code-unsat.smt221
16 files changed, 507 insertions, 112 deletions
diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp
index 1d66ab0c1..09dccdfbd 100644
--- a/src/parser/smt2/smt2.cpp
+++ b/src/parser/smt2/smt2.cpp
@@ -153,6 +153,7 @@ void Smt2::addStringOperators() {
addOperator(kind::REGEXP_OPT, "re.opt");
addOperator(kind::REGEXP_RANGE, "re.range");
addOperator(kind::REGEXP_LOOP, "re.loop");
+ addOperator(kind::STRING_CODE, "str.code");
}
void Smt2::addFloatingPointOperators() {
diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp
index 96bee9724..2c6a26335 100644
--- a/src/printer/smt2/smt2_printer.cpp
+++ b/src/printer/smt2/smt2_printer.cpp
@@ -523,6 +523,7 @@ void Smt2Printer::toStream(std::ostream& out,
case kind::STRING_SUFFIX:
case kind::STRING_ITOS:
case kind::STRING_STOI:
+ case kind::STRING_CODE:
case kind::STRING_TO_REGEXP:
case kind::REGEXP_CONCAT:
case kind::REGEXP_UNION:
diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds
index 15dd5b423..34237f69e 100644
--- a/src/theory/strings/kinds
+++ b/src/theory/strings/kinds
@@ -24,6 +24,7 @@ operator STRING_PREFIX 2 "string prefixof"
operator STRING_SUFFIX 2 "string suffixof"
operator STRING_ITOS 1 "integer to string"
operator STRING_STOI 1 "string to integer (total function)"
+operator STRING_CODE 1 "string to code, returns the code of the first character of the string if it has length one, -1 otherwise"
#sort CHAR_TYPE \
# Cardinality::INTEGERS \
@@ -103,6 +104,7 @@ typerule STRING_PREFIX ::CVC4::theory::strings::StringPrefixOfTypeRule
typerule STRING_SUFFIX ::CVC4::theory::strings::StringSuffixOfTypeRule
typerule STRING_ITOS ::CVC4::theory::strings::StringIntToStrTypeRule
typerule STRING_STOI ::CVC4::theory::strings::StringStrToIntTypeRule
+typerule STRING_CODE ::CVC4::theory::strings::StringStrToIntTypeRule
typerule STRING_IN_REGEXP ::CVC4::theory::strings::StringInRegExpTypeRule
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index c780caca2..b77a616b3 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -32,6 +32,7 @@
using namespace std;
using namespace CVC4::context;
+using namespace CVC4::kind;
namespace CVC4 {
namespace theory {
@@ -73,13 +74,14 @@ Node TheoryStrings::TermIndex::add( TNode n, unsigned index, TheoryStrings* t, N
}
}
-
-TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u,
- OutputChannel& out, Valuation valuation,
+TheoryStrings::TheoryStrings(context::Context* c,
+ context::UserContext* u,
+ OutputChannel& out,
+ Valuation valuation,
const LogicInfo& logicInfo)
: Theory(THEORY_STRINGS, c, u, out, valuation, logicInfo),
RMAXINT(LONG_MAX),
- d_notify( *this ),
+ d_notify(*this),
d_equalityEngine(d_notify, c, "theory::strings", true),
d_conflict(c, false),
d_infer(c),
@@ -98,7 +100,8 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u,
d_proxy_var(u),
d_proxy_var_to_length(u),
d_functionsTerms(c),
- d_has_extf(c, false ),
+ d_has_extf(c, false),
+ d_has_str_code(false),
d_regexp_memberships(c),
d_regexp_ucached(u),
d_regexp_ccached(c),
@@ -121,11 +124,13 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u,
getExtTheory()->addFunctionKind(kind::STRING_STRREPL);
getExtTheory()->addFunctionKind(kind::STRING_STRCTN);
getExtTheory()->addFunctionKind(kind::STRING_IN_REGEXP);
+ getExtTheory()->addFunctionKind(kind::STRING_CODE);
// The kinds we are treating as function application in congruence
- d_equalityEngine.addFunctionKind(kind::STRING_IN_REGEXP);
d_equalityEngine.addFunctionKind(kind::STRING_LENGTH);
d_equalityEngine.addFunctionKind(kind::STRING_CONCAT);
+ d_equalityEngine.addFunctionKind(kind::STRING_IN_REGEXP);
+ d_equalityEngine.addFunctionKind(kind::STRING_CODE);
if( options::stringLazyPreproc() ){
d_equalityEngine.addFunctionKind(kind::STRING_STRCTN);
d_equalityEngine.addFunctionKind(kind::STRING_SUBSTR);
@@ -410,7 +415,9 @@ int TheoryStrings::getReduction( int effort, Node n, Node& nr ) {
if( d_preproc_cache.find( c_n )==d_preproc_cache.end() ){
d_preproc_cache[ c_n ] = true;
Trace("strings-process-debug") << "Process reduction for " << n << ", pol = " << pol << std::endl;
- if( n.getKind()==kind::STRING_STRCTN && pol==1 ){
+ Kind k = n.getKind();
+ if (k == kind::STRING_STRCTN && pol == 1)
+ {
Node x = n[0];
Node s = n[1];
//positive contains reduces to a equality
@@ -423,9 +430,13 @@ int TheoryStrings::getReduction( int effort, Node n, Node& nr ) {
//we've reduced this n
Trace("strings-extf-debug") << " resolve extf : " << n << " based on positive contain reduction." << std::endl;
return 1;
- }else{
- // for STRING_SUBSTR, STRING_STRCTN with pol=-1,
- // STRING_STRIDOF, STRING_ITOS, STRING_STOI, STRING_STRREPL
+ }
+ else if (k != kind::STRING_CODE)
+ {
+ Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF
+ || k == STRING_ITOS
+ || k == STRING_STOI
+ || k == STRING_STRREPL);
std::vector< Node > new_nodes;
Node res = d_preproc.simplify( n, new_nodes );
Assert( res!=n );
@@ -478,6 +489,7 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
return false;
}
+ NodeManager* nm = NodeManager::currentNM();
// Generate model
std::vector< Node > nodes;
getEquivalenceClasses( nodes );
@@ -521,20 +533,45 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
////step 2 : assign arbitrary values for unknown lengths?
// confirmed by calculus invariant, see paper
Trace("strings-model") << "Assign to equivalence classes..." << std::endl;
+ std::map<Node, Node> pure_eq_assign;
//step 3 : assign values to equivalence classes that are pure variables
for( unsigned i=0; i<col.size(); i++ ){
std::vector< Node > pure_eq;
Trace("strings-model") << "The equivalence classes ";
- for( unsigned j=0; j<col[i].size(); j++ ) {
- Trace("strings-model") << col[i][j] << " ";
+ for (const Node& eqc : col[i])
+ {
+ Trace("strings-model") << eqc << " ";
//check if col[i][j] has only variables
- if( !col[i][j].isConst() ){
- Assert( d_normal_forms.find( col[i][j] )!=d_normal_forms.end() );
- if( d_normal_forms[col[i][j]].size()==1 ){//&& d_normal_forms[col[i][j]][0]==col[i][j] ){
- pure_eq.push_back( col[i][j] );
+ if (!eqc.isConst())
+ {
+ Assert(d_normal_forms.find(eqc) != d_normal_forms.end());
+ if (d_normal_forms[eqc].size() == 1)
+ {
+ // does it have a code?
+ if (d_has_str_code)
+ {
+ EqcInfo* eip = getOrMakeEqcInfo(eqc, false);
+ if (eip && !eip->d_code_term.get().isNull())
+ {
+ // its value must be equal to its code
+ Node ct = nm->mkNode(kind::STRING_CODE, eip->d_code_term.get());
+ Node ctv = d_valuation.getModelValue(ct);
+ unsigned cvalue =
+ ctv.getConst<Rational>().getNumerator().toUnsignedInt();
+ Trace("strings-model") << "(code: " << cvalue << ") ";
+ std::vector<unsigned> vec;
+ vec.push_back(String::convertCodeToUnsignedInt(cvalue));
+ Node mv = nm->mkConst(String(vec));
+ pure_eq_assign[eqc] = mv;
+ m->getEqualityEngine()->addTerm( mv );
+ }
+ }
+ pure_eq.push_back(eqc);
}
- }else{
- processed[col[i][j]] = col[i][j];
+ }
+ else
+ {
+ processed[eqc] = eqc;
}
}
Trace("strings-model") << "have length " << lts_values[i] << std::endl;
@@ -547,7 +584,7 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
lvalue++;
}
Trace("strings-model") << "*** Decide to make length of " << lvalue << std::endl;
- lts_values[i] = NodeManager::currentNM()->mkConst( Rational( lvalue ) );
+ lts_values[i] = nm->mkConst(Rational(lvalue));
values_used[ lvalue ] = true;
}
Trace("strings-model") << "Need to assign values of length " << lts_values[i] << " to equivalence classes ";
@@ -556,22 +593,33 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m)
}
Trace("strings-model") << std::endl;
-
//use type enumerator
Assert(lts_values[i].getConst<Rational>() <= RMAXINT, "Exceeded LONG_MAX in string model");
StringEnumeratorLength sel(lts_values[i].getConst<Rational>().getNumerator().toUnsignedInt());
- for( unsigned j=0; j<pure_eq.size(); j++ ){
- Assert( !sel.isFinished() );
- Node c = *sel;
- while( d_equalityEngine.hasTerm( c ) ){
- ++sel;
+ for (const Node& eqc : pure_eq)
+ {
+ Node c;
+ std::map<Node, Node>::iterator itp = pure_eq_assign.find(eqc);
+ if (itp == pure_eq_assign.end())
+ {
Assert( !sel.isFinished() );
c = *sel;
+ while (m->hasTerm(c))
+ {
+ ++sel;
+ Assert(!sel.isFinished());
+ c = *sel;
+ }
+ ++sel;
+ }
+ else
+ {
+ c = itp->second;
}
- ++sel;
- Trace("strings-model") << "*** Assigned constant " << c << " for " << pure_eq[j] << std::endl;
- processed[pure_eq[j]] = c;
- if (!m->assertEquality(pure_eq[j], c, true))
+ Trace("strings-model") << "*** Assigned constant " << c << " for "
+ << eqc << std::endl;
+ processed[eqc] = c;
+ if (!m->assertEquality(eqc, c, true))
{
return false;
}
@@ -645,9 +693,9 @@ void TheoryStrings::preRegisterTerm(TNode n) {
break;
}
default: {
+ registerTerm(n, 0);
TypeNode tn = n.getType();
if( tn.isString() ) {
- registerTerm( n, 0 );
// if finite model finding is enabled,
// then we minimize the length of this term if it is a variable
// but not an internally generated Skolem, or a term that does
@@ -840,8 +888,12 @@ void TheoryStrings::checkExtfReductions( int effort ) {
}
}
-TheoryStrings::EqcInfo::EqcInfo( context::Context* c ) : d_length_term(c), d_cardinality_lem_k(c), d_normalized_length(c) {
-
+TheoryStrings::EqcInfo::EqcInfo(context::Context* c)
+ : d_length_term(c),
+ d_code_term(c),
+ d_cardinality_lem_k(c),
+ d_normalized_length(c)
+{
}
TheoryStrings::EqcInfo * TheoryStrings::getOrMakeEqcInfo( Node eqc, bool doMake ) {
@@ -874,11 +926,20 @@ void TheoryStrings::conflict(TNode a, TNode b){
/** called when a new equivalance class is created */
void TheoryStrings::eqNotifyNewClass(TNode t){
- if( t.getKind() == kind::STRING_LENGTH ){
+ Kind k = t.getKind();
+ if (k == kind::STRING_LENGTH || k == kind::STRING_CODE)
+ {
Trace("strings-debug") << "New length eqc : " << t << std::endl;
Node r = d_equalityEngine.getRepresentative(t[0]);
EqcInfo * ei = getOrMakeEqcInfo( r, true );
- ei->d_length_term = t[0];
+ if (k == kind::STRING_LENGTH)
+ {
+ ei->d_length_term = t[0];
+ }
+ else
+ {
+ ei->d_code_term = t[0];
+ }
//we care about the length of this string
registerTerm( t[0], 1 );
}else{
@@ -895,6 +956,10 @@ void TheoryStrings::eqNotifyPreMerge(TNode t1, TNode t2){
if( !e2->d_length_term.get().isNull() ){
e1->d_length_term.set( e2->d_length_term );
}
+ if (!e2->d_code_term.get().isNull())
+ {
+ e1->d_code_term.set(e2->d_code_term);
+ }
if( e2->d_cardinality_lem_k.get()>e1->d_cardinality_lem_k.get() ) {
e1->d_cardinality_lem_k.set( e2->d_cardinality_lem_k );
}
@@ -2013,64 +2078,164 @@ void TheoryStrings::checkNormalForms(){
}
}
}
- if( !hasProcessed() ){
- Trace("strings-process") << "Normalize equivalence classes...." << std::endl;
- //calculate normal forms for each equivalence class, possibly adding splitting lemmas
- d_normal_forms.clear();
- d_normal_forms_exp.clear();
- std::map< Node, Node > nf_to_eqc;
- std::map< Node, Node > eqc_to_nf;
- std::map< Node, Node > eqc_to_exp;
- for( unsigned i=0; i<d_strings_eqc.size(); i++ ) {
- Node eqc = d_strings_eqc[i];
- Trace("strings-process-debug") << "- Verify normal forms are the same for " << eqc << std::endl;
- normalizeEquivalenceClass( eqc );
- Trace("strings-debug") << "Finished normalizing eqc..." << std::endl;
+ if (hasProcessed())
+ {
+ return;
+ }
+ Trace("strings-process") << "Normalize equivalence classes...." << std::endl;
+ // calculate normal forms for each equivalence class, possibly adding
+ // splitting lemmas
+ d_normal_forms.clear();
+ d_normal_forms_exp.clear();
+ std::map<Node, Node> nf_to_eqc;
+ std::map<Node, Node> eqc_to_nf;
+ std::map<Node, Node> eqc_to_exp;
+ for (const Node& eqc : d_strings_eqc)
+ {
+ Trace("strings-process-debug") << "- Verify normal forms are the same for "
+ << eqc << std::endl;
+ normalizeEquivalenceClass(eqc);
+ Trace("strings-debug") << "Finished normalizing eqc..." << std::endl;
+ if (hasProcessed())
+ {
+ return;
+ }
+ Node nf_term = mkConcat(d_normal_forms[eqc]);
+ std::map<Node, Node>::iterator itn = nf_to_eqc.find(nf_term);
+ if (itn != nf_to_eqc.end())
+ {
+ // two equivalence classes have same normal form, merge
+ std::vector<Node> nf_exp;
+ nf_exp.push_back(mkAnd(d_normal_forms_exp[eqc]));
+ nf_exp.push_back(eqc_to_exp[itn->second]);
+ Node eq =
+ d_normal_forms_base[eqc].eqNode(d_normal_forms_base[itn->second]);
+ sendInference(nf_exp, eq, "Normal_Form");
if( hasProcessed() ){
return;
- }else{
- Node nf_term = mkConcat( d_normal_forms[eqc] );
- std::map< Node, Node >::iterator itn = nf_to_eqc.find( nf_term );
- if( itn!=nf_to_eqc.end() ){
- //two equivalence classes have same normal form, merge
- std::vector< Node > nf_exp;
- nf_exp.push_back( mkAnd( d_normal_forms_exp[eqc] ) );
- nf_exp.push_back( eqc_to_exp[itn->second] );
- Node eq = d_normal_forms_base[eqc].eqNode( d_normal_forms_base[itn->second] );
- sendInference( nf_exp, eq, "Normal_Form" );
- } else {
- nf_to_eqc[nf_term] = eqc;
- eqc_to_nf[eqc] = nf_term;
- eqc_to_exp[eqc] = mkAnd( d_normal_forms_exp[eqc] );
- }
}
- Trace("strings-process-debug") << "Done verifying normal forms are the same for " << eqc << std::endl;
}
- if( !hasProcessed() ){
- if(Trace.isOn("strings-nf")) {
- Trace("strings-nf") << "**** Normal forms are : " << std::endl;
- for( std::map< Node, Node >::iterator it = eqc_to_exp.begin(); it != eqc_to_exp.end(); ++it ){
- Trace("strings-nf") << " N[" << it->first << "] (base " << d_normal_forms_base[it->first] << ") = " << eqc_to_nf[it->first] << std::endl;
- Trace("strings-nf") << " exp: " << it->second << std::endl;
+ else
+ {
+ nf_to_eqc[nf_term] = eqc;
+ eqc_to_nf[eqc] = nf_term;
+ eqc_to_exp[eqc] = mkAnd(d_normal_forms_exp[eqc]);
+ }
+ Trace("strings-process-debug")
+ << "Done verifying normal forms are the same for " << eqc << std::endl;
+ }
+ if (Trace.isOn("strings-nf"))
+ {
+ Trace("strings-nf") << "**** Normal forms are : " << std::endl;
+ for (std::map<Node, Node>::iterator it = eqc_to_exp.begin();
+ it != eqc_to_exp.end();
+ ++it)
+ {
+ Trace("strings-nf") << " N[" << it->first << "] (base "
+ << d_normal_forms_base[it->first]
+ << ") = " << eqc_to_nf[it->first] << std::endl;
+ Trace("strings-nf") << " exp: " << it->second << std::endl;
+ }
+ Trace("strings-nf") << std::endl;
+ }
+ checkExtfEval(1);
+ Trace("strings-process-debug")
+ << "Done check extended functions re-eval, addedFact = "
+ << !d_pending.empty() << " " << !d_lemma_cache.empty()
+ << ", d_conflict = " << d_conflict << std::endl;
+ if (hasProcessed())
+ {
+ return;
+ }
+ if (!options::stringEagerLen())
+ {
+ checkLengthsEqc();
+ if (hasProcessed())
+ {
+ return;
+ }
+ }
+ // process disequalities between equivalence classes
+ checkDeqNF();
+ Trace("strings-process-debug")
+ << "Done check disequalities, addedFact = " << !d_pending.empty() << " "
+ << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl;
+ if (hasProcessed())
+ {
+ return;
+ }
+ // ensure that lemmas regarding str.code been added for each constant string
+ // of length one
+ if (d_has_str_code)
+ {
+ NodeManager* nm = NodeManager::currentNM();
+ // str.code applied to the code term for each equivalence class that has a
+ // code term but is not a constant
+ std::vector<Node> nconst_codes;
+ // str.code applied to the proxy variables for each equivalence classes that
+ // are constants of size one
+ std::vector<Node> const_codes;
+ for (const Node& eqc : d_strings_eqc)
+ {
+ if (d_normal_forms[eqc].size() == 1 && d_normal_forms[eqc][0].isConst())
+ {
+ Node c = d_normal_forms[eqc][0];
+ Trace("strings-code-debug") << "Get proxy variable for " << c
+ << std::endl;
+ Node cc = nm->mkNode(kind::STRING_CODE, c);
+ cc = Rewriter::rewrite(cc);
+ Assert(cc.isConst());
+ NodeNodeMap::const_iterator it = d_proxy_var.find(c);
+ AlwaysAssert(it != d_proxy_var.end());
+ Node vc = nm->mkNode(kind::STRING_CODE, (*it).second);
+ if (!areEqual(cc, vc))
+ {
+ sendInference(d_empty_vec, cc.eqNode(vc), "Code_Proxy");
}
- Trace("strings-nf") << std::endl;
+ const_codes.push_back(vc);
}
- checkExtfEval( 1 );
- Trace("strings-process-debug") << "Done check extended functions re-eval, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl;
- if( !hasProcessed() ){
- if( !options::stringEagerLen() ){
- checkLengthsEqc();
- if( hasProcessed() ){
- return;
- }
+ else
+ {
+ EqcInfo* ei = getOrMakeEqcInfo(eqc, false);
+ if (ei && !ei->d_code_term.get().isNull())
+ {
+ Node vc = nm->mkNode(kind::STRING_CODE, ei->d_code_term.get());
+ nconst_codes.push_back(vc);
+ }
+ }
+ }
+ if (hasProcessed())
+ {
+ return;
+ }
+ // now, ensure that str.code is injective
+ std::vector<Node> cmps;
+ cmps.insert(cmps.end(), const_codes.rbegin(), const_codes.rend());
+ cmps.insert(cmps.end(), nconst_codes.rbegin(), nconst_codes.rend());
+ for (unsigned i = 0, num_ncc = nconst_codes.size(); i < num_ncc; i++)
+ {
+ Node c1 = nconst_codes[i];
+ cmps.pop_back();
+ for (const Node& c2 : cmps)
+ {
+ Trace("strings-code-debug") << "Compare codes : " << c1 << " " << c2
+ << std::endl;
+ if (!areDisequal(c1, c2))
+ {
+ Node eqn = c1[0].eqNode(c2[0]);
+ Node eq = c1.eqNode(c2);
+ Node inj_lem = nm->mkNode(kind::OR, eq.negate(), eqn);
+ sendInference(d_empty_vec, inj_lem, "Code_Inj");
}
- //process disequalities between equivalence classes
- checkDeqNF();
- Trace("strings-process-debug") << "Done check disequalities, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl;
}
}
- Trace("strings-solve") << "Finished check normal forms, #lemmas = " << d_lemma_cache.size() << ", conflict = " << d_conflict << std::endl;
}
+ Trace("strings-process-debug")
+ << "Done check code, addedFact = " << !d_pending.empty() << " "
+ << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl;
+ Trace("strings-solve") << "Finished check normal forms, #lemmas = "
+ << d_lemma_cache.size()
+ << ", conflict = " << d_conflict << std::endl;
}
//compute d_normal_forms_(base,exp,exp_depend)[eqc]
@@ -3278,21 +3443,25 @@ bool TheoryStrings::isNormalFormPair2( Node n1, Node n2 ) {
}
void TheoryStrings::registerTerm( Node n, int effort ) {
- // 0 : upon preregistration or internal assertion
- // 1 : upon occurrence in length term
- // 2 : before normal form computation
- // 3 : called on normal form terms
- bool do_register = false;
- if( options::stringEagerLen() ){
- do_register = effort==0;
- }else{
- do_register = effort>0 || n.getKind()!=kind::STRING_CONCAT;
+ TypeNode tn = n.getType();
+ bool do_register = true;
+ if (!tn.isString())
+ {
+ if (options::stringEagerLen())
+ {
+ do_register = effort == 0;
+ }
+ else
+ {
+ do_register = effort > 0 || n.getKind() != kind::STRING_CONCAT;
+ }
}
if( do_register ){
if(d_registered_terms_cache.find(n) == d_registered_terms_cache.end()) {
d_registered_terms_cache.insert(n);
Debug("strings-register") << "TheoryStrings::registerTerm() " << n << ", effort = " << effort << std::endl;
- if(n.getType().isString()) {
+ if (tn.isString())
+ {
//register length information:
// for variables, split on empty vs positive length
// for concat/const/replace, introduce proxy var and state length relation
@@ -3344,10 +3513,25 @@ void TheoryStrings::registerTerm( Node n, int effort ) {
Trace("strings-lemma-debug") << " prerewrite : " << skl.eqNode( lsum ) << std::endl;
Trace("strings-assert") << "(assert " << ceq << ")" << std::endl;
d_out->lemma(ceq);
-
}
- } else {
- AlwaysAssert(false, "String Terms only in registerTerm.");
+ }
+ else if (n.getKind() == kind::STRING_CODE)
+ {
+ d_has_str_code = true;
+ NodeManager* nm = NodeManager::currentNM();
+ // ite( str.len(s)==1, 0 <= str.code(s) < num_codes, str.code(s)=-1 )
+ Node neg_one = nm->mkConst(Rational(-1));
+ Node code_len = mkLength(n[0]).eqNode(d_one);
+ Node code_eq_neg1 = n.eqNode(nm->mkConst(Rational(-1)));
+ Node code_range = nm->mkNode(
+ kind::AND,
+ nm->mkNode(kind::GEQ, n, d_zero),
+ nm->mkNode(
+ kind::LT, n, nm->mkConst(Rational(CVC4::String::num_codes()))));
+ Node lem = nm->mkNode(kind::ITE, code_len, code_range, code_eq_neg1);
+ Trace("strings-lemma") << "Strings::Lemma CODE : " << lem << std::endl;
+ Trace("strings-assert") << "(assert " << lem << ")" << std::endl;
+ d_out->lemma(lem);
}
}
}
diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h
index 22406adef..85d2754a8 100644
--- a/src/theory/strings/theory_strings.h
+++ b/src/theory/strings/theory_strings.h
@@ -294,18 +294,31 @@ private:
EqualityStatus getEqualityStatus(TNode a, TNode b) override;
private:
+ /** SAT-context-dependent information about an equivalence class */
class EqcInfo {
public:
EqcInfo( context::Context* c );
~EqcInfo(){}
- //constant in this eqc
+ /**
+ * If non-null, this is a term x from this eq class such that str.len( x )
+ * occurs as a term in this SAT context.
+ */
context::CDO< Node > d_length_term;
+ /**
+ * If non-null, this is a term x from this eq class such that str.code( x )
+ * occurs as a term in this SAT context.
+ */
+ context::CDO<Node> d_code_term;
context::CDO< unsigned > d_cardinality_lem_k;
- // 1 = added length lemma
context::CDO< Node > d_normalized_length;
};
/** map from representatives to information necessary for equivalence classes */
std::map< Node, EqcInfo* > d_eqc_info;
+ /**
+ * Get the above information for equivalence class eqc. If doMake is true,
+ * we construct a new information class if one does not exist. The term eqc
+ * should currently be a representative of the equality engine of this class.
+ */
EqcInfo * getOrMakeEqcInfo( Node eqc, bool doMake = true );
//maintain which concat terms have the length lemma instantiated
NodeNodeMap d_proxy_var;
@@ -315,6 +328,8 @@ private:
private:
//any non-reduced extended functions exist
context::CDO< bool > d_has_extf;
+ /** have we asserted any str.code terms? */
+ bool d_has_str_code;
// static information about extf
class ExtfInfo {
public:
@@ -448,7 +463,24 @@ private:
void addToExplanation(Node a, Node b, std::vector<Node>& exp);
void addToExplanation(Node lit, std::vector<Node>& exp);
- // register term
+ /** Register term
+ *
+ * This performs SAT-context-independent registration for a term n, which
+ * may cause lemmas to be sent on the output channel that involve
+ * "initial refinement lemmas" for n. This includes introducing proxy
+ * variables for string terms and asserting that str.code terms are within
+ * proper bounds.
+ *
+ * Effort is one of the following (TODO make enum #1881):
+ * 0 : upon preregistration or internal assertion
+ * 1 : upon occurrence in length term
+ * 2 : before normal form computation
+ * 3 : called on normal form terms
+ *
+ * Based on the strategy, we may choose to add these initial refinement
+ * lemmas at one of the following efforts, where if it is not the given
+ * effort, the call to this method does nothing.
+ */
void registerTerm(Node n, int effort);
// send lemma
void sendInference(std::vector<Node>& exp,
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp
index 1c885434c..cfd0f6e73 100644
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -1224,6 +1224,10 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) {
} else if(node.getKind() == kind::STRING_IN_REGEXP) {
retNode = rewriteMembership(node);
}
+ else if (node.getKind() == STRING_CODE)
+ {
+ retNode = rewriteStringCode(node);
+ }
Trace("strings-postrewrite") << "Strings::postRewrite returning " << retNode << std::endl;
if( orig!=retNode ){
@@ -2258,6 +2262,30 @@ Node TheoryStringsRewriter::rewritePrefixSuffix(Node n)
return retNode;
}
+Node TheoryStringsRewriter::rewriteStringCode(Node n)
+{
+ Assert(n.getKind() == kind::STRING_CODE);
+ if (n[0].isConst())
+ {
+ CVC4::String s = n[0].getConst<String>();
+ Node ret;
+ if (s.size() == 1)
+ {
+ std::vector<unsigned> vec = s.getVec();
+ Assert(vec.size() == 1);
+ ret = NodeManager::currentNM()->mkConst(
+ Rational(CVC4::String::convertUnsignedIntToCode(vec[0])));
+ }
+ else
+ {
+ ret = NodeManager::currentNM()->mkConst(Rational(-1));
+ }
+ return returnRewrite(n, ret, "code-eval");
+ }
+
+ return n;
+}
+
void TheoryStringsRewriter::getConcat( Node n, std::vector< Node >& c ) {
if( n.getKind()==kind::STRING_CONCAT || n.getKind()==kind::REGEXP_CONCAT ){
for( unsigned i=0; i<n.getNumChildren(); i++ ){
diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h
index 9671fa815..f7e65f3a9 100644
--- a/src/theory/strings/theory_strings_rewriter.h
+++ b/src/theory/strings/theory_strings_rewriter.h
@@ -109,6 +109,12 @@ private:
* Returns the rewritten form of node.
*/
static Node rewritePrefixSuffix(Node node);
+ /** rewrite str.code
+ * This is the entry point for post-rewriting terms n of the form
+ * str.code( t )
+ * Returns the rewritten form of node.
+ */
+ static Node rewriteStringCode(Node node);
/** gets the "vector form" of term n, adds it to c.
* For example:
diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h
index 03267abcd..b4629b338 100644
--- a/src/theory/strings/theory_strings_type_rules.h
+++ b/src/theory/strings/theory_strings_type_rules.h
@@ -228,7 +228,9 @@ public:
if( check ) {
TypeNode t = n[0].getType(check);
if (!t.isString()) {
- throw TypeCheckingExceptionPrivate(n, "expecting a string term in string to int 0");
+ std::stringstream ss;
+ ss << "expecting a string term in argument of " << n.getKind();
+ throw TypeCheckingExceptionPrivate(n, ss.str());
}
}
return nodeManager->integerType();
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp
index 9aaad522a..4cbda5147 100644
--- a/src/util/regexp.cpp
+++ b/src/util/regexp.cpp
@@ -32,6 +32,32 @@ namespace CVC4 {
static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values.");
+unsigned String::convertCharToUnsignedInt(unsigned char c)
+{
+ return convertCodeToUnsignedInt(static_cast<unsigned>(c));
+}
+unsigned char String::convertUnsignedIntToChar(unsigned i)
+{
+ Assert(i < num_codes());
+ return static_cast<unsigned char>(convertUnsignedIntToCode(i));
+}
+bool String::isPrintable(unsigned i)
+{
+ Assert(i < num_codes());
+ unsigned char c = convertUnsignedIntToChar(i);
+ return (c >= ' ' && c <= '~');
+}
+unsigned String::convertCodeToUnsignedInt(unsigned c)
+{
+ Assert(c < num_codes());
+ return (c < start_code() ? c + num_codes() : c) - start_code();
+}
+unsigned String::convertUnsignedIntToCode(unsigned i)
+{
+ Assert(i < num_codes());
+ return (i + start_code()) % num_codes();
+}
+
int String::cmp(const String &y) const {
if (size() != y.size()) {
return size() < y.size() ? -1 : 1;
diff --git a/src/util/regexp.h b/src/util/regexp.h
index 42d232259..c8b491475 100644
--- a/src/util/regexp.h
+++ b/src/util/regexp.h
@@ -28,21 +28,43 @@
namespace CVC4 {
+/** The CVC4 string class
+ *
+ * This data structure is the domain of values for the string type. It can also
+ * be used as a generic utility for representing strings.
+ */
class CVC4_PUBLIC String {
public:
- static unsigned convertCharToUnsignedInt(unsigned char c) {
- unsigned i = c;
- i = i + 191;
- return (i >= 256 ? i - 256 : i);
- }
- static unsigned char convertUnsignedIntToChar(unsigned i) {
- unsigned ii = i + 65;
- return (unsigned char)(ii >= 256 ? ii - 256 : ii);
- }
- static bool isPrintable(unsigned i) {
- unsigned char c = convertUnsignedIntToChar(i);
- return (c >= ' ' && c <= '~'); // isprint( (int)c );
- }
+ /**
+ * The start ASCII code. In our string representation below, we represent
+ * characters using a vector d_vec of unsigned integers. We refer to this as
+ * the "internal representation" for the string.
+ *
+ * We make unsigned integer 0 correspond to the 65th character ("A") in the
+ * ASCII alphabet to make models intuitive. In particular, say if we have
+ * a set of string variables that are distinct but otherwise unconstrained,
+ * then the model may assign them "A", "B", "C", ...
+ */
+ static inline unsigned start_code() { return 65; }
+ /**
+ * This is the cardinality of the alphabet that is representable by this
+ * class. Notice that this must be greater than or equal to the cardinality
+ * of the alphabet that the string theory reasons about.
+ */
+ static inline unsigned num_codes() { return 256; }
+ /**
+ * Convert unsigned char to the unsigned used in the internal representation
+ * in d_vec below.
+ */
+ static unsigned convertCharToUnsignedInt(unsigned char c);
+ /** Convert the internal unsigned to a unsigned char. */
+ static unsigned char convertUnsignedIntToChar(unsigned i);
+ /** Does the internal unsigned correspond to a printable character? */
+ static bool isPrintable(unsigned i);
+ /** get the internal unsigned for ASCII code c. */
+ static unsigned convertCodeToUnsignedInt(unsigned c);
+ /** get the ASCII code number that internal unsigned i corresponds to. */
+ static unsigned convertUnsignedIntToCode(unsigned i);
/** constructors for String
*
diff --git a/test/regress/Makefile.tests b/test/regress/Makefile.tests
index 0a0732753..8584eeca9 100644
--- a/test/regress/Makefile.tests
+++ b/test/regress/Makefile.tests
@@ -1410,6 +1410,7 @@ REG1_TESTS = \
regress1/strings/cmu-5042-0707-2.smt2 \
regress1/strings/cmu-inc-nlpp-071516.smt2 \
regress1/strings/cmu-substr-rw.smt2 \
+ regress1/strings/code-sequence.smt2 \
regress1/strings/crash-1019.smt2 \
regress1/strings/csp-prefix-exp-bug.smt2 \
regress1/strings/double-replace.smt2 \
@@ -1452,6 +1453,10 @@ REG1_TESTS = \
regress1/strings/string-unsound-sem.smt2 \
regress1/strings/strings-index-empty.smt2 \
regress1/strings/strip-endpt-sound.smt2 \
+ regress1/strings/str-code-sat.smt2 \
+ regress1/strings/str-code-unsat.smt2 \
+ regress1/strings/str-code-unsat-2.smt2 \
+ regress1/strings/str-code-unsat-3.smt2 \
regress1/strings/substr001.smt2 \
regress1/strings/type002.smt2 \
regress1/strings/type003.smt2 \
diff --git a/test/regress/regress1/strings/code-sequence.smt2 b/test/regress/regress1/strings/code-sequence.smt2
new file mode 100644
index 000000000..2654017d4
--- /dev/null
+++ b/test/regress/regress1/strings/code-sequence.smt2
@@ -0,0 +1,14 @@
+(set-logic SLIA)
+(set-option :strings-exp true)
+(set-option :strings-fmf true)
+(set-option :fmf-bound true)
+(set-info :status sat)
+(declare-fun x () String)
+(assert (forall ((n Int)) (=> (and (<= 0 n) (< n (str.len x)))
+(and
+(<= 60 (str.code (str.at x n)))
+(<= (str.code (str.at x n)) 90)
+(< (+ 1 (str.code (str.at x (- n 1)))) (str.code (str.at x n)))
+))))
+(assert (> (str.len x) 3))
+(check-sat)
diff --git a/test/regress/regress1/strings/str-code-sat.smt2 b/test/regress/regress1/strings/str-code-sat.smt2
new file mode 100644
index 000000000..1acc091c1
--- /dev/null
+++ b/test/regress/regress1/strings/str-code-sat.smt2
@@ -0,0 +1,24 @@
+(set-logic QF_SLIA)
+(set-info :status sat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+(declare-fun w () String)
+
+(assert (>= (str.code x) 65))
+(assert (<= (str.code x) 75))
+
+(assert (>= (str.code y) 65))
+(assert (<= (str.code y) 75))
+
+(assert (>= (str.code z) 65))
+(assert (<= (str.code z) 75))
+
+(assert (= (str.len w) 1))
+
+(assert (= (+ (str.code x) (str.code y)) 140))
+(assert (= (+ (str.code x) (str.code z)) 141))
+
+(assert (distinct x y z w "A" "B" "C" "D" "AA"))
+
+(check-sat)
diff --git a/test/regress/regress1/strings/str-code-unsat-2.smt2 b/test/regress/regress1/strings/str-code-unsat-2.smt2
new file mode 100644
index 000000000..38116061e
--- /dev/null
+++ b/test/regress/regress1/strings/str-code-unsat-2.smt2
@@ -0,0 +1,6 @@
+(set-logic QF_SLIA)
+(set-info :status unsat)
+(declare-fun x () String)
+(assert (= (str.len x) 1))
+(assert (or (< (str.code x) 0) (> (str.code x) 10000000000000000000000000000)))
+(check-sat)
diff --git a/test/regress/regress1/strings/str-code-unsat-3.smt2 b/test/regress/regress1/strings/str-code-unsat-3.smt2
new file mode 100644
index 000000000..fa34785c2
--- /dev/null
+++ b/test/regress/regress1/strings/str-code-unsat-3.smt2
@@ -0,0 +1,21 @@
+(set-logic QF_SLIA)
+(set-info :status unsat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+
+(assert (>= (str.code x) 65))
+(assert (<= (str.code x) 75))
+
+(assert (>= (str.code y) 65))
+(assert (<= (str.code y) 75))
+
+(assert (>= (str.code z) 65))
+(assert (<= (str.code z) 75))
+
+(assert (= (+ (str.code x) (str.code y)) 140))
+(assert (= (+ (str.code x) (str.code z)) 141))
+
+(assert (distinct x y z "B" "C" "D" "E"))
+
+(check-sat)
diff --git a/test/regress/regress1/strings/str-code-unsat.smt2 b/test/regress/regress1/strings/str-code-unsat.smt2
new file mode 100644
index 000000000..6a3e9062b
--- /dev/null
+++ b/test/regress/regress1/strings/str-code-unsat.smt2
@@ -0,0 +1,21 @@
+(set-logic QF_SLIA)
+(set-info :status unsat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+
+(assert (>= (str.code x) 65))
+(assert (<= (str.code x) 75))
+
+(assert (>= (str.code y) 65))
+(assert (<= (str.code y) 75))
+
+(assert (>= (str.code z) 65))
+(assert (<= (str.code z) 75))
+
+(assert (= (+ (str.code x) (str.code y)) 140))
+(assert (= (+ (str.code x) (str.code z)) 140))
+
+(assert (distinct x y z))
+
+(check-sat)
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback