summaryrefslogtreecommitdiff
path: root/src/theory/strings/term_registry.h
blob: f0b4a74c64e9da1d6337a8cb7a74df92bd5601ea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
/*********************                                                        */
/*! \file term_registry.h
 ** \verbatim
 ** Top contributors (to current version):
 **   Andrew Reynolds, Tim King, Tianyi Liang
 ** This file is part of the CVC4 project.
 ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
 ** in the top-level source directory) and their institutional affiliations.
 ** All rights reserved.  See the file COPYING in the top-level source
 ** directory for licensing information.\endverbatim
 **
 ** \brief Term registry for the theory of strings.
 **/

#include "cvc4_private.h"

#ifndef CVC4__THEORY__STRINGS__TERM_REGISTRY_H
#define CVC4__THEORY__STRINGS__TERM_REGISTRY_H

#include "context/cdhashset.h"
#include "context/cdlist.h"
#include "theory/output_channel.h"
#include "theory/strings/infer_info.h"
#include "theory/strings/sequences_stats.h"
#include "theory/strings/skolem_cache.h"
#include "theory/uf/equality_engine.h"

namespace CVC4 {
namespace theory {
namespace strings {

/**
 * This class manages all the (pre)registration tasks for terms. These tasks
 * include:
 * (1) Sending out preregistration lemmas for terms,
 * (2) Add terms to the equality engine,
 * (3) Maintaining a list of terms d_functionsTerms (for theory combination),
 * (4) Maintaining a list of input variables d_inputVars (for fmf).
 * (5) Maintaining a skolem cache. Notice that this skolem cache is the
 * official skolem cache that should be used by all modules in TheoryStrings.
 */
class TermRegistry
{
  typedef context::CDHashSet<Node, NodeHashFunction> NodeSet;
  typedef context::CDHashSet<TypeNode, TypeNodeHashFunction> TypeNodeSet;
  typedef context::CDHashMap<Node, Node, NodeHashFunction> NodeNodeMap;

 public:
  TermRegistry(context::Context* c,
               context::UserContext* u,
               eq::EqualityEngine& ee,
               OutputChannel& out,
               SequencesStatistics& statistics);
  ~TermRegistry();
  /**
   * Preregister term, called when TheoryStrings::preRegisterTerm(n) is called.
   * This does the following:
   * - Checks for illegal terms and throws a LogicException if any term is
   * not handled.
   * - Adds the appropriate terms and triggers to the equality engine.
   * - Updates information about which terms exist, including
   * d_functionsTerms and d_hasStrCode. If we are using strings finite model
   * finding (options::stringsFmf), we determine if the term n should be
   * added to d_inputVars, the set of terms of type string whose length we are
   * minimizing with its decision strategy.
   * - Setting phase requirements on n if it is a formula and we prefer
   * decisions with a particular polarity (e.g. positive regular expression
   * memberships).
   */
  void preRegisterTerm(TNode n);
  /** Register term
   *
   * This performs SAT-context-independent registration for a term n, which
   * may cause lemmas to be sent on the output channel that involve
   * "initial refinement lemmas" for n. This includes introducing proxy
   * variables for string terms and asserting that str.code terms are within
   * proper bounds.
   *
   * Effort is one of the following (TODO make enum #1881):
   * 0 : upon preregistration or internal assertion
   * 1 : upon occurrence in length term
   * 2 : before normal form computation
   * 3 : called on normal form terms
   *
   * Based on the strategy, we may choose to add these initial refinement
   * lemmas at one of the following efforts, where if it is not the given
   * effort, the call to this method does nothing.
   */
  void registerTerm(Node n, int effort);
  /** register length
   *
   * This method is called on non-constant string terms n that are "atomic"
   * with respect to length. That is, the rewritten form of len(n) is itself.
   *
   * It sends a lemma on the output channel that ensures that the length n
   * satisfies its assigned status (given by argument s).
   *
   * If the status is LENGTH_ONE, we send the lemma len( n ) = 1.
   *
   * If the status is LENGTH_GEQ_ONE, we send a lemma n != "" ^ len( n ) > 0.
   *
   * If the status is LENGTH_SPLIT, we send a send a lemma of the form:
   *   ( n = "" ^ len( n ) = 0 ) OR len( n ) > 0
   * This method also ensures that, when applicable, the left branch is taken
   * first via calls to requirePhase.
   *
   * If the status is LENGTH_IGNORE, then no lemma is sent. This status is used
   * e.g. when the length of n is already implied by other constraints.
   *
   * In contrast to the above functions, it makes immediate calls to the output
   * channel instead of adding them to pending lists.
   */
  void registerTermAtomic(Node n, LengthStatus s);
  //---------------------------- queries
  /** Get the skolem cache of this object */
  SkolemCache* getSkolemCache();
  /** Get all function terms that have been preregistered to this object */
  const context::CDList<TNode>& getFunctionTerms() const;
  /**
   * Get the "input variables", corresponding to the set of leaf nodes of
   * string-like type that have been preregistered as terms to this object.
   */
  const context::CDHashSet<Node, NodeHashFunction>& getInputVars() const;
  /** Returns true if any str.code terms have been preregistered */
  bool hasStringCode() const;
  //---------------------------- end queries
  //---------------------------- proxy variables
  /** Get symbolic definition
   *
   * This method returns the "symbolic definition" of n, call it n', and
   * populates the vector exp with an explanation such that exp => n = n'.
   *
   * The symbolic definition of n is the term where (maximal) subterms of n
   * are replaced by their proxy variables. For example, if we introduced
   * proxy variable v for x ++ y, then given input x ++ y = w, this method
   * returns v = w and adds v = x ++ y to exp.
   */
  Node getSymbolicDefinition(Node n, std::vector<Node>& exp) const;
  /** Get proxy variable
   *
   * If this method returns the proxy variable for (string) term n if one
   * exists, otherwise it returns null.
   */
  Node getProxyVariableFor(Node n) const;

  /** infer substitution proxy vars
   *
   * This method attempts to (partially) convert the formula n into a
   * substitution of the form:
   *   v1 -> s1, ..., vn -> sn
   * where s1 ... sn are proxy variables and v1 ... vn are either variables
   * or constants.
   *
   * This method ensures that P ^ v1 = s1 ^ ... ^ vn = sn ^ unproc is equivalent
   * to P ^ n, where P is the conjunction of equalities corresponding to the
   * definition of all proxy variables introduced by the theory of strings.
   *
   * For example, say that v1 was introduced as a proxy variable for "ABC", and
   * v2 was introduced as a proxy variable for "AA".
   *
   * Given the input n := v1 = "ABC" ^ v2 = x ^ x = "AA", this method sets:
   * vars = { x },
   * subs = { v2 },
   * unproc = {}.
   * In particular, this says that the information content of n is essentially
   * x = v2. The first and third conjunctions can be dropped from the
   * explanation since these equalities simply correspond to definitions
   * of proxy variables.
   *
   * This method is used as a performance heuristic. It can infer when the
   * explanation of a fact depends only trivially on equalities corresponding
   * to definitions of proxy variables, which can be omitted since they are
   * assumed to hold globally.
   */
  void inferSubstitutionProxyVars(Node n,
                                  std::vector<Node>& vars,
                                  std::vector<Node>& subs,
                                  std::vector<Node>& unproc) const;
  //---------------------------- end proxy variables
 private:
  /** Common constants */
  Node d_zero;
  Node d_one;
  Node d_negOne;
  /** the cardinality of the alphabet */
  uint32_t d_cardSize;
  /** Reference to equality engine of the theory of strings. */
  eq::EqualityEngine& d_ee;
  /** Reference to the output channel of the theory of strings. */
  OutputChannel& d_out;
  /** Reference to the statistics for the theory of strings/sequences. */
  SequencesStatistics& d_statistics;
  /** have we asserted any str.code terms? */
  bool d_hasStrCode;
  /** The cache of all skolems, which is owned by this class. */
  SkolemCache d_skCache;
  /** All function terms that the theory has seen in the current SAT context */
  context::CDList<TNode> d_functionsTerms;
  /**
   * The set of terms of type string that are abstracted as leaf nodes.
   */
  NodeSet d_inputVars;
  /** The user-context dependent cache of terms that have been preregistered */
  NodeSet d_preregisteredTerms;
  /** The user-context dependent cache of terms that have been registered */
  NodeSet d_registeredTerms;
  /** The types that we have preregistered */
  TypeNodeSet d_registeredTypes;
  /**
   * Map string terms to their "proxy variables". Proxy variables are used are
   * intermediate variables so that length information can be communicated for
   * constants. For example, to communicate that "ABC" has length 3, we
   * introduce a proxy variable v_{"ABC"} for "ABC", and assert:
   *   v_{"ABC"} = "ABC" ^ len( v_{"ABC"} ) = 3
   * Notice this is required since we cannot directly write len( "ABC" ) = 3,
   * which rewrites to 3 = 3.
   * In the above example, we store "ABC" -> v_{"ABC"} in this map.
   */
  NodeNodeMap d_proxyVar;
  /**
   * Map from proxy variables to their normalized length. In the above example,
   * we store "ABC" -> 3.
   */
  NodeNodeMap d_proxyVarToLength;
  /** List of terms that we have register length for */
  NodeSet d_lengthLemmaTermsCache;
  /** Register type
   *
   * Ensures the theory solver is setup to handle string-like type tn. In
   * particular, this includes:
   * - Calling preRegisterTerm on the empty word for tn
   */
  void registerType(TypeNode tn);
  /** register term
   *
   * This method is called on non-constant string terms n. It returns a lemma
   * that should be sent on the output channel of theory of strings upon
   * registration of this term, or null if no lemma is necessary.
   *
   * If n is an atomic term, the method registerTermAtomic is called for n
   * and s = LENGTH_SPLIT and no lemma is returned.
   */
  Node getRegisterTermLemma(Node n);
  /**
   * Get the lemma required for registering the length information for
   * atomic term n given length status s. For details, see registerTermAtomic.
   *
   * Additionally, this method may map literals to a required polarity in the
   * argument reqPhase, which should be processed by a call to requiredPhase by
   * the caller of this method.
   */
  Node getRegisterTermAtomicLemma(Node n,
                                  LengthStatus s,
                                  std::map<Node, bool>& reqPhase);
};

}  // namespace strings
}  // namespace theory
}  // namespace CVC4

#endif /* CVC4__THEORY__STRINGS__TERM_REGISTRY_H */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback