summaryrefslogtreecommitdiff
path: root/src/theory/strings/regexp_entail.h
blob: b0511bd53ee9dcef877115e31ccc830f952d1861 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
/******************************************************************************
 * Top contributors (to current version):
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * Entailment tests involving regular expressions.
 */

#include "cvc5_private.h"

#ifndef CVC5__THEORY__STRINGS__REGEXP_ENTAIL_H
#define CVC5__THEORY__STRINGS__REGEXP_ENTAIL_H

#include <climits>
#include <utility>
#include <vector>

#include "expr/attribute.h"
#include "theory/strings/arith_entail.h"
#include "theory/strings/rewrites.h"
#include "theory/theory_rewriter.h"
#include "theory/type_enumerator.h"

namespace cvc5 {
namespace theory {
namespace strings {

class RegExpEntail
{
 public:
  RegExpEntail(Rewriter* r);
  /** simple regular expression consume
   *
   * This method is called when we are rewriting a membership of the form
   *   s1 ++ ... ++ sn in r1 ++ ... ++ rm
   * We have that mchildren consists of the strings s1...sn, and children
   * consists of the regular expressions r1...rm.
   *
   * This method tries to strip off parts of the concatenation terms. It updates
   * the vectors such that the resulting vectors are such that the membership
   * mchildren[n'...n''] in children[m'...m''] is equivalent to the input
   * membership. The argument dir indicates the direction to consider, where
   * 0 means strip off the front, 1 off the back, and < 0 off of both.
   *
   * If this method returns the false node, then we have inferred that no
   * string in the language of r1 ++ ... ++ rm is a prefix (when dir!=1) or
   * suffix (when dir!=0) of s1 ++ ... ++ sn. Otherwise, it returns the null
   * node.
   *
   * For example, given input
   *   mchildren = { "ab", x }, children = { [["a"]], ([["cd"]])* } and dir = 0,
   * this method updates:
   *   mchildren = { "b", x }, children = { ("cd")* }
   * and returns null.
   *
   * For example, given input
   *   { x, "abb", x }, { [[x]], ["a"..."b"], allchar, [[y]], [[x]]} and dir=-1,
   * this method updates:
   *   { "b" }, { [[y]] }
   * where [[.]] denotes str.to.re, and returns null.
   *
   * Notice that the above requirement for returning false is stronger than
   * determining that s1 ++ ... ++ sn in r1 ++ ... ++ rm is equivalent to false.
   * For example, for input "bb" in "b" ++ ( "a" )*, we do not return false
   * since "b" is in the language of "b" ++ ( "a" )* and is a prefix of "bb".
   * We do not return false even though the above membership is equivalent
   * to false. We do this because the function is used e.g. to test whether a
   * possible unrolling leads to a conflict. This is demonstrated by the
   * following examples:
   *
   * For example, given input
   *   { "bb", x }, { "b", ("a")* } and dir=-1,
   * this method updates:
   *   { "b" }, { ("a")* }
   * and returns null.
   *
   * For example, given input
   *   { "cb", x }, { "b", ("a")* } and dir=-1,
   * this method leaves children and mchildren unchanged and returns false.
   *
   * Notice that based on this, we can determine that:
   *   "cb" ++ x  in ( "b" ++ ("a")* )*
   * is equivalent to false, whereas we cannot determine that:
   *   "bb" ++ x  in ( "b" ++ ("a")* )*
   * is equivalent to false.
   */
  static Node simpleRegexpConsume(std::vector<Node>& mchildren,
                                  std::vector<Node>& children,
                                  int dir = -1);
  /**
   * Is t a constant regular expression? A constant regular expression
   * is one with no non-constant (RE or string subterms) and does not contain
   * any non-standard RE terms like re.range applied to non-character
   * arguments.
   */
  static bool isConstRegExp(TNode t);
  /**
   * Does the substring of s starting at index_start occur in constant regular
   * expression r?
   */
  static bool testConstStringInRegExp(cvc5::String& s,
                                      unsigned index_start,
                                      TNode r);
  /** Does regular expression node have (str.to.re "") as a child? */
  static bool hasEpsilonNode(TNode node);
  /** get length for regular expression
   *
   * Given regular expression n, if this method returns a non-null value c, then
   * x in n entails len( x ) = c.
   */
  static Node getFixedLengthForRegexp(TNode n);

  /**
   * Get constant lower or upper bound on the lengths of strings that occur in
   * regular expression n. Return null if a constant bound cannot be determined.
   * This method will always worst case return 0 as a lower bound.
   */
  Node getConstantBoundLengthForRegexp(TNode n, bool isLower = true) const;
  /**
   * Returns true if we can show that the regular expression `r1` includes
   * the regular expression `r2` (i.e. `r1` matches a superset of sequences
   * that `r2` matches). This method only works on a fragment of regular
   * expressions, specifically regular expressions that pass the
   * `isSimpleRegExp` check.
   *
   * @param r1 The regular expression that may include `r2` (must be in
   *           rewritten form)
   * @param r2 The regular expression that may be included by `r1` (must be
   *           in rewritten form)
   * @return True if the inclusion can be shown, false otherwise
   */
  static bool regExpIncludes(Node r1, Node r2);

 private:
  /** Set bound cache, used for getConstantBoundLengthForRegexp */
  static void setConstantBoundCache(TNode n, Node ret, bool isLower);
  /**
   * Get bound cache, store in c and return true if the bound for n has been
   * computed. Used for getConstantBoundLengthForRegexp.
   */
  static bool getConstantBoundCache(TNode n, bool isLower, Node& c);
  /** The underlying rewriter */
  Rewriter* d_rewriter;
  /** Arithmetic entailment module */
  ArithEntail d_aent;
  /** Common constants */
  Node d_zero;
  Node d_one;
};

}  // namespace strings
}  // namespace theory
}  // namespace cvc5

#endif /* CVC5__THEORY__STRINGS__REGEXP_ENTAIL_H */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback