summaryrefslogtreecommitdiff
path: root/src/util
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2018-08-09 15:11:27 -0500
committerAina Niemetz <aina.niemetz@gmail.com>2018-08-09 13:11:27 -0700
commit0a02fd2b69c0c0f454fc33d8028b24f4fcf431de (patch)
treec1ac5ce905728635b4e872810d68dd3f51ba01d8 /src/util
parent5052848f548aefd50cca9550b750eb537eee258c (diff)
Fix char overflow issues in regular expression solver (#2275)
Diffstat (limited to 'src/util')
-rw-r--r--src/util/regexp.cpp33
-rw-r--r--src/util/regexp.h16
2 files changed, 37 insertions, 12 deletions
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp
index 93178b948..8d68d4e86 100644
--- a/src/util/regexp.cpp
+++ b/src/util/regexp.cpp
@@ -58,13 +58,25 @@ unsigned String::convertUnsignedIntToCode(unsigned i)
return (i + start_code()) % num_codes();
}
+String::String(const std::vector<unsigned> &s) : d_str(s)
+{
+#ifdef CVC4_ASSERTIONS
+ for (unsigned u : d_str)
+ {
+ Assert(convertUnsignedIntToCode(u) < num_codes());
+ }
+#endif
+}
+
int String::cmp(const String &y) const {
if (size() != y.size()) {
return size() < y.size() ? -1 : 1;
}
for (unsigned int i = 0; i < size(); ++i) {
if (d_str[i] != y.d_str[i]) {
- return getUnsignedCharAt(i) < y.getUnsignedCharAt(i) ? -1 : 1;
+ unsigned cp = convertUnsignedIntToCode(d_str[i]);
+ unsigned cpy = convertUnsignedIntToCode(y.d_str[i]);
+ return cp < cpy ? -1 : 1;
}
}
return 0;
@@ -207,12 +219,25 @@ std::vector<unsigned> String::toInternal(const std::string &s,
i++;
}
}
+#ifdef CVC4_ASSERTIONS
+ for (unsigned u : str)
+ {
+ Assert(convertUnsignedIntToCode(u) < num_codes());
+ }
+#endif
return str;
}
-unsigned char String::getUnsignedCharAt(size_t pos) const {
- Assert(pos < size());
- return convertUnsignedIntToChar(d_str[pos]);
+unsigned String::front() const
+{
+ Assert(!d_str.empty());
+ return d_str.front();
+}
+
+unsigned String::back() const
+{
+ Assert(!d_str.empty());
+ return d_str.back();
}
std::size_t String::overlap(const String &y) const {
diff --git a/src/util/regexp.h b/src/util/regexp.h
index e91ca61e2..2ab6b659c 100644
--- a/src/util/regexp.h
+++ b/src/util/regexp.h
@@ -51,6 +51,8 @@ class CVC4_PUBLIC String {
* This is the cardinality of the alphabet that is representable by this
* class. Notice that this must be greater than or equal to the cardinality
* of the alphabet that the string theory reasons about.
+ *
+ * This must be strictly less than std::numeric_limits<unsigned>::max().
*/
static inline unsigned num_codes() { return 256; }
/**
@@ -89,9 +91,7 @@ class CVC4_PUBLIC String {
: d_str(toInternal(s, useEscSequences)) {}
explicit String(const char* s, bool useEscSequences = false)
: d_str(toInternal(std::string(s), useEscSequences)) {}
- explicit String(const unsigned char c)
- : d_str({convertCharToUnsignedInt(c)}) {}
- explicit String(const std::vector<unsigned>& s) : d_str(s) {}
+ explicit String(const std::vector<unsigned>& s);
String& operator=(const String& y) {
if (this != &y) {
@@ -137,9 +137,6 @@ class CVC4_PUBLIC String {
/** Return the length of the string */
std::size_t size() const { return d_str.size(); }
- unsigned char getFirstChar() const { return getUnsignedCharAt(0); }
- unsigned char getLastChar() const { return getUnsignedCharAt(size() - 1); }
-
bool isRepeated() const;
bool tailcmp(const String& y, int& c) const;
@@ -187,8 +184,12 @@ class CVC4_PUBLIC String {
bool isNumber() const;
/** Returns the corresponding rational for the text of this string. */
Rational toNumber() const;
-
+ /** get the internal unsigned representation of this string */
const std::vector<unsigned>& getVec() const { return d_str; }
+ /** get the internal unsigned value of the first character in this string */
+ unsigned front() const;
+ /** get the internal unsigned value of the last character in this string */
+ unsigned back() const;
/** is the unsigned a digit?
* The input should be the same type as the element type of d_str
*/
@@ -205,7 +206,6 @@ class CVC4_PUBLIC String {
static std::vector<unsigned> toInternal(const std::string& s,
bool useEscSequences = true);
- unsigned char getUnsignedCharAt(size_t pos) const;
/**
* Returns a negative number if *this < y, 0 if *this and y are equal and a
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback