diff options
author | Andrew Reynolds <andrew.j.reynolds@gmail.com> | 2018-08-09 15:11:27 -0500 |
---|---|---|
committer | Aina Niemetz <aina.niemetz@gmail.com> | 2018-08-09 13:11:27 -0700 |
commit | 0a02fd2b69c0c0f454fc33d8028b24f4fcf431de (patch) | |
tree | c1ac5ce905728635b4e872810d68dd3f51ba01d8 /src/util | |
parent | 5052848f548aefd50cca9550b750eb537eee258c (diff) |
Fix char overflow issues in regular expression solver (#2275)
Diffstat (limited to 'src/util')
-rw-r--r-- | src/util/regexp.cpp | 33 | ||||
-rw-r--r-- | src/util/regexp.h | 16 |
2 files changed, 37 insertions, 12 deletions
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp index 93178b948..8d68d4e86 100644 --- a/src/util/regexp.cpp +++ b/src/util/regexp.cpp @@ -58,13 +58,25 @@ unsigned String::convertUnsignedIntToCode(unsigned i) return (i + start_code()) % num_codes(); } +String::String(const std::vector<unsigned> &s) : d_str(s) +{ +#ifdef CVC4_ASSERTIONS + for (unsigned u : d_str) + { + Assert(convertUnsignedIntToCode(u) < num_codes()); + } +#endif +} + int String::cmp(const String &y) const { if (size() != y.size()) { return size() < y.size() ? -1 : 1; } for (unsigned int i = 0; i < size(); ++i) { if (d_str[i] != y.d_str[i]) { - return getUnsignedCharAt(i) < y.getUnsignedCharAt(i) ? -1 : 1; + unsigned cp = convertUnsignedIntToCode(d_str[i]); + unsigned cpy = convertUnsignedIntToCode(y.d_str[i]); + return cp < cpy ? -1 : 1; } } return 0; @@ -207,12 +219,25 @@ std::vector<unsigned> String::toInternal(const std::string &s, i++; } } +#ifdef CVC4_ASSERTIONS + for (unsigned u : str) + { + Assert(convertUnsignedIntToCode(u) < num_codes()); + } +#endif return str; } -unsigned char String::getUnsignedCharAt(size_t pos) const { - Assert(pos < size()); - return convertUnsignedIntToChar(d_str[pos]); +unsigned String::front() const +{ + Assert(!d_str.empty()); + return d_str.front(); +} + +unsigned String::back() const +{ + Assert(!d_str.empty()); + return d_str.back(); } std::size_t String::overlap(const String &y) const { diff --git a/src/util/regexp.h b/src/util/regexp.h index e91ca61e2..2ab6b659c 100644 --- a/src/util/regexp.h +++ b/src/util/regexp.h @@ -51,6 +51,8 @@ class CVC4_PUBLIC String { * This is the cardinality of the alphabet that is representable by this * class. Notice that this must be greater than or equal to the cardinality * of the alphabet that the string theory reasons about. + * + * This must be strictly less than std::numeric_limits<unsigned>::max(). */ static inline unsigned num_codes() { return 256; } /** @@ -89,9 +91,7 @@ class CVC4_PUBLIC String { : d_str(toInternal(s, useEscSequences)) {} explicit String(const char* s, bool useEscSequences = false) : d_str(toInternal(std::string(s), useEscSequences)) {} - explicit String(const unsigned char c) - : d_str({convertCharToUnsignedInt(c)}) {} - explicit String(const std::vector<unsigned>& s) : d_str(s) {} + explicit String(const std::vector<unsigned>& s); String& operator=(const String& y) { if (this != &y) { @@ -137,9 +137,6 @@ class CVC4_PUBLIC String { /** Return the length of the string */ std::size_t size() const { return d_str.size(); } - unsigned char getFirstChar() const { return getUnsignedCharAt(0); } - unsigned char getLastChar() const { return getUnsignedCharAt(size() - 1); } - bool isRepeated() const; bool tailcmp(const String& y, int& c) const; @@ -187,8 +184,12 @@ class CVC4_PUBLIC String { bool isNumber() const; /** Returns the corresponding rational for the text of this string. */ Rational toNumber() const; - + /** get the internal unsigned representation of this string */ const std::vector<unsigned>& getVec() const { return d_str; } + /** get the internal unsigned value of the first character in this string */ + unsigned front() const; + /** get the internal unsigned value of the last character in this string */ + unsigned back() const; /** is the unsigned a digit? * The input should be the same type as the element type of d_str */ @@ -205,7 +206,6 @@ class CVC4_PUBLIC String { static std::vector<unsigned> toInternal(const std::string& s, bool useEscSequences = true); - unsigned char getUnsignedCharAt(size_t pos) const; /** * Returns a negative number if *this < y, 0 if *this and y are equal and a |