summaryrefslogtreecommitdiff
path: root/src/util/regexp.cpp
diff options
context:
space:
mode:
authorTim King <taking@google.com>2017-07-13 17:35:00 -0700
committerTim King <taking@google.com>2017-07-13 17:35:00 -0700
commita94318b0b1f0c4b8a2a4d6757b073626af06deea (patch)
tree848beed0fc906131d2998b3018116ccc3e855e7e /src/util/regexp.cpp
parent21f1b73de084ec182718240010c5e4abb05bff5b (diff)
Cleaning up the CVC4::String class.
Diffstat (limited to 'src/util/regexp.cpp')
-rw-r--r--src/util/regexp.cpp368
1 files changed, 294 insertions, 74 deletions
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp
index 9915d480d..6afadfd5b 100644
--- a/src/util/regexp.cpp
+++ b/src/util/regexp.cpp
@@ -17,105 +17,194 @@
#include "util/regexp.h"
+#include <algorithm>
+#include <climits>
#include <iomanip>
#include <iostream>
+#include <sstream>
+
+#include "base/cvc4_assert.h"
+#include "base/exception.h"
using namespace std;
namespace CVC4 {
-void String::toInternal(const std::string &s) {
- d_str.clear();
- unsigned i=0;
- while(i < s.size()) {
- if(s[i] == '\\') {
+static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values.");
+
+int String::cmp(const String &y) const {
+ if (size() != y.size()) {
+ return size() < y.size() ? 1 : -1;
+ }
+ for (unsigned int i = 0; i < size(); ++i) {
+ if (d_str[i] != y.d_str[i]) {
+ return getUnsignedCharAt(i) < y.getUnsignedCharAt(i) ? 1 : -1;
+ }
+ }
+ return 0;
+}
+
+String String::concat(const String &other) const {
+ std::vector<unsigned int> ret_vec(d_str);
+ ret_vec.insert(ret_vec.end(), other.d_str.begin(), other.d_str.end());
+ return String(ret_vec);
+}
+
+bool String::strncmp(const String &y, const std::size_t np) const {
+ std::size_t n = np;
+ std::size_t b = (size() >= y.size()) ? size() : y.size();
+ std::size_t s = (size() <= y.size()) ? size() : y.size();
+ if (n > s) {
+ if (b == s) {
+ n = s;
+ } else {
+ return false;
+ }
+ }
+ for (std::size_t i = 0; i < n; ++i) {
+ if (d_str[i] != y.d_str[i]) return false;
+ }
+ return true;
+}
+
+bool String::rstrncmp(const String &y, const std::size_t np) const {
+ std::size_t n = np;
+ std::size_t b = (size() >= y.size()) ? size() : y.size();
+ std::size_t s = (size() <= y.size()) ? size() : y.size();
+ if (n > s) {
+ if (b == s) {
+ n = s;
+ } else {
+ return false;
+ }
+ }
+ for (std::size_t i = 0; i < n; ++i) {
+ if (d_str[size() - i - 1] != y.d_str[y.size() - i - 1]) return false;
+ }
+ return true;
+}
+
+std::vector<unsigned> String::toInternal(const std::string &s) {
+ std::vector<unsigned> str;
+ unsigned i = 0;
+ while (i < s.size()) {
+ if (s[i] == '\\') {
i++;
- if(i < s.size()) {
- switch(s[i]) {
- case 'n': {d_str.push_back( convertCharToUnsignedInt('\n') );i++;} break;
- case 't': {d_str.push_back( convertCharToUnsignedInt('\t') );i++;} break;
- case 'v': {d_str.push_back( convertCharToUnsignedInt('\v') );i++;} break;
- case 'b': {d_str.push_back( convertCharToUnsignedInt('\b') );i++;} break;
- case 'r': {d_str.push_back( convertCharToUnsignedInt('\r') );i++;} break;
- case 'f': {d_str.push_back( convertCharToUnsignedInt('\f') );i++;} break;
- case 'a': {d_str.push_back( convertCharToUnsignedInt('\a') );i++;} break;
- case '\\': {d_str.push_back( convertCharToUnsignedInt('\\') );i++;} break;
+ if (i < s.size()) {
+ switch (s[i]) {
+ case 'n': {
+ str.push_back(convertCharToUnsignedInt('\n'));
+ i++;
+ } break;
+ case 't': {
+ str.push_back(convertCharToUnsignedInt('\t'));
+ i++;
+ } break;
+ case 'v': {
+ str.push_back(convertCharToUnsignedInt('\v'));
+ i++;
+ } break;
+ case 'b': {
+ str.push_back(convertCharToUnsignedInt('\b'));
+ i++;
+ } break;
+ case 'r': {
+ str.push_back(convertCharToUnsignedInt('\r'));
+ i++;
+ } break;
+ case 'f': {
+ str.push_back(convertCharToUnsignedInt('\f'));
+ i++;
+ } break;
+ case 'a': {
+ str.push_back(convertCharToUnsignedInt('\a'));
+ i++;
+ } break;
+ case '\\': {
+ str.push_back(convertCharToUnsignedInt('\\'));
+ i++;
+ } break;
case 'x': {
- if(i + 2 < s.size()) {
- if(isxdigit(s[i+1]) && isxdigit(s[i+2])) {
- d_str.push_back( convertCharToUnsignedInt( hexToDec(s[i+1]) * 16 + hexToDec(s[i+2]) ) );
+ if (i + 2 < s.size()) {
+ if (isxdigit(s[i + 1]) && isxdigit(s[i + 2])) {
+ str.push_back(convertCharToUnsignedInt(hexToDec(s[i + 1]) * 16 +
+ hexToDec(s[i + 2])));
i += 3;
} else {
- throw CVC4::Exception( "Illegal String Literal: \"" + s + "\"" );
+ throw CVC4::Exception("Illegal String Literal: \"" + s + "\"");
}
} else {
- throw CVC4::Exception( "Illegal String Literal: \"" + s + "\", must have two digits after \\x" );
+ throw CVC4::Exception("Illegal String Literal: \"" + s +
+ "\", must have two digits after \\x");
}
- }
- break;
+ } break;
default: {
- if(isdigit(s[i])) {
+ if (isdigit(s[i])) {
int num = (int)s[i] - (int)'0';
bool flag = num < 4;
- if(i+1 < s.size() && num < 8 && isdigit(s[i+1]) && s[i+1] < '8') {
- num = num * 8 + (int)s[i+1] - (int)'0';
- if(flag && i+2 < s.size() && isdigit(s[i+2]) && s[i+2] < '8') {
- num = num * 8 + (int)s[i+2] - (int)'0';
- d_str.push_back( convertCharToUnsignedInt((unsigned char)num) );
+ if (i + 1 < s.size() && num < 8 && isdigit(s[i + 1]) &&
+ s[i + 1] < '8') {
+ num = num * 8 + (int)s[i + 1] - (int)'0';
+ if (flag && i + 2 < s.size() && isdigit(s[i + 2]) &&
+ s[i + 2] < '8') {
+ num = num * 8 + (int)s[i + 2] - (int)'0';
+ str.push_back(convertCharToUnsignedInt((unsigned char)num));
i += 3;
} else {
- d_str.push_back( convertCharToUnsignedInt((unsigned char)num) );
+ str.push_back(convertCharToUnsignedInt((unsigned char)num));
i += 2;
}
} else {
- d_str.push_back( convertCharToUnsignedInt((unsigned char)num) );
+ str.push_back(convertCharToUnsignedInt((unsigned char)num));
i++;
}
- } else if((unsigned)s[i] > 127) {
- throw CVC4::Exception( "Illegal String Literal: \"" + s + "\", must use escaped sequence" );
+ } else if ((unsigned)s[i] > 127) {
+ throw CVC4::Exception("Illegal String Literal: \"" + s +
+ "\", must use escaped sequence");
} else {
- d_str.push_back( convertCharToUnsignedInt(s[i]) );
+ str.push_back(convertCharToUnsignedInt(s[i]));
i++;
}
}
}
} else {
- throw CVC4::Exception( "should be handled by lexer: \"" + s + "\"" );
- //d_str.push_back( convertCharToUnsignedInt('\\') );
+ throw CVC4::Exception("should be handled by lexer: \"" + s + "\"");
+ // str.push_back( convertCharToUnsignedInt('\\') );
}
- } else if((unsigned)s[i] > 127) {
- throw CVC4::Exception( "Illegal String Literal: \"" + s + "\", must use escaped sequence" );
+ } else if ((unsigned)s[i] > 127) {
+ throw CVC4::Exception("Illegal String Literal: \"" + s +
+ "\", must use escaped sequence");
} else {
- d_str.push_back( convertCharToUnsignedInt(s[i]) );
+ str.push_back(convertCharToUnsignedInt(s[i]));
i++;
}
}
+ return str;
}
-void String::getCharSet(std::set<unsigned char> &cset) const {
- for(std::vector<unsigned int>::const_iterator itr = d_str.begin();
- itr != d_str.end(); itr++) {
- cset.insert( convertUnsignedIntToChar(*itr) );
- }
+unsigned char String::getUnsignedCharAt(size_t pos) const {
+ Assert(pos < size());
+ return convertUnsignedIntToChar(d_str[pos]);
}
-std::size_t String::overlap(String &y) const {
- std::size_t i = d_str.size() < y.size() ? d_str.size() : y.size();
- for(; i>0; i--) {
+std::size_t String::overlap(const String &y) const {
+ std::size_t i = size() < y.size() ? size() : y.size();
+ for (; i > 0; i--) {
String s = suffix(i);
String p = y.prefix(i);
- if(s == p) {
+ if (s == p) {
return i;
}
}
return i;
}
-std::size_t String::roverlap(String &y) const {
- std::size_t i = d_str.size() < y.size() ? d_str.size() : y.size();
- for(; i>0; i--) {
+
+std::size_t String::roverlap(const String &y) const {
+ std::size_t i = size() < y.size() ? size() : y.size();
+ for (; i > 0; i--) {
String s = prefix(i);
String p = y.suffix(i);
- if(s == p) {
+ if (s == p) {
return i;
}
}
@@ -124,36 +213,53 @@ std::size_t String::roverlap(String &y) const {
std::string String::toString() const {
std::string str;
- for(unsigned int i=0; i<d_str.size(); ++i) {
- unsigned char c = convertUnsignedIntToChar( d_str[i] );
- if(isprint( c )) {
- if(c == '\\') {
+ for (unsigned int i = 0; i < size(); ++i) {
+ unsigned char c = convertUnsignedIntToChar(d_str[i]);
+ if (isprint(c)) {
+ if (c == '\\') {
str += "\\\\";
- }
- //else if(c == '\"') {
+ }
+ // else if(c == '\"') {
// str += "\\\"";
- //}
+ //}
else {
str += c;
}
} else {
std::string s;
- switch(c) {
- case '\a': s = "\\a"; break;
- case '\b': s = "\\b"; break;
- case '\t': s = "\\t"; break;
- case '\r': s = "\\r"; break;
- case '\v': s = "\\v"; break;
- case '\f': s = "\\f"; break;
- case '\n': s = "\\n"; break;
- case '\e': s = "\\e"; break;
- default : {
+ switch (c) {
+ case '\a':
+ s = "\\a";
+ break;
+ case '\b':
+ s = "\\b";
+ break;
+ case '\t':
+ s = "\\t";
+ break;
+ case '\r':
+ s = "\\r";
+ break;
+ case '\v':
+ s = "\\v";
+ break;
+ case '\f':
+ s = "\\f";
+ break;
+ case '\n':
+ s = "\\n";
+ break;
+ case '\e':
+ s = "\\e";
+ break;
+ default: {
std::stringstream ss;
- ss << std::setfill ('0') << std::setw(2) << std::hex << ((int)c);
+ ss << std::setfill('0') << std::setw(2) << std::hex << ((int)c);
std::string t = ss.str();
- t = t.substr(t.size()-2, 2);
+ t = t.substr(t.size() - 2, 2);
s = "\\x" + t;
- //std::string s2 = static_cast<std::ostringstream*>( &(std::ostringstream() << (int)c) )->str();
+ // std::string s2 = static_cast<std::ostringstream*>(
+ // &(std::ostringstream() << (int)c) )->str();
}
}
str += s;
@@ -162,12 +268,126 @@ std::string String::toString() const {
return str;
}
-std::ostream& operator <<(std::ostream& os, const String& s) {
+bool String::isRepeated() const {
+ if (size() > 1) {
+ unsigned int f = d_str[0];
+ for (unsigned i = 1; i < size(); ++i) {
+ if (f != d_str[i]) return false;
+ }
+ }
+ return true;
+}
+
+bool String::tailcmp(const String &y, int &c) const {
+ int id_x = size() - 1;
+ int id_y = y.size() - 1;
+ while (id_x >= 0 && id_y >= 0) {
+ if (d_str[id_x] != y.d_str[id_y]) {
+ c = id_x;
+ return false;
+ }
+ --id_x;
+ --id_y;
+ }
+ c = id_x == -1 ? (-(id_y + 1)) : (id_x + 1);
+ return true;
+}
+
+std::size_t String::find(const String &y, const std::size_t start) const {
+ if (size() < y.size() + start) return std::string::npos;
+ if (y.empty()) return start;
+ if (empty()) return std::string::npos;
+
+ std::vector<unsigned>::const_iterator itr = std::search(
+ d_str.begin() + start, d_str.end(), y.d_str.begin(), y.d_str.end());
+ if (itr != d_str.end()) {
+ return itr - d_str.begin();
+ }
+ return std::string::npos;
+}
+
+std::size_t String::rfind(const String &y, const std::size_t start) const {
+ if (size() < y.size() + start) return std::string::npos;
+ if (y.empty()) return start;
+ if (empty()) return std::string::npos;
+
+ std::vector<unsigned>::const_reverse_iterator itr = std::search(
+ d_str.rbegin() + start, d_str.rend(), y.d_str.rbegin(), y.d_str.rend());
+ if (itr != d_str.rend()) {
+ return itr - d_str.rbegin();
+ }
+ return std::string::npos;
+}
+
+String String::replace(const String &s, const String &t) const {
+ std::size_t ret = find(s);
+ if (ret != std::string::npos) {
+ std::vector<unsigned int> vec;
+ vec.insert(vec.begin(), d_str.begin(), d_str.begin() + ret);
+ vec.insert(vec.end(), t.d_str.begin(), t.d_str.end());
+ vec.insert(vec.end(), d_str.begin() + ret + s.size(), d_str.end());
+ return String(vec);
+ } else {
+ return *this;
+ }
+}
+
+String String::substr(std::size_t i) const {
+ Assert(i <= size());
+ std::vector<unsigned int> ret_vec;
+ std::vector<unsigned int>::const_iterator itr = d_str.begin() + i;
+ ret_vec.insert(ret_vec.end(), itr, d_str.end());
+ return String(ret_vec);
+}
+
+String String::substr(std::size_t i, std::size_t j) const {
+ Assert(i + j <= size());
+ std::vector<unsigned int> ret_vec;
+ std::vector<unsigned int>::const_iterator itr = d_str.begin() + i;
+ ret_vec.insert(ret_vec.end(), itr, itr + j);
+ return String(ret_vec);
+}
+
+bool String::isNumber() const {
+ for (unsigned character : d_str) {
+ unsigned char c = convertUnsignedIntToChar(character);
+ if (c < '0' || c > '9') {
+ return false;
+ }
+ }
+ return true;
+}
+
+int String::toNumber() const {
+ if (isNumber()) {
+ int ret = 0;
+ for (unsigned int i = 0; i < size(); ++i) {
+ unsigned char c = convertUnsignedIntToChar(d_str[i]);
+ ret = ret * 10 + (int)c - (int)'0';
+ }
+ return ret;
+ } else {
+ return -1;
+ }
+}
+
+unsigned char String::hexToDec(unsigned char c) {
+ if (c >= '0' && c <= '9') {
+ return c - '0';
+ } else if (c >= 'a' && c <= 'f') {
+ return c - 'a' + 10;
+ } else {
+ Assert(c >= 'A' && c <= 'F');
+ return c - 'A' + 10;
+ }
+}
+
+std::ostream &operator<<(std::ostream &os, const String &s) {
return os << "\"" << s.toString() << "\"";
}
-std::ostream& operator<<(std::ostream& out, const RegExp& s) {
+std::ostream &operator<<(std::ostream &out, const RegExp &s) {
return out << "regexp(" << s.getType() << ')';
}
-}/* CVC4 namespace */
+} // namespace CVC4
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback