diff options
author | Andrew Reynolds <andrew.j.reynolds@gmail.com> | 2020-03-27 14:04:44 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-27 12:04:44 -0700 |
commit | 4b7fc20dcce9eefdf568937f5e2c54141c4f5c5b (patch) | |
tree | 70dc3d084b73f1ea313a214245a792295f75215d /src/util/regexp.cpp | |
parent | a64866663f10db4ffadd2d48500cda05c4831f0e (diff) |
Move string utility file (#4164)
Moves the string file to string.h. This is required since other required utilities will soon need to be added to regexp.h.
Diffstat (limited to 'src/util/regexp.cpp')
-rw-r--r-- | src/util/regexp.cpp | 488 |
1 files changed, 0 insertions, 488 deletions
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp deleted file mode 100644 index 36ba7182b..000000000 --- a/src/util/regexp.cpp +++ /dev/null @@ -1,488 +0,0 @@ -/********************* */ -/*! \file regexp.cpp - ** \verbatim - ** Top contributors (to current version): - ** Tim King, Tianyi Liang, Andrew Reynolds - ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. - ** All rights reserved. See the file COPYING in the top-level source - ** directory for licensing information.\endverbatim - ** - ** \brief [[ Add one-line brief description here ]] - ** - ** [[ Add lengthier description here ]] - ** \todo document this file - **/ - -#include "util/regexp.h" - -#include <algorithm> -#include <climits> -#include <iomanip> -#include <iostream> -#include <sstream> - -#include "base/check.h" -#include "base/exception.h" - -using namespace std; - -namespace CVC4 { - -static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values."); - -String::String(const std::vector<unsigned> &s) : d_str(s) -{ -#ifdef CVC4_ASSERTIONS - for (unsigned u : d_str) - { - Assert(u < num_codes()); - } -#endif -} - -int String::cmp(const String &y) const { - if (size() != y.size()) { - return size() < y.size() ? -1 : 1; - } - for (unsigned int i = 0; i < size(); ++i) { - if (d_str[i] != y.d_str[i]) { - unsigned cp = d_str[i]; - unsigned cpy = y.d_str[i]; - return cp < cpy ? -1 : 1; - } - } - return 0; -} - -String String::concat(const String &other) const { - std::vector<unsigned int> ret_vec(d_str); - ret_vec.insert(ret_vec.end(), other.d_str.begin(), other.d_str.end()); - return String(ret_vec); -} - -bool String::strncmp(const String& y, std::size_t n) const -{ - std::size_t b = (size() >= y.size()) ? size() : y.size(); - std::size_t s = (size() <= y.size()) ? size() : y.size(); - if (n > s) { - if (b == s) { - n = s; - } else { - return false; - } - } - for (std::size_t i = 0; i < n; ++i) { - if (d_str[i] != y.d_str[i]) return false; - } - return true; -} - -bool String::rstrncmp(const String& y, std::size_t n) const -{ - std::size_t b = (size() >= y.size()) ? size() : y.size(); - std::size_t s = (size() <= y.size()) ? size() : y.size(); - if (n > s) { - if (b == s) { - n = s; - } else { - return false; - } - } - for (std::size_t i = 0; i < n; ++i) { - if (d_str[size() - i - 1] != y.d_str[y.size() - i - 1]) return false; - } - return true; -} - -void String::addCharToInternal(unsigned char ch, std::vector<unsigned>& str) -{ - // if not a printable character - if (ch > 127 || ch < 32) - { - std::stringstream serr; - serr << "Illegal string character: \"" << ch - << "\", must use escape sequence"; - throw CVC4::Exception(serr.str()); - } - else - { - str.push_back(static_cast<unsigned>(ch)); - } -} - -std::vector<unsigned> String::toInternal(const std::string& s, - bool useEscSequences) -{ - std::vector<unsigned> str; - unsigned i = 0; - while (i < s.size()) - { - // get the current character - char si = s[i]; - if (si != '\\' || !useEscSequences) - { - addCharToInternal(si, str); - ++i; - continue; - } - // the vector of characters, in case we fail to read an escape sequence - std::vector<unsigned> nonEscCache; - // process the '\' - addCharToInternal(si, nonEscCache); - ++i; - // are we an escape sequence? - bool isEscapeSequence = true; - // the string corresponding to the hexidecimal code point - std::stringstream hexString; - // is the slash followed by a 'u'? Could be last character. - if (i >= s.size() || s[i] != 'u') - { - isEscapeSequence = false; - } - else - { - // process the 'u' - addCharToInternal(s[i], nonEscCache); - ++i; - bool isStart = true; - bool isEnd = false; - bool hasBrace = false; - while (i < s.size()) - { - // add the next character - si = s[i]; - if (isStart) - { - isStart = false; - // possibly read '{' - if (si == '{') - { - hasBrace = true; - addCharToInternal(si, nonEscCache); - ++i; - continue; - } - } - else if (si == '}') - { - // can only end if we had an open brace and read at least one digit - isEscapeSequence = hasBrace && !hexString.str().empty(); - isEnd = true; - addCharToInternal(si, nonEscCache); - ++i; - break; - } - // must be a hex digit at this point - if (!isHexDigit(static_cast<unsigned>(si))) - { - isEscapeSequence = false; - break; - } - hexString << si; - addCharToInternal(si, nonEscCache); - ++i; - if (!hasBrace && hexString.str().size() == 4) - { - // will be finished reading \ u d_3 d_2 d_1 d_0 with no parens - isEnd = true; - break; - } - else if (hasBrace && hexString.str().size() > 5) - { - // too many digits enclosed in brace, not an escape sequence - isEscapeSequence = false; - break; - } - } - if (!isEnd) - { - // if we were interupted before ending, then this is not a valid - // escape sequence - isEscapeSequence = false; - } - } - if (isEscapeSequence) - { - Assert(!hexString.str().empty() && hexString.str().size() <= 5); - // Otherwise, we add the escaped character. - // This is guaranteed not to overflow due to the length of hstr. - uint32_t val; - hexString >> std::hex >> val; - if (val > num_codes()) - { - // Failed due to being out of range. This can happen for strings of - // the form \ u { d_4 d_3 d_2 d_1 d_0 } where d_4 is a hexidecimal not - // in the range [0-2]. - isEscapeSequence = false; - } - else - { - str.push_back(val); - } - } - // if we did not successfully parse an escape sequence, we add back all - // characters that we cached - if (!isEscapeSequence) - { - str.insert(str.end(), nonEscCache.begin(), nonEscCache.end()); - } - } -#ifdef CVC4_ASSERTIONS - for (unsigned u : str) - { - Assert(u < num_codes()); - } -#endif - return str; -} - -unsigned String::front() const -{ - Assert(!d_str.empty()); - return d_str.front(); -} - -unsigned String::back() const -{ - Assert(!d_str.empty()); - return d_str.back(); -} - -std::size_t String::overlap(const String &y) const { - std::size_t i = size() < y.size() ? size() : y.size(); - for (; i > 0; i--) { - String s = suffix(i); - String p = y.prefix(i); - if (s == p) { - return i; - } - } - return i; -} - -std::size_t String::roverlap(const String &y) const { - std::size_t i = size() < y.size() ? size() : y.size(); - for (; i > 0; i--) { - String s = prefix(i); - String p = y.suffix(i); - if (s == p) { - return i; - } - } - return i; -} - -std::string String::toString(bool useEscSequences) const { - std::stringstream str; - for (unsigned int i = 0; i < size(); ++i) { - // we always print forward slash as a code point so that it cannot - // be interpreted as specifying part of a code point, e.g. the string - // '\' + 'u' + '0' of length three. - if (isPrintable(d_str[i]) && d_str[i] != '\\' && !useEscSequences) - { - str << static_cast<char>(d_str[i]); - } - else - { - std::stringstream ss; - ss << std::hex << d_str[i]; - str << "\\u{" << ss.str() << "}"; - } - } - return str.str(); -} - -bool String::isLeq(const String &y) const -{ - for (unsigned i = 0; i < size(); ++i) - { - if (i >= y.size()) - { - return false; - } - unsigned ci = d_str[i]; - unsigned cyi = y.d_str[i]; - if (ci > cyi) - { - return false; - } - if (ci < cyi) - { - return true; - } - } - return true; -} - -bool String::isRepeated() const { - if (size() > 1) { - unsigned int f = d_str[0]; - for (unsigned i = 1; i < size(); ++i) { - if (f != d_str[i]) return false; - } - } - return true; -} - -bool String::tailcmp(const String &y, int &c) const { - int id_x = size() - 1; - int id_y = y.size() - 1; - while (id_x >= 0 && id_y >= 0) { - if (d_str[id_x] != y.d_str[id_y]) { - c = id_x; - return false; - } - --id_x; - --id_y; - } - c = id_x == -1 ? (-(id_y + 1)) : (id_x + 1); - return true; -} - -std::size_t String::find(const String &y, const std::size_t start) const { - if (size() < y.size() + start) return std::string::npos; - if (y.empty()) return start; - if (empty()) return std::string::npos; - - std::vector<unsigned>::const_iterator itr = std::search( - d_str.begin() + start, d_str.end(), y.d_str.begin(), y.d_str.end()); - if (itr != d_str.end()) { - return itr - d_str.begin(); - } - return std::string::npos; -} - -std::size_t String::rfind(const String &y, const std::size_t start) const { - if (size() < y.size() + start) return std::string::npos; - if (y.empty()) return start; - if (empty()) return std::string::npos; - - std::vector<unsigned>::const_reverse_iterator itr = std::search( - d_str.rbegin() + start, d_str.rend(), y.d_str.rbegin(), y.d_str.rend()); - if (itr != d_str.rend()) { - return itr - d_str.rbegin(); - } - return std::string::npos; -} - -bool String::hasPrefix(const String& y) const -{ - size_t s = size(); - size_t ys = y.size(); - if (ys > s) - { - return false; - } - for (size_t i = 0; i < ys; i++) - { - if (d_str[i] != y.d_str[i]) - { - return false; - } - } - return true; -} - -bool String::hasSuffix(const String& y) const -{ - size_t s = size(); - size_t ys = y.size(); - if (ys > s) - { - return false; - } - size_t idiff = s - ys; - for (size_t i = 0; i < ys; i++) - { - if (d_str[i + idiff] != y.d_str[i]) - { - return false; - } - } - return true; -} - -String String::replace(const String &s, const String &t) const { - std::size_t ret = find(s); - if (ret != std::string::npos) { - std::vector<unsigned int> vec; - vec.insert(vec.begin(), d_str.begin(), d_str.begin() + ret); - vec.insert(vec.end(), t.d_str.begin(), t.d_str.end()); - vec.insert(vec.end(), d_str.begin() + ret + s.size(), d_str.end()); - return String(vec); - } else { - return *this; - } -} - -String String::substr(std::size_t i) const { - Assert(i <= size()); - std::vector<unsigned int> ret_vec; - std::vector<unsigned int>::const_iterator itr = d_str.begin() + i; - ret_vec.insert(ret_vec.end(), itr, d_str.end()); - return String(ret_vec); -} - -String String::substr(std::size_t i, std::size_t j) const { - Assert(i + j <= size()); - std::vector<unsigned int> ret_vec; - std::vector<unsigned int>::const_iterator itr = d_str.begin() + i; - ret_vec.insert(ret_vec.end(), itr, itr + j); - return String(ret_vec); -} - -bool String::noOverlapWith(const String& y) const -{ - return y.find(*this) == std::string::npos - && this->find(y) == std::string::npos && this->overlap(y) == 0 - && y.overlap(*this) == 0; -} - -bool String::isNumber() const { - if (d_str.empty()) { - return false; - } - for (unsigned character : d_str) { - if (!isDigit(character)) - { - return false; - } - } - return true; -} - -bool String::isDigit(unsigned character) -{ - // '0' to '9' - return 48 <= character && character <= 57; -} - -bool String::isHexDigit(unsigned character) -{ - // '0' to '9' or 'A' to 'F' or 'a' to 'f' - return isDigit(character) || (65 <= character && character <= 70) - || (97 <= character && character <= 102); -} - -bool String::isPrintable(unsigned character) -{ - // Unicode 0x00020 (' ') to 0x0007E ('~') - return 32 <= character && character <= 126; -} - -size_t String::maxSize() { return std::numeric_limits<uint32_t>::max(); } - -Rational String::toNumber() const -{ - // when smt2 standard for strings is set, this may change, based on the - // semantics of str.from.int for leading zeros - return Rational(toString()); -} - -std::ostream &operator<<(std::ostream &os, const String &s) { - return os << "\"" << s.toString(true) << "\""; -} - -} // namespace CVC4 |