summaryrefslogtreecommitdiff
path: root/src/util/regexp.cpp
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2020-03-27 14:04:44 -0500
committerGitHub <noreply@github.com>2020-03-27 12:04:44 -0700
commit4b7fc20dcce9eefdf568937f5e2c54141c4f5c5b (patch)
tree70dc3d084b73f1ea313a214245a792295f75215d /src/util/regexp.cpp
parenta64866663f10db4ffadd2d48500cda05c4831f0e (diff)
Move string utility file (#4164)
Moves the string file to string.h. This is required since other required utilities will soon need to be added to regexp.h.
Diffstat (limited to 'src/util/regexp.cpp')
-rw-r--r--src/util/regexp.cpp488
1 files changed, 0 insertions, 488 deletions
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp
deleted file mode 100644
index 36ba7182b..000000000
--- a/src/util/regexp.cpp
+++ /dev/null
@@ -1,488 +0,0 @@
-/********************* */
-/*! \file regexp.cpp
- ** \verbatim
- ** Top contributors (to current version):
- ** Tim King, Tianyi Liang, Andrew Reynolds
- ** This file is part of the CVC4 project.
- ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
- ** in the top-level source directory) and their institutional affiliations.
- ** All rights reserved. See the file COPYING in the top-level source
- ** directory for licensing information.\endverbatim
- **
- ** \brief [[ Add one-line brief description here ]]
- **
- ** [[ Add lengthier description here ]]
- ** \todo document this file
- **/
-
-#include "util/regexp.h"
-
-#include <algorithm>
-#include <climits>
-#include <iomanip>
-#include <iostream>
-#include <sstream>
-
-#include "base/check.h"
-#include "base/exception.h"
-
-using namespace std;
-
-namespace CVC4 {
-
-static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values.");
-
-String::String(const std::vector<unsigned> &s) : d_str(s)
-{
-#ifdef CVC4_ASSERTIONS
- for (unsigned u : d_str)
- {
- Assert(u < num_codes());
- }
-#endif
-}
-
-int String::cmp(const String &y) const {
- if (size() != y.size()) {
- return size() < y.size() ? -1 : 1;
- }
- for (unsigned int i = 0; i < size(); ++i) {
- if (d_str[i] != y.d_str[i]) {
- unsigned cp = d_str[i];
- unsigned cpy = y.d_str[i];
- return cp < cpy ? -1 : 1;
- }
- }
- return 0;
-}
-
-String String::concat(const String &other) const {
- std::vector<unsigned int> ret_vec(d_str);
- ret_vec.insert(ret_vec.end(), other.d_str.begin(), other.d_str.end());
- return String(ret_vec);
-}
-
-bool String::strncmp(const String& y, std::size_t n) const
-{
- std::size_t b = (size() >= y.size()) ? size() : y.size();
- std::size_t s = (size() <= y.size()) ? size() : y.size();
- if (n > s) {
- if (b == s) {
- n = s;
- } else {
- return false;
- }
- }
- for (std::size_t i = 0; i < n; ++i) {
- if (d_str[i] != y.d_str[i]) return false;
- }
- return true;
-}
-
-bool String::rstrncmp(const String& y, std::size_t n) const
-{
- std::size_t b = (size() >= y.size()) ? size() : y.size();
- std::size_t s = (size() <= y.size()) ? size() : y.size();
- if (n > s) {
- if (b == s) {
- n = s;
- } else {
- return false;
- }
- }
- for (std::size_t i = 0; i < n; ++i) {
- if (d_str[size() - i - 1] != y.d_str[y.size() - i - 1]) return false;
- }
- return true;
-}
-
-void String::addCharToInternal(unsigned char ch, std::vector<unsigned>& str)
-{
- // if not a printable character
- if (ch > 127 || ch < 32)
- {
- std::stringstream serr;
- serr << "Illegal string character: \"" << ch
- << "\", must use escape sequence";
- throw CVC4::Exception(serr.str());
- }
- else
- {
- str.push_back(static_cast<unsigned>(ch));
- }
-}
-
-std::vector<unsigned> String::toInternal(const std::string& s,
- bool useEscSequences)
-{
- std::vector<unsigned> str;
- unsigned i = 0;
- while (i < s.size())
- {
- // get the current character
- char si = s[i];
- if (si != '\\' || !useEscSequences)
- {
- addCharToInternal(si, str);
- ++i;
- continue;
- }
- // the vector of characters, in case we fail to read an escape sequence
- std::vector<unsigned> nonEscCache;
- // process the '\'
- addCharToInternal(si, nonEscCache);
- ++i;
- // are we an escape sequence?
- bool isEscapeSequence = true;
- // the string corresponding to the hexidecimal code point
- std::stringstream hexString;
- // is the slash followed by a 'u'? Could be last character.
- if (i >= s.size() || s[i] != 'u')
- {
- isEscapeSequence = false;
- }
- else
- {
- // process the 'u'
- addCharToInternal(s[i], nonEscCache);
- ++i;
- bool isStart = true;
- bool isEnd = false;
- bool hasBrace = false;
- while (i < s.size())
- {
- // add the next character
- si = s[i];
- if (isStart)
- {
- isStart = false;
- // possibly read '{'
- if (si == '{')
- {
- hasBrace = true;
- addCharToInternal(si, nonEscCache);
- ++i;
- continue;
- }
- }
- else if (si == '}')
- {
- // can only end if we had an open brace and read at least one digit
- isEscapeSequence = hasBrace && !hexString.str().empty();
- isEnd = true;
- addCharToInternal(si, nonEscCache);
- ++i;
- break;
- }
- // must be a hex digit at this point
- if (!isHexDigit(static_cast<unsigned>(si)))
- {
- isEscapeSequence = false;
- break;
- }
- hexString << si;
- addCharToInternal(si, nonEscCache);
- ++i;
- if (!hasBrace && hexString.str().size() == 4)
- {
- // will be finished reading \ u d_3 d_2 d_1 d_0 with no parens
- isEnd = true;
- break;
- }
- else if (hasBrace && hexString.str().size() > 5)
- {
- // too many digits enclosed in brace, not an escape sequence
- isEscapeSequence = false;
- break;
- }
- }
- if (!isEnd)
- {
- // if we were interupted before ending, then this is not a valid
- // escape sequence
- isEscapeSequence = false;
- }
- }
- if (isEscapeSequence)
- {
- Assert(!hexString.str().empty() && hexString.str().size() <= 5);
- // Otherwise, we add the escaped character.
- // This is guaranteed not to overflow due to the length of hstr.
- uint32_t val;
- hexString >> std::hex >> val;
- if (val > num_codes())
- {
- // Failed due to being out of range. This can happen for strings of
- // the form \ u { d_4 d_3 d_2 d_1 d_0 } where d_4 is a hexidecimal not
- // in the range [0-2].
- isEscapeSequence = false;
- }
- else
- {
- str.push_back(val);
- }
- }
- // if we did not successfully parse an escape sequence, we add back all
- // characters that we cached
- if (!isEscapeSequence)
- {
- str.insert(str.end(), nonEscCache.begin(), nonEscCache.end());
- }
- }
-#ifdef CVC4_ASSERTIONS
- for (unsigned u : str)
- {
- Assert(u < num_codes());
- }
-#endif
- return str;
-}
-
-unsigned String::front() const
-{
- Assert(!d_str.empty());
- return d_str.front();
-}
-
-unsigned String::back() const
-{
- Assert(!d_str.empty());
- return d_str.back();
-}
-
-std::size_t String::overlap(const String &y) const {
- std::size_t i = size() < y.size() ? size() : y.size();
- for (; i > 0; i--) {
- String s = suffix(i);
- String p = y.prefix(i);
- if (s == p) {
- return i;
- }
- }
- return i;
-}
-
-std::size_t String::roverlap(const String &y) const {
- std::size_t i = size() < y.size() ? size() : y.size();
- for (; i > 0; i--) {
- String s = prefix(i);
- String p = y.suffix(i);
- if (s == p) {
- return i;
- }
- }
- return i;
-}
-
-std::string String::toString(bool useEscSequences) const {
- std::stringstream str;
- for (unsigned int i = 0; i < size(); ++i) {
- // we always print forward slash as a code point so that it cannot
- // be interpreted as specifying part of a code point, e.g. the string
- // '\' + 'u' + '0' of length three.
- if (isPrintable(d_str[i]) && d_str[i] != '\\' && !useEscSequences)
- {
- str << static_cast<char>(d_str[i]);
- }
- else
- {
- std::stringstream ss;
- ss << std::hex << d_str[i];
- str << "\\u{" << ss.str() << "}";
- }
- }
- return str.str();
-}
-
-bool String::isLeq(const String &y) const
-{
- for (unsigned i = 0; i < size(); ++i)
- {
- if (i >= y.size())
- {
- return false;
- }
- unsigned ci = d_str[i];
- unsigned cyi = y.d_str[i];
- if (ci > cyi)
- {
- return false;
- }
- if (ci < cyi)
- {
- return true;
- }
- }
- return true;
-}
-
-bool String::isRepeated() const {
- if (size() > 1) {
- unsigned int f = d_str[0];
- for (unsigned i = 1; i < size(); ++i) {
- if (f != d_str[i]) return false;
- }
- }
- return true;
-}
-
-bool String::tailcmp(const String &y, int &c) const {
- int id_x = size() - 1;
- int id_y = y.size() - 1;
- while (id_x >= 0 && id_y >= 0) {
- if (d_str[id_x] != y.d_str[id_y]) {
- c = id_x;
- return false;
- }
- --id_x;
- --id_y;
- }
- c = id_x == -1 ? (-(id_y + 1)) : (id_x + 1);
- return true;
-}
-
-std::size_t String::find(const String &y, const std::size_t start) const {
- if (size() < y.size() + start) return std::string::npos;
- if (y.empty()) return start;
- if (empty()) return std::string::npos;
-
- std::vector<unsigned>::const_iterator itr = std::search(
- d_str.begin() + start, d_str.end(), y.d_str.begin(), y.d_str.end());
- if (itr != d_str.end()) {
- return itr - d_str.begin();
- }
- return std::string::npos;
-}
-
-std::size_t String::rfind(const String &y, const std::size_t start) const {
- if (size() < y.size() + start) return std::string::npos;
- if (y.empty()) return start;
- if (empty()) return std::string::npos;
-
- std::vector<unsigned>::const_reverse_iterator itr = std::search(
- d_str.rbegin() + start, d_str.rend(), y.d_str.rbegin(), y.d_str.rend());
- if (itr != d_str.rend()) {
- return itr - d_str.rbegin();
- }
- return std::string::npos;
-}
-
-bool String::hasPrefix(const String& y) const
-{
- size_t s = size();
- size_t ys = y.size();
- if (ys > s)
- {
- return false;
- }
- for (size_t i = 0; i < ys; i++)
- {
- if (d_str[i] != y.d_str[i])
- {
- return false;
- }
- }
- return true;
-}
-
-bool String::hasSuffix(const String& y) const
-{
- size_t s = size();
- size_t ys = y.size();
- if (ys > s)
- {
- return false;
- }
- size_t idiff = s - ys;
- for (size_t i = 0; i < ys; i++)
- {
- if (d_str[i + idiff] != y.d_str[i])
- {
- return false;
- }
- }
- return true;
-}
-
-String String::replace(const String &s, const String &t) const {
- std::size_t ret = find(s);
- if (ret != std::string::npos) {
- std::vector<unsigned int> vec;
- vec.insert(vec.begin(), d_str.begin(), d_str.begin() + ret);
- vec.insert(vec.end(), t.d_str.begin(), t.d_str.end());
- vec.insert(vec.end(), d_str.begin() + ret + s.size(), d_str.end());
- return String(vec);
- } else {
- return *this;
- }
-}
-
-String String::substr(std::size_t i) const {
- Assert(i <= size());
- std::vector<unsigned int> ret_vec;
- std::vector<unsigned int>::const_iterator itr = d_str.begin() + i;
- ret_vec.insert(ret_vec.end(), itr, d_str.end());
- return String(ret_vec);
-}
-
-String String::substr(std::size_t i, std::size_t j) const {
- Assert(i + j <= size());
- std::vector<unsigned int> ret_vec;
- std::vector<unsigned int>::const_iterator itr = d_str.begin() + i;
- ret_vec.insert(ret_vec.end(), itr, itr + j);
- return String(ret_vec);
-}
-
-bool String::noOverlapWith(const String& y) const
-{
- return y.find(*this) == std::string::npos
- && this->find(y) == std::string::npos && this->overlap(y) == 0
- && y.overlap(*this) == 0;
-}
-
-bool String::isNumber() const {
- if (d_str.empty()) {
- return false;
- }
- for (unsigned character : d_str) {
- if (!isDigit(character))
- {
- return false;
- }
- }
- return true;
-}
-
-bool String::isDigit(unsigned character)
-{
- // '0' to '9'
- return 48 <= character && character <= 57;
-}
-
-bool String::isHexDigit(unsigned character)
-{
- // '0' to '9' or 'A' to 'F' or 'a' to 'f'
- return isDigit(character) || (65 <= character && character <= 70)
- || (97 <= character && character <= 102);
-}
-
-bool String::isPrintable(unsigned character)
-{
- // Unicode 0x00020 (' ') to 0x0007E ('~')
- return 32 <= character && character <= 126;
-}
-
-size_t String::maxSize() { return std::numeric_limits<uint32_t>::max(); }
-
-Rational String::toNumber() const
-{
- // when smt2 standard for strings is set, this may change, based on the
- // semantics of str.from.int for leading zeros
- return Rational(toString());
-}
-
-std::ostream &operator<<(std::ostream &os, const String &s) {
- return os << "\"" << s.toString(true) << "\"";
-}
-
-} // namespace CVC4
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback