summaryrefslogtreecommitdiff
path: root/src/util/regexp.h
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2020-03-27 14:04:44 -0500
committerGitHub <noreply@github.com>2020-03-27 12:04:44 -0700
commit4b7fc20dcce9eefdf568937f5e2c54141c4f5c5b (patch)
tree70dc3d084b73f1ea313a214245a792295f75215d /src/util/regexp.h
parenta64866663f10db4ffadd2d48500cda05c4831f0e (diff)
Move string utility file (#4164)
Moves the string file to string.h. This is required since other required utilities will soon need to be added to regexp.h.
Diffstat (limited to 'src/util/regexp.h')
-rw-r--r--src/util/regexp.h274
1 files changed, 0 insertions, 274 deletions
diff --git a/src/util/regexp.h b/src/util/regexp.h
deleted file mode 100644
index 56fb969a3..000000000
--- a/src/util/regexp.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/********************* */
-/*! \file regexp.h
- ** \verbatim
- ** Top contributors (to current version):
- ** Andrew Reynolds, Tim King, Tianyi Liang
- ** This file is part of the CVC4 project.
- ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
- ** in the top-level source directory) and their institutional affiliations.
- ** All rights reserved. See the file COPYING in the top-level source
- ** directory for licensing information.\endverbatim
- **
- ** \brief [[ Add one-line brief description here ]]
- **
- ** [[ Add lengthier description here ]]
- ** \todo document this file
- **/
-
-#include "cvc4_public.h"
-
-#ifndef CVC4__REGEXP_H
-#define CVC4__REGEXP_H
-
-#include <cstddef>
-#include <functional>
-#include <ostream>
-#include <string>
-#include <vector>
-#include "util/rational.h"
-
-namespace CVC4 {
-
-/** The CVC4 string class
- *
- * This data structure is the domain of values for the string type. It can also
- * be used as a generic utility for representing strings.
- */
-class CVC4_PUBLIC String {
- public:
- /**
- * This is the cardinality of the alphabet that is representable by this
- * class. Notice that this must be greater than or equal to the cardinality
- * of the alphabet that the string theory reasons about.
- *
- * This must be strictly less than std::numeric_limits<unsigned>::max().
- *
- * As per the SMT-LIB standard for strings, we support the first 3 planes of
- * Unicode characters, where 196608 = 3*16^4.
- */
- static inline unsigned num_codes() { return 196608; }
- /** constructors for String
- *
- * Internally, a CVC4::String is represented by a vector of unsigned
- * integers (d_str) representing the code points of the characters.
- *
- * To build a string from a C++ string, we may process escape sequences
- * according to the SMT-LIB standard. In particular, if useEscSequences is
- * true, we convert unicode escape sequences:
- * \u d_3 d_2 d_1 d_0
- * \u{d_0}
- * \u{d_1 d_0}
- * \u{d_2 d_1 d_0}
- * \u{d_3 d_2 d_1 d_0}
- * \u{d_4 d_3 d_2 d_1 d_0}
- * where d_0 ... d_4 are hexidecimal digits, to the appropriate character.
- *
- * If useEscSequences is false, then the characters of the constructed
- * CVC4::String correspond one-to-one with the input string.
- */
- String() = default;
- explicit String(const std::string& s, bool useEscSequences = false)
- : d_str(toInternal(s, useEscSequences))
- {
- }
- explicit String(const char* s, bool useEscSequences = false)
- : d_str(toInternal(std::string(s), useEscSequences))
- {
- }
- explicit String(const std::vector<unsigned>& s);
-
- String& operator=(const String& y) {
- if (this != &y) {
- d_str = y.d_str;
- }
- return *this;
- }
-
- String concat(const String& other) const;
-
- bool operator==(const String& y) const { return cmp(y) == 0; }
- bool operator!=(const String& y) const { return cmp(y) != 0; }
- bool operator<(const String& y) const { return cmp(y) < 0; }
- bool operator>(const String& y) const { return cmp(y) > 0; }
- bool operator<=(const String& y) const { return cmp(y) <= 0; }
- bool operator>=(const String& y) const { return cmp(y) >= 0; }
-
- /**
- * Returns true if this string is equal to y for their first n characters.
- * If n is larger than the length of this string or y, this method returns
- * true if and only if this string is equal to y.
- */
- bool strncmp(const String& y, std::size_t n) const;
- /**
- * Returns true if this string is equal to y for their last n characters.
- * Similar to strncmp, if n is larger than the length of this string or y,
- * this method returns true if and only if this string is equal to y.
- */
- bool rstrncmp(const String& y, std::size_t n) const;
-
- /* toString
- * Converts this string to a std::string.
- *
- * The unprintable characters are converted to unicode escape sequences as
- * described above.
- *
- * If useEscSequences is false, the string's printable characters are
- * printed as characters. Notice that for all std::string s having only
- * printable characters, we have that
- * CVC4::String( s ).toString() = s.
- */
- std::string toString(bool useEscSequences = false) const;
- /** is this the empty string? */
- bool empty() const { return d_str.empty(); }
- /** is this the empty string? */
- bool isEmptyString() const { return empty(); }
- /** is less than or equal to string y */
- bool isLeq(const String& y) const;
- /** Return the length of the string */
- std::size_t size() const { return d_str.size(); }
-
- bool isRepeated() const;
- bool tailcmp(const String& y, int& c) const;
-
- /**
- * Return the first position y occurs in this string, or std::string::npos
- * otherwise.
- */
- std::size_t find(const String& y, const std::size_t start = 0) const;
- /**
- * Return the first position y occurs in this string searching from the end,
- * or std::string::npos otherwise.
- */
- std::size_t rfind(const String& y, const std::size_t start = 0) const;
- /** Returns true if y is a prefix of this */
- bool hasPrefix(const String& y) const;
- /** Returns true if y is a suffix of this */
- bool hasSuffix(const String& y) const;
- /** Replace the first occurrence of s in this string with t */
- String replace(const String& s, const String& t) const;
- /** Return the substring of this string starting at index i */
- String substr(std::size_t i) const;
- /** Return the substring of this string starting at index i with size at most
- * j */
- String substr(std::size_t i, std::size_t j) const;
- /** Return the prefix of this string of size at most i */
- String prefix(std::size_t i) const { return substr(0, i); }
- /** Return the suffix of this string of size at most i */
- String suffix(std::size_t i) const { return substr(size() - i, i); }
-
- /**
- * Checks if there is any overlap between this string and another string. This
- * corresponds to checking whether one string contains the other and whether a
- * substring of one is a prefix of the other and vice-versa.
- *
- * @param y The other string
- * @return True if there is an overlap, false otherwise
- */
- bool noOverlapWith(const String& y) const;
-
- /** string overlap
- *
- * if overlap returns m>0,
- * then the maximal suffix of this string that is a prefix of y is of length m.
- *
- * For example, if x is "abcdef", then:
- * x.overlap("defg") = 3
- * x.overlap("ab") = 0
- * x.overlap("d") = 0
- * x.overlap("bcdefdef") = 5
- */
- std::size_t overlap(const String& y) const;
- /** string reverse overlap
- *
- * if roverlap returns m>0,
- * then the maximal prefix of this string that is a suffix of y is of length m.
- *
- * For example, if x is "abcdef", then:
- * x.roverlap("aaabc") = 3
- * x.roverlap("def") = 0
- * x.roverlap("d") = 0
- * x.roverlap("defabcde") = 5
- *
- * Notice that x.overlap(y) = y.roverlap(x)
- */
- std::size_t roverlap(const String& y) const;
-
- /**
- * Returns true if this string corresponds in text to a number, for example
- * this returns true for strings "7", "12", "004", "0" and false for strings
- * "abc", "4a", "-4", "".
- */
- bool isNumber() const;
- /** Returns the corresponding rational for the text of this string. */
- Rational toNumber() const;
- /** Get the unsigned representation (code points) of this string */
- const std::vector<unsigned>& getVec() const { return d_str; }
- /**
- * Get the unsigned (code point) value of the first character in this string
- */
- unsigned front() const;
- /**
- * Get the unsigned (code point) value of the last character in this string
- */
- unsigned back() const;
- /** is the unsigned a digit?
- *
- * This is true for code points between 48 ('0') and 57 ('9').
- */
- static bool isDigit(unsigned character);
- /** is the unsigned a hexidecimal digit?
- *
- * This is true for code points between 48 ('0') and 57 ('9'), code points
- * between 65 ('A') and 70 ('F) and code points between 97 ('a') and 102 ('f).
- */
- static bool isHexDigit(unsigned character);
- /** is the unsigned a printable code point?
- *
- * This is true for Unicode 32 (' ') to 126 ('~').
- */
- static bool isPrintable(unsigned character);
-
- /**
- * Returns the maximum length of string representable by this class.
- * Corresponds to the maximum size of d_str.
- */
- static size_t maxSize();
- private:
- /**
- * Helper for toInternal: add character ch to vector vec, storing a string in
- * internal format. This throws an error if ch is not a printable character,
- * since non-printable characters must be escaped in SMT-LIB.
- */
- static void addCharToInternal(unsigned char ch, std::vector<unsigned>& vec);
- /**
- * Convert the string s to the internal format (vector of code points).
- * The argument useEscSequences is whether to process unicode escape
- * sequences.
- */
- static std::vector<unsigned> toInternal(const std::string& s,
- bool useEscSequences);
-
- /**
- * Returns a negative number if *this < y, 0 if *this and y are equal and a
- * positive number if *this > y.
- */
- int cmp(const String& y) const;
-
- std::vector<unsigned> d_str;
-}; /* class String */
-
-namespace strings {
-
-struct CVC4_PUBLIC StringHashFunction {
- size_t operator()(const ::CVC4::String& s) const {
- return std::hash<std::string>()(s.toString());
- }
-}; /* struct StringHashFunction */
-
-} // namespace strings
-
-std::ostream& operator<<(std::ostream& os, const String& s) CVC4_PUBLIC;
-
-} // namespace CVC4
-
-#endif /* CVC4__REGEXP_H */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback