summaryrefslogtreecommitdiff
path: root/src/util
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2018-05-07 10:22:27 -0500
committerGitHub <noreply@github.com>2018-05-07 10:22:27 -0500
commitdb43ae511c2103f1e9718a8954e26cf7866d14a8 (patch)
tree3d0654dd38d404bbe31c2bd543a003b351d37336 /src/util
parent884ad1a946ad6a04664ef97121ce1cebb5513d40 (diff)
Add support for str.code (#1821)
Diffstat (limited to 'src/util')
-rw-r--r--src/util/regexp.cpp26
-rw-r--r--src/util/regexp.h48
2 files changed, 61 insertions, 13 deletions
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp
index 9aaad522a..4cbda5147 100644
--- a/src/util/regexp.cpp
+++ b/src/util/regexp.cpp
@@ -32,6 +32,32 @@ namespace CVC4 {
static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values.");
+unsigned String::convertCharToUnsignedInt(unsigned char c)
+{
+ return convertCodeToUnsignedInt(static_cast<unsigned>(c));
+}
+unsigned char String::convertUnsignedIntToChar(unsigned i)
+{
+ Assert(i < num_codes());
+ return static_cast<unsigned char>(convertUnsignedIntToCode(i));
+}
+bool String::isPrintable(unsigned i)
+{
+ Assert(i < num_codes());
+ unsigned char c = convertUnsignedIntToChar(i);
+ return (c >= ' ' && c <= '~');
+}
+unsigned String::convertCodeToUnsignedInt(unsigned c)
+{
+ Assert(c < num_codes());
+ return (c < start_code() ? c + num_codes() : c) - start_code();
+}
+unsigned String::convertUnsignedIntToCode(unsigned i)
+{
+ Assert(i < num_codes());
+ return (i + start_code()) % num_codes();
+}
+
int String::cmp(const String &y) const {
if (size() != y.size()) {
return size() < y.size() ? -1 : 1;
diff --git a/src/util/regexp.h b/src/util/regexp.h
index 42d232259..c8b491475 100644
--- a/src/util/regexp.h
+++ b/src/util/regexp.h
@@ -28,21 +28,43 @@
namespace CVC4 {
+/** The CVC4 string class
+ *
+ * This data structure is the domain of values for the string type. It can also
+ * be used as a generic utility for representing strings.
+ */
class CVC4_PUBLIC String {
public:
- static unsigned convertCharToUnsignedInt(unsigned char c) {
- unsigned i = c;
- i = i + 191;
- return (i >= 256 ? i - 256 : i);
- }
- static unsigned char convertUnsignedIntToChar(unsigned i) {
- unsigned ii = i + 65;
- return (unsigned char)(ii >= 256 ? ii - 256 : ii);
- }
- static bool isPrintable(unsigned i) {
- unsigned char c = convertUnsignedIntToChar(i);
- return (c >= ' ' && c <= '~'); // isprint( (int)c );
- }
+ /**
+ * The start ASCII code. In our string representation below, we represent
+ * characters using a vector d_vec of unsigned integers. We refer to this as
+ * the "internal representation" for the string.
+ *
+ * We make unsigned integer 0 correspond to the 65th character ("A") in the
+ * ASCII alphabet to make models intuitive. In particular, say if we have
+ * a set of string variables that are distinct but otherwise unconstrained,
+ * then the model may assign them "A", "B", "C", ...
+ */
+ static inline unsigned start_code() { return 65; }
+ /**
+ * This is the cardinality of the alphabet that is representable by this
+ * class. Notice that this must be greater than or equal to the cardinality
+ * of the alphabet that the string theory reasons about.
+ */
+ static inline unsigned num_codes() { return 256; }
+ /**
+ * Convert unsigned char to the unsigned used in the internal representation
+ * in d_vec below.
+ */
+ static unsigned convertCharToUnsignedInt(unsigned char c);
+ /** Convert the internal unsigned to a unsigned char. */
+ static unsigned char convertUnsignedIntToChar(unsigned i);
+ /** Does the internal unsigned correspond to a printable character? */
+ static bool isPrintable(unsigned i);
+ /** get the internal unsigned for ASCII code c. */
+ static unsigned convertCodeToUnsignedInt(unsigned c);
+ /** get the ASCII code number that internal unsigned i corresponds to. */
+ static unsigned convertUnsignedIntToCode(unsigned i);
/** constructors for String
*
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback