summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2017-10-18 12:11:50 -0500
committerGitHub <noreply@github.com>2017-10-18 12:11:50 -0500
commit6f18015fdcb824f46b969882aa45187b46306e97 (patch)
tree8d04dcb3fa263a359886aa156851d4dec3e2c7e8 /src
parent382813c77025e05550876bf02f2782b72d6c8927 (diff)
Strings API escape sequences (#1245)
* Argument for strings class to specify whether to process escape sequences. * Change default value on string constructor. * Make CVC4::String::toString symmetric to the constructor for CVC4::String, document. * Clang format.
Diffstat (limited to 'src')
-rw-r--r--src/parser/cvc/Cvc.g2
-rw-r--r--src/parser/smt2/Smt2.g4
-rw-r--r--src/printer/smt2/smt2_printer.cpp4
-rw-r--r--src/util/regexp.cpp16
-rw-r--r--src/util/regexp.h49
5 files changed, 57 insertions, 18 deletions
diff --git a/src/parser/cvc/Cvc.g b/src/parser/cvc/Cvc.g
index eef7ca54d..6337fb5f6 100644
--- a/src/parser/cvc/Cvc.g
+++ b/src/parser/cvc/Cvc.g
@@ -1978,7 +1978,7 @@ stringTerm[CVC4::Expr& f]
/* string literal */
| str[s]
- { f = MK_CONST(CVC4::String(s)); }
+ { f = MK_CONST(CVC4::String(s, true)); }
| setsTerm[f]
;
diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g
index ce1cd1fbd..05faf040e 100644
--- a/src/parser/smt2/Smt2.g
+++ b/src/parser/smt2/Smt2.g
@@ -1031,7 +1031,7 @@ sygusGTerm[CVC4::SygusGTerm& sgt, std::string& fun]
| str[s,false]
{ Debug("parser-sygus") << "Sygus grammar " << fun << " : string literal \""
<< s << "\"" << std::endl;
- sgt.d_expr = MK_CONST( ::CVC4::String(s) );
+ sgt.d_expr = MK_CONST( ::CVC4::String(s, true) );
sgt.d_name = s;
sgt.d_gterm_type = SygusGTerm::gterm_op;
}
@@ -2328,7 +2328,7 @@ termNonVariable[CVC4::Expr& expr, CVC4::Expr& expr2]
expr = MK_CONST( BitVector(binString, 2) ); }
| str[s,false]
- { expr = MK_CONST( ::CVC4::String(s) ); }
+ { expr = MK_CONST( ::CVC4::String(s, true) ); }
| FP_RNE_TOK { expr = MK_CONST(roundNearestTiesToEven); }
| FP_RNA_TOK { expr = MK_CONST(roundNearestTiesToAway); }
| FP_RTP_TOK { expr = MK_CONST(roundTowardPositive); }
diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp
index 0a8020651..9c9d1fb76 100644
--- a/src/printer/smt2/smt2_printer.cpp
+++ b/src/printer/smt2/smt2_printer.cpp
@@ -94,7 +94,7 @@ static std::string maybeQuoteSymbol(const std::string& s) {
static bool stringifyRegexp(Node n, stringstream& ss) {
if(n.getKind() == kind::STRING_TO_REGEXP) {
- ss << n[0].getConst<String>().toString();
+ ss << n[0].getConst<String>().toString(true);
} else if(n.getKind() == kind::REGEXP_CONCAT) {
for(unsigned i = 0; i < n.getNumChildren(); ++i) {
if(!stringifyRegexp(n[i], ss)) {
@@ -256,7 +256,7 @@ void Smt2Printer::toStream(std::ostream& out, TNode n,
case kind::CONST_STRING: {
//const std::vector<unsigned int>& s = n.getConst<String>().getVec();
- std::string s = n.getConst<String>().toString();
+ std::string s = n.getConst<String>().toString(true);
out << '"';
for(size_t i = 0; i < s.size(); ++i) {
//char c = String::convertUnsignedIntToChar(s[i]);
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp
index ef1ab9249..681b574a3 100644
--- a/src/util/regexp.cpp
+++ b/src/util/regexp.cpp
@@ -84,11 +84,12 @@ bool String::rstrncmp(const String &y, const std::size_t np) const {
return true;
}
-std::vector<unsigned> String::toInternal(const std::string &s) {
+std::vector<unsigned> String::toInternal(const std::string &s,
+ bool useEscSequences) {
std::vector<unsigned> str;
unsigned i = 0;
while (i < s.size()) {
- if (s[i] == '\\') {
+ if (s[i] == '\\' && useEscSequences) {
i++;
if (i < s.size()) {
switch (s[i]) {
@@ -140,6 +141,7 @@ std::vector<unsigned> String::toInternal(const std::string &s) {
} break;
default: {
if (isdigit(s[i])) {
+ // octal escape sequences TODO : revisit (issue #1251).
int num = (int)s[i] - (int)'0';
bool flag = num < 4;
if (i + 1 < s.size() && num < 8 && isdigit(s[i + 1]) &&
@@ -171,7 +173,7 @@ std::vector<unsigned> String::toInternal(const std::string &s) {
throw CVC4::Exception("should be handled by lexer: \"" + s + "\"");
// str.push_back( convertCharToUnsignedInt('\\') );
}
- } else if ((unsigned)s[i] > 127) {
+ } else if ((unsigned)s[i] > 127 && useEscSequences) {
throw CVC4::Exception("Illegal String Literal: \"" + s +
"\", must use escaped sequence");
} else {
@@ -211,11 +213,13 @@ std::size_t String::roverlap(const String &y) const {
return i;
}
-std::string String::toString() const {
+std::string String::toString(bool useEscSequences) const {
std::string str;
for (unsigned int i = 0; i < size(); ++i) {
unsigned char c = convertUnsignedIntToChar(d_str[i]);
- if (isprint(c)) {
+ if (!useEscSequences) {
+ str += c;
+ } else if (isprint(c)) {
if (c == '\\') {
str += "\\\\";
}
@@ -386,7 +390,7 @@ unsigned char String::hexToDec(unsigned char c) {
}
std::ostream &operator<<(std::ostream &os, const String &s) {
- return os << "\"" << s.toString() << "\"";
+ return os << "\"" << s.toString(true) << "\"";
}
std::ostream &operator<<(std::ostream &out, const RegExp &s) {
diff --git a/src/util/regexp.h b/src/util/regexp.h
index f451a8dec..9d351dde4 100644
--- a/src/util/regexp.h
+++ b/src/util/regexp.h
@@ -44,9 +44,28 @@ class CVC4_PUBLIC String {
return (c >= ' ' && c <= '~'); // isprint( (int)c );
}
+ /** constructors for String
+ *
+ * Internally, a CVC4::String is represented by a vector of unsigned
+ * integers (d_str), where the correspondence between C++ characters
+ * to and from unsigned integers is determined by
+ * by convertCharToUnsignedInt and convertUnsignedIntToChar.
+ *
+ * If useEscSequences is true, then the escape sequences in the input
+ * are converted to the corresponding character. This constructor may
+ * throw an exception if the input contains unrecognized escape sequences.
+ * Currently supported escape sequences are \n, \t, \v, \b, \r, \f, \a, \\,
+ * \x[N] where N is a hexidecimal, and octal escape sequences of the
+ * form \[c1]([c2]([c3])?)? where c1, c2, c3 are digits from 0 to 7.
+ *
+ * If useEscSequences is false, then the characters of the constructed
+ * CVC4::String correspond one-to-one with the input string.
+ */
String() = default;
- explicit String(const std::string& s) : d_str(toInternal(s)) {}
- explicit String(const char* s) : d_str(toInternal(std::string(s))) {}
+ explicit String(const std::string& s, bool useEscSequences = false)
+ : d_str(toInternal(s, useEscSequences)) {}
+ explicit String(const char* s, bool useEscSequences = false)
+ : d_str(toInternal(std::string(s), useEscSequences)) {}
explicit String(const unsigned char c)
: d_str({convertCharToUnsignedInt(c)}) {}
explicit String(const std::vector<unsigned>& s) : d_str(s) {}
@@ -70,12 +89,27 @@ class CVC4_PUBLIC String {
bool strncmp(const String& y, const std::size_t np) const;
bool rstrncmp(const String& y, const std::size_t np) const;
- /*
- * Convenience functions
- */
- std::string toString() const;
+ /* toString
+ * Converts this string to a std::string.
+ *
+ * If useEscSequences is true, then unprintable characters
+ * are converted to escape sequences. The escape sequences
+ * \n, \t, \v, \b, \r, \f, \a, \\ are printed in this way.
+ * For all other unprintable characters, we print \x[N] where
+ * [N] is the 2 digit hexidecimal corresponding to value of
+ * the character.
+ *
+ * If useEscSequences is false, the returned std::string's characters
+ * map one-to-one with the characters in this string.
+ * Notice that for all std::string s, we have that
+ * CVC4::String( s ).toString() = s.
+ */
+ std::string toString(bool useEscSequences = false) const;
+ /** is this the empty string? */
bool empty() const { return d_str.empty(); }
+ /** is this the empty string? */
bool isEmptyString() const { return empty(); }
+ /** Return the length of the string */
std::size_t size() const { return d_str.size(); }
unsigned char getFirstChar() const { return getUnsignedCharAt(0); }
@@ -107,7 +141,8 @@ class CVC4_PUBLIC String {
// guarded
static unsigned char hexToDec(unsigned char c);
- static std::vector<unsigned> toInternal(const std::string& s);
+ static std::vector<unsigned> toInternal(const std::string& s,
+ bool useEscSequences = true);
unsigned char getUnsignedCharAt(size_t pos) const;
/**
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback