summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndres Noetzli <andres.noetzli@gmail.com>2018-11-27 08:10:36 -0800
committerAndrew Reynolds <andrew.j.reynolds@gmail.com>2018-11-27 10:10:36 -0600
commit99a1da848889776586436f7f9aec9a1b088703c1 (patch)
tree1870aa917134196557f43cdc03d8dd36b667a2e3 /src
parent72f1d72852213f46d77c85216c9250bb0f0e3eae (diff)
Reduce lookahead when parsing string literals (#2721)
Diffstat (limited to 'src')
-rw-r--r--src/parser/smt2/Smt2.g105
-rw-r--r--src/parser/smt2/smt2.h7
2 files changed, 46 insertions, 66 deletions
diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g
index d9b0f622b..7143824d6 100644
--- a/src/parser/smt2/Smt2.g
+++ b/src/parser/smt2/Smt2.g
@@ -2611,65 +2611,50 @@ termList[std::vector<CVC4::Expr>& formulas, CVC4::Expr& expr]
* Matches a string, and strips off the quotes.
*/
str[std::string& s, bool fsmtlib]
- : STRING_LITERAL_2_0
- { s = AntlrInput::tokenText($STRING_LITERAL_2_0);
+ : STRING_LITERAL
+ {
+ s = AntlrInput::tokenText($STRING_LITERAL);
/* strip off the quotes */
s = s.substr(1, s.size() - 2);
- for(size_t i=0; i<s.size(); i++) {
- if((unsigned)s[i] > 127 && !isprint(s[i])) {
- PARSER_STATE->parseError("Extended/unprintable characters are not "
- "part of SMT-LIB, and they must be encoded "
- "as escape sequences");
+ for (size_t i = 0; i < s.size(); i++)
+ {
+ if ((unsigned)s[i] > 127 && !isprint(s[i]))
+ {
+ PARSER_STATE->parseError(
+ "Extended/unprintable characters are not "
+ "part of SMT-LIB, and they must be encoded "
+ "as escape sequences");
}
}
- if(fsmtlib) {
- /* handle SMT-LIB standard escapes '\\' and '\"' */
+ if (fsmtlib || PARSER_STATE->escapeDupDblQuote())
+ {
char* p_orig = strdup(s.c_str());
char *p = p_orig, *q = p_orig;
- while(*q != '\0') {
- if(*q == '\\') {
+ while (*q != '\0')
+ {
+ if (PARSER_STATE->escapeDupDblQuote() && *q == '"')
+ {
+ // Handle SMT-LIB >=2.5 standard escape '""'.
+ ++q;
+ assert(*q == '"');
+ }
+ else if (!PARSER_STATE->escapeDupDblQuote() && *q == '\\')
+ {
++q;
- if(*q == '\\' || *q == '"') {
- *p++ = *q++;
- } else {
+ // Handle SMT-LIB 2.0 standard escapes '\\' and '\"'.
+ if (*q != '\\' && *q != '"')
+ {
assert(*q != '\0');
*p++ = '\\';
- *p++ = *q++;
}
- } else {
- *p++ = *q++;
}
+ *p++ = *q++;
}
*p = '\0';
s = p_orig;
free(p_orig);
}
}
- | STRING_LITERAL_2_5
- { s = AntlrInput::tokenText($STRING_LITERAL_2_5);
- /* strip off the quotes */
- s = s.substr(1, s.size() - 2);
- for(size_t i=0; i<s.size(); i++) {
- if((unsigned)s[i] > 127 && !isprint(s[i])) {
- PARSER_STATE->parseError("Extended/unprintable characters are not "
- "part of SMT-LIB, and they must be encoded "
- "as escape sequences");
- }
- }
- // In the 2.5 version, always handle escapes (regardless of fsmtlib flag).
- char* p_orig = strdup(s.c_str());
- char *p = p_orig, *q = p_orig;
- while(*q != '\0') {
- if(*q == '"') {
- ++q;
- assert(*q == '"');
- }
- *p++ = *q++;
- }
- *p = '\0';
- s = p_orig;
- free(p_orig);
- }
;
/**
@@ -3276,37 +3261,25 @@ BINARY_LITERAL
;
/**
- * Matches a double-quoted string literal from SMT-LIB 2.0.
- * Escaping is supported, and * escape character '\' has to be escaped.
+ * Matches a double-quoted string literal. Depending on the language that is
+ * being parsed, different escape sequences are supported:
*
- * You shouldn't generally use this in parser rules, as the quotes
- * will be part of the token text. Use the str[] parser rule instead.
- */
-STRING_LITERAL_2_0
- : { PARSER_STATE->v2_0() }?=>
- '"' ('\\' . | ~('\\' | '"'))* '"'
- ;
-
-/**
- * Matches a double-quoted string literal from SMT-LIB 2.5.
- * You escape a double-quote inside the string with "", e.g.,
- * "This is a string literal with "" a single, embedded double quote."
+ * For SMT-LIB 2.0 the sequence \" is interpreted as a double quote (") and the
+ * sequence \\ is interpreted as a backslash (\).
+ *
+ * For SMT-LIB >=2.5 and SyGuS a double-quote inside a string is escaped with
+ * "", e.g., "This is a string literal with "" a single, embedded double
+ * quote."
*
* You shouldn't generally use this in parser rules, as the quotes
* will be part of the token text. Use the str[] parser rule instead.
*/
-STRING_LITERAL_2_5
- : { PARSER_STATE->v2_5() || PARSER_STATE->sygus() }?=>
+STRING_LITERAL
+ : { !PARSER_STATE->escapeDupDblQuote() }?=>
+ '"' ('\\' . | ~('\\' | '"'))* '"'
+ | { PARSER_STATE->escapeDupDblQuote() }?=>
'"' (~('"') | '""')* '"'
;
-
-/**
- * Matches sygus quoted literal
- */
-SYGUS_QUOTED_LITERAL
- : { PARSER_STATE->sygus() }?=>
- '"' (ALPHA|DIGIT)* '"'
- ;
/**
* Matches the comments and ignores them
diff --git a/src/parser/smt2/smt2.h b/src/parser/smt2/smt2.h
index 2d57332af..7a3dbb9db 100644
--- a/src/parser/smt2/smt2.h
+++ b/src/parser/smt2/smt2.h
@@ -183,6 +183,13 @@ private:
bool sygus() const { return getLanguage() == language::input::LANG_SYGUS; }
+ /**
+ * Returns true if the language that we are parsing (SMT-LIB version >=2.5
+ * and SyGuS) treats duplicate double quotes ("") as an escape sequence
+ * denoting a single double quote (").
+ */
+ bool escapeDupDblQuote() const { return v2_5() || sygus(); }
+
void setInfo(const std::string& flag, const SExpr& sexpr);
void setOption(const std::string& flag, const SExpr& sexpr);
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback