summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristopher L. Conway <christopherleeconway@gmail.com>2010-05-07 19:44:05 +0000
committerChristopher L. Conway <christopherleeconway@gmail.com>2010-05-07 19:44:05 +0000
commit46e4487c37628217ec64a2b325b287acfb0ae8c5 (patch)
treeae1b831bdf134b1e5ba68c5d15731971a5bdb25c
parentdded216dc01dc123bd54a33b1ca5b6d3c016b237 (diff)
Tightening lexer rules for numerals in SMT v2
-rw-r--r--src/parser/parser.cpp3
-rw-r--r--src/parser/smt2/Smt2.g63
-rw-r--r--test/unit/parser/parser_black.h36
3 files changed, 76 insertions, 26 deletions
diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp
index a393bc85f..b4bafc953 100644
--- a/src/parser/parser.cpp
+++ b/src/parser/parser.cpp
@@ -39,7 +39,8 @@ Parser::Parser(ExprManager* exprManager, Input* input) :
d_exprManager(exprManager),
d_input(input),
d_done(false),
- d_checksEnabled(true) {
+ d_checksEnabled(true),
+ d_strictMode(false) {
d_input->setParser(this);
}
diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g
index bcab39183..268903bc7 100644
--- a/src/parser/smt2/Smt2.g
+++ b/src/parser/smt2/Smt2.g
@@ -33,6 +33,7 @@ options {
}
@lexer::includes {
+
/** This suppresses warnings about the redefinition of token symbols between
* different parsers. The redefinitions should be harmless as long as no
* client: (a) #include's the lexer headers for two grammars AND (b) uses the
@@ -48,6 +49,19 @@ options {
#define ANTLR3_INLINE_INPUT_ASCII
}
+@lexer::postinclude {
+#include <stdint.h>
+
+#include "parser/parser.h"
+#include "parser/antlr_input.h"
+
+using namespace CVC4;
+using namespace CVC4::parser;
+
+#undef PARSER_STATE
+#define PARSER_STATE ((Parser*)LEXER->super)
+}
+
@parser::includes {
#include "expr/command.h"
#include "parser/parser.h"
@@ -147,7 +161,7 @@ command returns [CVC4::Command* cmd]
setInfo(PARSER_STATE,name,sexpr);
cmd = new SetInfoCommand(name,sexpr); }
| /* sort declaration */
- DECLARE_SORT_TOK symbol[name,CHECK_UNDECLARED,SYM_SORT] n=NUMERAL
+ DECLARE_SORT_TOK symbol[name,CHECK_UNDECLARED,SYM_SORT] n=INTEGER_LITERAL
{ Debug("parser") << "declare sort: '" << name << "' arity=" << n << std::endl;
if( AntlrInput::tokenToInteger(n) > 0 ) {
Unimplemented("Parameterized user sorts.");
@@ -179,10 +193,10 @@ symbolicExpr[CVC4::SExpr& sexpr]
@declarations {
std::vector<SExpr> children;
}
- : NUMERAL
- { sexpr = SExpr(AntlrInput::tokenText($NUMERAL)); }
- | RATIONAL
- { sexpr = SExpr(AntlrInput::tokenText($RATIONAL)); }
+ : INTEGER_LITERAL
+ { sexpr = SExpr(AntlrInput::tokenText($INTEGER_LITERAL)); }
+ | RATIONAL_LITERAL
+ { sexpr = SExpr(AntlrInput::tokenText($RATIONAL_LITERAL)); }
| STRING_LITERAL
{ sexpr = SExpr(AntlrInput::tokenText($STRING_LITERAL)); }
| SYMBOL
@@ -256,11 +270,11 @@ term[CVC4::Expr& expr]
/* constants */
| TRUE_TOK { expr = MK_CONST(true); }
| FALSE_TOK { expr = MK_CONST(false); }
- | NUMERAL
- { expr = MK_CONST( AntlrInput::tokenToInteger($NUMERAL) ); }
- | RATIONAL
+ | INTEGER_LITERAL
+ { expr = MK_CONST( AntlrInput::tokenToInteger($INTEGER_LITERAL) ); }
+ | RATIONAL_LITERAL
{ // FIXME: This doesn't work because an SMT rational is not a valid GMP rational string
- expr = MK_CONST( AntlrInput::tokenToRational($RATIONAL) ); }
+ expr = MK_CONST( AntlrInput::tokenToRational($RATIONAL_LITERAL) ); }
| HEX_LITERAL
{ Assert( AntlrInput::tokenText($HEX_LITERAL).find("#x") == 0 );
std::string hexString = AntlrInput::tokenTextSubstr($HEX_LITERAL,2);
@@ -460,20 +474,37 @@ WHITESPACE
;
/**
- * Matches an integer constant from the input (non-empty sequence of digits).
- * This is a bit looser than what the standard allows, because it accepts
- * leading zeroes.
+ * Matches an integer constant from the input (non-empty sequence of digits, with
+ * no leading zeroes).
*/
-NUMERAL
- : DIGIT+
+INTEGER_LITERAL
+ : NUMERAL
;
+/** Match an integer constant. In non-strict mode, this is any sequence of
+ * digits. In strict mode, non-zero integers can't have leading zeroes. */
+fragment NUMERAL
+@init {
+ char *start = (char*) GETCHARINDEX();
+}
+ : DIGIT+
+ { Debug("parser-extra") << "NUMERAL: "
+ << (uintptr_t)start << ".." << GETCHARINDEX()
+ << " strict? " << (bool)(PARSER_STATE->strictModeEnabled())
+ << " ^0? " << (bool)(*start == '0')
+ << " len>1? " << (bool)(start < (char*)(GETCHARINDEX() - 1))
+ << endl; }
+ { !PARSER_STATE->strictModeEnabled() ||
+ *start != '0' ||
+ start == (char*)(GETCHARINDEX() - 1) }?
+ ;
+
/**
* Matches a rational constant from the input. This is a bit looser
* than what the standard allows, because it accepts leading zeroes.
*/
-RATIONAL
- : DIGIT+ '.' DIGIT+
+RATIONAL_LITERAL
+ : NUMERAL '.' DIGIT+
;
/**
diff --git a/test/unit/parser/parser_black.h b/test/unit/parser/parser_black.h
index b4044b96f..37fac552e 100644
--- a/test/unit/parser/parser_black.h
+++ b/test/unit/parser/parser_black.h
@@ -176,7 +176,8 @@ const string goodSmt2Exprs[] = {
"0",
"1.5",
"#xfab09c7",
- "#b0001011"
+ "#b0001011",
+ "(* 5 01)" // '01' is OK in non-strict mode
};
const int numGoodSmt2Exprs = sizeof(goodSmt2Exprs) / sizeof(string);
@@ -192,7 +193,8 @@ const string badSmt2Inputs[] = {
const int numBadSmt2Inputs = sizeof(badSmt2Inputs) / sizeof(string);
-/* The following expressions are invalid even after setupContext. */
+/* The following expressions are invalid even after setupContext
+ * in non-strict mode. */
const string badSmt2Exprs[] = {
"(and)", // wrong arity
"(and a b", // no closing paren
@@ -216,6 +218,15 @@ const string badSmt2Exprs[] = {
const int numBadSmt2Exprs = sizeof(badSmt2Exprs) / sizeof(string);
+/* The following expressions are invalid in strict mode, even after setupContext. */
+const string badStrictSmt2Exprs[] = {
+ "(and a)", // no unary and's
+ "(or a)", // no unary or's
+ "(* 5 01)" // '01' is not a valid integer constant
+};
+
+const int numBadStrictSmt2Exprs = sizeof(badStrictSmt2Exprs) / sizeof(string);
+
class ParserBlack : public CxxTest::TestSuite {
ExprManager *d_exprManager;
@@ -270,7 +281,7 @@ class ParserBlack : public CxxTest::TestSuite {
void tryBadInputs(InputLanguage d_lang, const string badInputs[], int numInputs) {
for(int i = 0; i < numInputs; ++i) {
-// cout << "Testing bad input: '" << badInputs[i] << "'\n";
+// cerr << "Testing bad input: '" << badInputs[i] << "'\n";
// Debug.on("parser");
Input* input = Input::newStringInput(d_lang, badInputs[i], "test");
Parser parser(d_exprManager, input);
@@ -320,15 +331,18 @@ class ParserBlack : public CxxTest::TestSuite {
* input. It's more trouble than it's worth to check that the whole input was
* consumed here, so just be careful to avoid valid prefixes in tests.
*/
- void tryBadExprs(InputLanguage d_lang, const string badBooleanExprs[], int numExprs) {
- //Debug.on("parser");
+ void tryBadExprs(InputLanguage d_lang, const string badBooleanExprs[], int numExprs,
+ bool strictMode = false) {
+// Debug.on("parser");
+// Debug.on("parser-extra");
for(int i = 0; i < numExprs; ++i) {
- // cout << "Testing bad expr: '" << badBooleanExprs[i] << "'\n";
-// istringstream stream(context + badBooleanExprs[i]);
+// cout << "Testing bad expr: '" << badBooleanExprs[i] << "'\n";
Input* input = Input::newStringInput(d_lang,
badBooleanExprs[i], "test");
Parser parser(d_exprManager, input);
-
+ if( strictMode ) {
+ parser.enableStrictMode();
+ }
setupContext(parser);
TS_ASSERT( !parser.done() );
TS_ASSERT_THROWS
@@ -340,7 +354,7 @@ class ParserBlack : public CxxTest::TestSuite {
const ParserException& );
delete input;
}
- //Debug.off("parser");
+// Debug.off("parser");
}
public:
@@ -405,4 +419,8 @@ public:
void testBadSmt2Exprs() {
tryBadExprs(LANG_SMTLIB_V2,badSmt2Exprs,numBadSmt2Exprs);
}
+
+ void testBadSmt2StrictExprs() {
+ tryBadExprs(LANG_SMTLIB_V2,badStrictSmt2Exprs,numBadStrictSmt2Exprs,true);
+ }
};
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback