diff options
author | Christopher L. Conway <christopherleeconway@gmail.com> | 2010-05-07 19:44:05 +0000 |
---|---|---|
committer | Christopher L. Conway <christopherleeconway@gmail.com> | 2010-05-07 19:44:05 +0000 |
commit | 46e4487c37628217ec64a2b325b287acfb0ae8c5 (patch) | |
tree | ae1b831bdf134b1e5ba68c5d15731971a5bdb25c /src | |
parent | dded216dc01dc123bd54a33b1ca5b6d3c016b237 (diff) |
Tightening lexer rules for numerals in SMT v2
Diffstat (limited to 'src')
-rw-r--r-- | src/parser/parser.cpp | 3 | ||||
-rw-r--r-- | src/parser/smt2/Smt2.g | 63 |
2 files changed, 49 insertions, 17 deletions
diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp index a393bc85f..b4bafc953 100644 --- a/src/parser/parser.cpp +++ b/src/parser/parser.cpp @@ -39,7 +39,8 @@ Parser::Parser(ExprManager* exprManager, Input* input) : d_exprManager(exprManager), d_input(input), d_done(false), - d_checksEnabled(true) { + d_checksEnabled(true), + d_strictMode(false) { d_input->setParser(this); } diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g index bcab39183..268903bc7 100644 --- a/src/parser/smt2/Smt2.g +++ b/src/parser/smt2/Smt2.g @@ -33,6 +33,7 @@ options { } @lexer::includes { + /** This suppresses warnings about the redefinition of token symbols between * different parsers. The redefinitions should be harmless as long as no * client: (a) #include's the lexer headers for two grammars AND (b) uses the @@ -48,6 +49,19 @@ options { #define ANTLR3_INLINE_INPUT_ASCII } +@lexer::postinclude { +#include <stdint.h> + +#include "parser/parser.h" +#include "parser/antlr_input.h" + +using namespace CVC4; +using namespace CVC4::parser; + +#undef PARSER_STATE +#define PARSER_STATE ((Parser*)LEXER->super) +} + @parser::includes { #include "expr/command.h" #include "parser/parser.h" @@ -147,7 +161,7 @@ command returns [CVC4::Command* cmd] setInfo(PARSER_STATE,name,sexpr); cmd = new SetInfoCommand(name,sexpr); } | /* sort declaration */ - DECLARE_SORT_TOK symbol[name,CHECK_UNDECLARED,SYM_SORT] n=NUMERAL + DECLARE_SORT_TOK symbol[name,CHECK_UNDECLARED,SYM_SORT] n=INTEGER_LITERAL { Debug("parser") << "declare sort: '" << name << "' arity=" << n << std::endl; if( AntlrInput::tokenToInteger(n) > 0 ) { Unimplemented("Parameterized user sorts."); @@ -179,10 +193,10 @@ symbolicExpr[CVC4::SExpr& sexpr] @declarations { std::vector<SExpr> children; } - : NUMERAL - { sexpr = SExpr(AntlrInput::tokenText($NUMERAL)); } - | RATIONAL - { sexpr = SExpr(AntlrInput::tokenText($RATIONAL)); } + : INTEGER_LITERAL + { sexpr = SExpr(AntlrInput::tokenText($INTEGER_LITERAL)); } + | RATIONAL_LITERAL + { sexpr = SExpr(AntlrInput::tokenText($RATIONAL_LITERAL)); } | STRING_LITERAL { sexpr = SExpr(AntlrInput::tokenText($STRING_LITERAL)); } | SYMBOL @@ -256,11 +270,11 @@ term[CVC4::Expr& expr] /* constants */ | TRUE_TOK { expr = MK_CONST(true); } | FALSE_TOK { expr = MK_CONST(false); } - | NUMERAL - { expr = MK_CONST( AntlrInput::tokenToInteger($NUMERAL) ); } - | RATIONAL + | INTEGER_LITERAL + { expr = MK_CONST( AntlrInput::tokenToInteger($INTEGER_LITERAL) ); } + | RATIONAL_LITERAL { // FIXME: This doesn't work because an SMT rational is not a valid GMP rational string - expr = MK_CONST( AntlrInput::tokenToRational($RATIONAL) ); } + expr = MK_CONST( AntlrInput::tokenToRational($RATIONAL_LITERAL) ); } | HEX_LITERAL { Assert( AntlrInput::tokenText($HEX_LITERAL).find("#x") == 0 ); std::string hexString = AntlrInput::tokenTextSubstr($HEX_LITERAL,2); @@ -460,20 +474,37 @@ WHITESPACE ; /** - * Matches an integer constant from the input (non-empty sequence of digits). - * This is a bit looser than what the standard allows, because it accepts - * leading zeroes. + * Matches an integer constant from the input (non-empty sequence of digits, with + * no leading zeroes). */ -NUMERAL - : DIGIT+ +INTEGER_LITERAL + : NUMERAL ; +/** Match an integer constant. In non-strict mode, this is any sequence of + * digits. In strict mode, non-zero integers can't have leading zeroes. */ +fragment NUMERAL +@init { + char *start = (char*) GETCHARINDEX(); +} + : DIGIT+ + { Debug("parser-extra") << "NUMERAL: " + << (uintptr_t)start << ".." << GETCHARINDEX() + << " strict? " << (bool)(PARSER_STATE->strictModeEnabled()) + << " ^0? " << (bool)(*start == '0') + << " len>1? " << (bool)(start < (char*)(GETCHARINDEX() - 1)) + << endl; } + { !PARSER_STATE->strictModeEnabled() || + *start != '0' || + start == (char*)(GETCHARINDEX() - 1) }? + ; + /** * Matches a rational constant from the input. This is a bit looser * than what the standard allows, because it accepts leading zeroes. */ -RATIONAL - : DIGIT+ '.' DIGIT+ +RATIONAL_LITERAL + : NUMERAL '.' DIGIT+ ; /** |