From 8730e9320a833a9eb0e65074f9988950b7424c0c Mon Sep 17 00:00:00 2001 From: "Christopher L. Conway" Date: Tue, 30 Mar 2010 20:22:33 +0000 Subject: Merging from branches/antlr3 (r246:354) --- src/parser/smt/.gitignore | 4 + src/parser/smt/Makefile.am | 38 ++- src/parser/smt/Smt.g | 552 +++++++++++++++++++++++++++++++++++++++++++ src/parser/smt/smt_input.cpp | 73 ++++++ src/parser/smt/smt_input.h | 47 ++++ src/parser/smt/smt_lexer.g | 220 ----------------- src/parser/smt/smt_parser.g | 351 --------------------------- 7 files changed, 694 insertions(+), 591 deletions(-) create mode 100644 src/parser/smt/.gitignore create mode 100644 src/parser/smt/Smt.g create mode 100644 src/parser/smt/smt_input.cpp create mode 100644 src/parser/smt/smt_input.h delete mode 100644 src/parser/smt/smt_lexer.g delete mode 100644 src/parser/smt/smt_parser.g (limited to 'src/parser/smt') diff --git a/src/parser/smt/.gitignore b/src/parser/smt/.gitignore new file mode 100644 index 000000000..7fd0cf319 --- /dev/null +++ b/src/parser/smt/.gitignore @@ -0,0 +1,4 @@ +/.deps +/stamp-generated +/generated +/Makefile.in diff --git a/src/parser/smt/Makefile.am b/src/parser/smt/Makefile.am index 7fe235002..3ea6dc940 100644 --- a/src/parser/smt/Makefile.am +++ b/src/parser/smt/Makefile.am @@ -1,29 +1,29 @@ AM_CPPFLAGS = \ -D__BUILDING_CVC4PARSERLIB \ - -I@srcdir@/../../include -I@srcdir@/../.. + -I@srcdir@/../../include -I@srcdir@/../.. $(ANTLR_INCLUDES) AM_CXXFLAGS = -Wall -fvisibility=hidden +# Compile generated C files using C++ compiler +CC=$(CXX) noinst_LTLIBRARIES = libparsersmt.la ANTLR_TOKEN_STUFF = \ - @srcdir@/generated/SmtVocabularyTokenTypes.hpp \ - @srcdir@/generated/SmtVocabularyTokenTypes.txt \ - @srcdir@/generated/AntlrSmtParserTokenTypes.hpp \ - @srcdir@/generated/AntlrSmtParserTokenTypes.txt + @srcdir@/generated/Smt.tokens ANTLR_LEXER_STUFF = \ - @srcdir@/generated/AntlrSmtLexer.hpp \ - @srcdir@/generated/AntlrSmtLexer.cpp \ + @srcdir@/generated/SmtLexer.h \ + @srcdir@/generated/SmtLexer.c \ $(ANTLR_TOKEN_STUFF) ANTLR_PARSER_STUFF = \ - @srcdir@/generated/AntlrSmtParser.hpp \ - @srcdir@/generated/AntlrSmtParser.cpp + @srcdir@/generated/SmtParser.h \ + @srcdir@/generated/SmtParser.c ANTLR_STUFF = \ $(ANTLR_LEXER_STUFF) \ $(ANTLR_PARSER_STUFF) libparsersmt_la_SOURCES = \ - smt_lexer.g \ - smt_parser.g \ + Smt.g \ + smt_input.h \ + smt_input.cpp \ $(ANTLR_STUFF) BUILT_SOURCES = $(ANTLR_STUFF) @@ -36,16 +36,14 @@ maintainer-clean-local: @srcdir@/stamp-generated: mkdir -p @srcdir@/generated touch @srcdir@/stamp-generated + # antlr doesn't overwrite output files, it just leaves them. So we have to delete them first. -@srcdir@/generated/AntlrSmtLexer.hpp: smt_lexer.g @srcdir@/stamp-generated - $(AM_V_at)-rm -f $(ANTLR_LEXER_STUFF) - $(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/smt_lexer.g" -@srcdir@/generated/AntlrSmtLexer.cpp $(ANTLR_TOKEN_STUFF): @srcdir@/generated/AntlrSmtLexer.hpp -# doesn't actually depend on the lexer, but if we're doing parallel +@srcdir@/generated/SmtLexer.h: Smt.g @srcdir@/stamp-generated + -rm -f $(ANTLR_STUFF) + $(ANTLR) -o "@srcdir@/generated" "@srcdir@/Smt.g" + +# These don't actually depend on SmtLexer.h, but if we're doing parallel # make and the lexer needs to be rebuilt, we have to keep the rules # from running in parallel (since the token files will be deleted & # recreated) -@srcdir@/generated/AntlrSmtParser.hpp: smt_parser.g smt_lexer.g $(ANTLR_TOKEN_STUFF) @srcdir@/stamp-generated - $(AM_V_at)-rm -f $(ANTLR_PARSER_STUFF) - $(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/smt_parser.g" -@srcdir@/generated/AntlrSmtParser.cpp: @srcdir@/generated/AntlrSmtParser.hpp +@srcdir@/generated/SmtLexer.c @srcdir@/generated/SmtParser.h @srcdir@/generated/SmtParser.c $(ANTLR_TOKEN_STUFF): @srcdir@/generated/SmtLexer.h diff --git a/src/parser/smt/Smt.g b/src/parser/smt/Smt.g new file mode 100644 index 000000000..7095c7269 --- /dev/null +++ b/src/parser/smt/Smt.g @@ -0,0 +1,552 @@ +/* ******************* */ +/* Smt.g + ** Original author: cconway + ** Major contributors: dejan, mdeters + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Parser for SMT-LIB input language. + **/ + +grammar Smt; + +options { + language = 'C'; // C output for antlr +// defaultErrorHandler = false; // Skip the default error handling, just break with exceptions + k = 2; +} + +@header { +/** + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + **/ +} + +@lexer::includes { +/* This improves performance by ~10 percent on big inputs. + * This option is only valid if we know the input is ASCII (or some 8-bit encoding). + * If we know the input is UTF-16, we can use ANTLR3_INLINE_INPUT_UTF16. + * Otherwise, we have to let the lexer detect the encoding at runtime. + */ +#define ANTLR3_INLINE_INPUT_ASCII +} + +@parser::includes { +#include "expr/command.h" +#include "parser/input.h" + +namespace CVC4 { + class Expr; +namespace parser { + class SmtInput; +} +} + +extern +void SetSmtInput(CVC4::parser::SmtInput* smt); + +} + +@parser::postinclude { +#include "expr/expr.h" +#include "expr/kind.h" +#include "expr/type.h" +#include "parser/input.h" +#include "parser/smt/smt_input.h" +#include "util/output.h" +#include + +using namespace CVC4; +using namespace CVC4::parser; +} + +@members { +static CVC4::parser::SmtInput *input; + +extern +void SetSmtInput(CVC4::parser::SmtInput* _input) { + input = _input; +} +} + +/** + * Parses an expression. + * @return the parsed expression + */ +parseExpr returns [CVC4::Expr expr] + : annotatedFormula[expr] + | EOF + ; + +/** + * Parses a command (the whole benchmark) + * @return the command of the benchmark + */ +parseCommand returns [CVC4::Command* cmd] + : b = benchmark { $cmd = b; } + ; + +/** + * Matches the whole SMT-LIB benchmark. + * @return the sequence command containing the whole problem + */ +benchmark returns [CVC4::Command* cmd] + : LPAREN_TOK BENCHMARK_TOK IDENTIFIER c = benchAttributes RPAREN_TOK + { $cmd = c; } + | EOF { $cmd = 0; } + ; + +/** + * Matches a sequence of benchmark attributes and returns a pointer to a + * command sequence. + * @return the command sequence + */ +benchAttributes returns [CVC4::CommandSequence* cmd_seq] +@init { + cmd_seq = new CommandSequence(); +} + : (cmd = benchAttribute { if (cmd) cmd_seq->addCommand(cmd); } )+ + ; + +/** + * Matches a benchmark attribute, sucha as ':logic', ':formula', and returns + * a corresponding command + * @return a command corresponding to the attribute + */ +benchAttribute returns [CVC4::Command* smt_command] +@declarations { + std::string name; + BenchmarkStatus b_status; + Expr expr; +} + : LOGIC_TOK identifier[name,CHECK_NONE,SYM_VARIABLE] + { input->setLogic(name); + smt_command = new SetBenchmarkLogicCommand(name); } + | ASSUMPTION_TOK annotatedFormula[expr] + { smt_command = new AssertCommand(expr); } + | FORMULA_TOK annotatedFormula[expr] + { smt_command = new CheckSatCommand(expr); } + | STATUS_TOK status[b_status] + { smt_command = new SetBenchmarkStatusCommand(b_status); } + | EXTRAFUNS_TOK LPAREN_TOK (functionDeclaration)+ RPAREN_TOK + | EXTRAPREDS_TOK LPAREN_TOK (predicateDeclaration)+ RPAREN_TOK + | EXTRASORTS_TOK LPAREN_TOK sortDeclaration+ RPAREN_TOK + | NOTES_TOK STRING_LITERAL + | annotation + ; + +/** + * Matches an annotated formula. + * @return the expression representing the formula + */ +annotatedFormula[CVC4::Expr& expr] +@init { + Debug("parser") << "annotated formula: " << AntlrInput::tokenText(LT(1)) << std::endl; + Kind kind; + std::string name; + std::vector args; /* = getExprVector(); */ +} + : /* a built-in operator application */ + LPAREN_TOK builtinOp[kind] annotatedFormulas[args,expr] RPAREN_TOK + { input->checkArity(kind, args.size()); + if((kind == CVC4::kind::AND || kind == CVC4::kind::OR) && args.size() == 1) { + /* Unary AND/OR can be replaced with the argument. + It just so happens expr should already by the only argument. */ + Assert( expr == args[0] ); + } else { + expr = input->mkExpr(kind, args); + } + } + + | /* A non-built-in function application */ + + // Semantic predicate not necessary if parenthesized subexpressions + // are disallowed + // { isFunction(LT(2)->getText()) }? + + LPAREN_TOK + functionSymbol[expr] + { args.push_back(expr); } + annotatedFormulas[args,expr] RPAREN_TOK + // TODO: check arity + { expr = input->mkExpr(CVC4::kind::APPLY_UF,args); } + + | /* An ite expression */ + LPAREN_TOK (ITE_TOK | IF_THEN_ELSE_TOK) + annotatedFormula[expr] + { args.push_back(expr); } + annotatedFormula[expr] + { args.push_back(expr); } + annotatedFormula[expr] + { args.push_back(expr); } + RPAREN_TOK + { expr = input->mkExpr(CVC4::kind::ITE, args); } + + | /* a let/flet binding */ + LPAREN_TOK + (LET_TOK LPAREN_TOK var_identifier[name,CHECK_UNDECLARED] + | FLET_TOK LPAREN_TOK fun_identifier[name,CHECK_UNDECLARED] ) + annotatedFormula[expr] RPAREN_TOK + { input->defineVar(name,expr); } + annotatedFormula[expr] + RPAREN_TOK + { input->undefineVar(name); } + + | /* a variable */ + ( identifier[name,CHECK_DECLARED,SYM_VARIABLE] + | var_identifier[name,CHECK_DECLARED] + | fun_identifier[name,CHECK_DECLARED] ) + { expr = input->getVariable(name); } + + /* constants */ + | TRUE_TOK { expr = input->getTrueExpr(); } + | FALSE_TOK { expr = input->getFalseExpr(); } + /* TODO: let, flet, quantifiers, arithmetic constants */ + ; + +/** + * Matches a sequence of annotated formulas and puts them into the formulas + * vector. + * @param formulas the vector to fill with formulas + * @param expr an Expr reference for the elements of the sequence + */ +/* NOTE: We pass an Expr in here just to avoid allocating a fresh Expr every + * time through this rule. */ +annotatedFormulas[std::vector& formulas, CVC4::Expr& expr] + : ( annotatedFormula[expr] { formulas.push_back(expr); } )+ + ; + +/** +* Matches on of the unary Boolean connectives. +* @return the kind of the Bollean connective +*/ +builtinOp[CVC4::Kind& kind] +@init { + Debug("parser") << "builtin: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : NOT_TOK { $kind = CVC4::kind::NOT; } + | IMPLIES_TOK { $kind = CVC4::kind::IMPLIES; } + | AND_TOK { $kind = CVC4::kind::AND; } + | OR_TOK { $kind = CVC4::kind::OR; } + | XOR_TOK { $kind = CVC4::kind::XOR; } + | IFF_TOK { $kind = CVC4::kind::IFF; } + | EQUAL_TOK { $kind = CVC4::kind::EQUAL; } + | DISTINCT_TOK { $kind = CVC4::kind::DISTINCT; } + /* TODO: lt, gt, plus, minus, etc. */ + ; + +/** + * Matches a (possibly undeclared) predicate identifier (returning the string). + * @param check what kind of check to do with the symbol + */ +predicateName[std::string& name, CVC4::parser::DeclarationCheck check] + : functionName[name,check] + ; + +/** + * Matches a (possibly undeclared) function identifier (returning the string) + * @param check what kind of check to do with the symbol + */ +functionName[std::string& name, CVC4::parser::DeclarationCheck check] + : identifier[name,check,SYM_FUNCTION] + ; + +/** + * Matches an previously declared function symbol (returning an Expr) + */ +functionSymbol[CVC4::Expr& fun] +@declarations { + std::string name; +} + : functionName[name,CHECK_DECLARED] + { input->checkFunction(name); + fun = input->getFunction(name); } + ; + +/** + * Matches an attribute name from the input (:attribute_name). + */ +attribute + : ATTR_IDENTIFIER + ; + + + +functionDeclaration +@declarations { + std::string name; + std::vector sorts; +} + : LPAREN_TOK functionName[name,CHECK_UNDECLARED] + t = sortSymbol // require at least one sort + { sorts.push_back(t); } + sortList[sorts] RPAREN_TOK + { t = input->functionType(sorts); + input->mkVar(name, t); } + ; + +/** + * Matches the declaration of a predicate and declares it + */ +predicateDeclaration +@declarations { + std::string name; + std::vector p_sorts; +} + : LPAREN_TOK predicateName[name,CHECK_UNDECLARED] sortList[p_sorts] RPAREN_TOK + { Type *t = input->predicateType(p_sorts); + input->mkVar(name, t); } + ; + +sortDeclaration +@declarations { + std::string name; +} + : sortName[name,CHECK_UNDECLARED] + { Debug("parser") << "sort decl: '" << name << "'" << std::endl; + input->newSort(name); } + ; + +/** + * Matches a sequence of sort symbols and fills them into the given vector. + */ +sortList[std::vector& sorts] + : ( t = sortSymbol { sorts.push_back(t); })* + ; + +/** + * Matches the sort symbol, which can be an arbitrary identifier. + * @param check the check to perform on the name + */ +sortName[std::string& name, CVC4::parser::DeclarationCheck check] + : identifier[name,check,SYM_SORT] + ; + +sortSymbol returns [CVC4::Type* t] +@declarations { + std::string name; +} + : sortName[name,CHECK_NONE] + { $t = input->getSort(name); } + ; + +/** + * Matches the status of the benchmark, one of 'sat', 'unsat' or 'unknown'. + */ +status[ CVC4::BenchmarkStatus& status ] + : SAT_TOK { $status = SMT_SATISFIABLE; } + | UNSAT_TOK { $status = SMT_UNSATISFIABLE; } + | UNKNOWN_TOK { $status = SMT_UNKNOWN; } + ; + +/** + * Matches an annotation, which is an attribute name, with an optional user + */ +annotation + : attribute (USER_VALUE)? + ; + +/** + * Matches an identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + * @param type the intended namespace for the identifier + */ +identifier[std::string& id, + CVC4::parser::DeclarationCheck check, + CVC4::parser::SymbolType type] + : IDENTIFIER + { id = AntlrInput::tokenText($IDENTIFIER); + Debug("parser") << "identifier: " << id + << " check? " << toString(check) + << " type? " << toString(type) << std::endl; + input->checkDeclaration(id, check, type); } + ; + +/** + * Matches an variable identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + */ +var_identifier[std::string& id, + CVC4::parser::DeclarationCheck check] + : VAR_IDENTIFIER + { id = AntlrInput::tokenText($VAR_IDENTIFIER); + Debug("parser") << "var_identifier: " << id + << " check? " << toString(check) << std::endl; + input->checkDeclaration(id, check, SYM_VARIABLE); } + ; + +/** + * Matches an function identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + */ +fun_identifier[std::string& id, + CVC4::parser::DeclarationCheck check] + : FUN_IDENTIFIER + { id = AntlrInput::tokenText($FUN_IDENTIFIER); + Debug("parser") << "fun_identifier: " << id + << " check? " << toString(check) << std::endl; + input->checkDeclaration(id, check, SYM_FUNCTION); } + ; + + +// Base SMT-LIB tokens +DISTINCT_TOK : 'distinct'; +ITE_TOK : 'ite'; +IF_THEN_ELSE_TOK : 'if_then_else'; +TRUE_TOK : 'true'; +FALSE_TOK : 'false'; +NOT_TOK : 'not'; +IMPLIES_TOK : 'implies'; +AND_TOK : 'and'; +OR_TOK : 'or'; +XOR_TOK : 'xor'; +IFF_TOK : 'iff'; +EXISTS_TOK : 'exists'; +FORALL_TOK : 'forall'; +LET_TOK : 'let'; +FLET_TOK : 'flet'; +THEORY_TOK : 'theory'; +SAT_TOK : 'sat'; +UNSAT_TOK : 'unsat'; +UNKNOWN_TOK : 'unknown'; +BENCHMARK_TOK : 'benchmark'; + +// The SMT attribute tokens +LOGIC_TOK : ':logic'; +ASSUMPTION_TOK : ':assumption'; +FORMULA_TOK : ':formula'; +STATUS_TOK : ':status'; +EXTRASORTS_TOK : ':extrasorts'; +EXTRAFUNS_TOK : ':extrafuns'; +EXTRAPREDS_TOK : ':extrapreds'; +NOTES_TOK : ':notes'; + +// arithmetic symbols +EQUAL_TOK : '='; +LESS_THAN_TOK : '<'; +GREATER_THAN_TOK : '>'; +AMPERSAND_TOK : '&'; +AT_TOK : '@'; +POUND_TOK : '#'; +PLUS_TOK : '+'; +MINUS_TOK : '-'; +STAR_TOK : '*'; +DIV_TOK : '/'; +PERCENT_TOK : '%'; +PIPE_TOK : '|'; +TILDE_TOK : '~'; + +// Language meta-symbols +//QUESTION_TOK : '?'; +//DOLLAR_TOK : '$'; +LPAREN_TOK : '('; +RPAREN_TOK : ')'; + +/** + * Matches an identifier from the input. An identifier is a sequence of letters, + * digits and "_", "'", "." symbols, starting with a letter. + */ +IDENTIFIER /*options { paraphrase = 'an identifier'; testLiterals = true; }*/ + : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* + ; + +/** + * Matches an identifier starting with a colon. + */ +ATTR_IDENTIFIER /*options { paraphrase = 'an identifier starting with a colon'; testLiterals = true; }*/ + : ':' IDENTIFIER + ; + +/** + * Matches an identifier starting with a question mark. + */ +VAR_IDENTIFIER + : '?' IDENTIFIER + ; + +/** + * Matches an identifier starting with a dollar sign. + */ +FUN_IDENTIFIER + : '$' IDENTIFIER + ; + +/** + * Matches the value of user-defined annotations or attributes. The only constraint imposed on a user-defined value is that it start with + * an open brace and end with closed brace. + */ +USER_VALUE + : '{' + ( ~('{' | '}') )* + '}' + ; + + +/** + * Matches and skips whitespace in the input. + */ +WHITESPACE /*options { paraphrase = 'whitespace'; }*/ + : (' ' | '\t' | '\f' | '\r' | '\n')+ { $channel = HIDDEN;; } + ; + +/** + * Matches a numeral from the input (non-empty sequence of digits). + */ +NUMERAL_TOK /*options { paraphrase = 'a numeral'; }*/ + : (DIGIT)+ + ; + +/** + * Matches a double quoted string literal. Escaping is supported, and escape + * character '\' has to be escaped. + */ +STRING_LITERAL /*options { paraphrase = 'a string literal'; }*/ + : '"' (ESCAPE | ~('"'|'\\'))* '"' + ; + +/** + * Matches the comments and ignores them + */ +COMMENT /*options { paraphrase = 'comment'; }*/ + : ';' (~('\n' | '\r'))* { $channel = HIDDEN;; } + ; + + +/** + * Matches any letter ('a'-'z' and 'A'-'Z'). + */ +fragment +ALPHA /*options { paraphrase = 'a letter'; }*/ + : 'a'..'z' + | 'A'..'Z' + ; + +/** + * Matches the digits (0-9) + */ +fragment +DIGIT /*options { paraphrase = 'a digit'; }*/ + : '0'..'9' + ; + + +/** + * Matches an allowed escaped character. + */ +fragment ESCAPE + : '\\' ('"' | '\\' | 'n' | 't' | 'r') + ; + diff --git a/src/parser/smt/smt_input.cpp b/src/parser/smt/smt_input.cpp new file mode 100644 index 000000000..7a28c30f1 --- /dev/null +++ b/src/parser/smt/smt_input.cpp @@ -0,0 +1,73 @@ +/* + * smt_parser.cpp + * + * Created on: Mar 5, 2010 + * Author: chris + */ + +#include + +#include "expr/expr_manager.h" +#include "parser/parser_exception.h" +#include "parser/smt/smt_input.h" +#include "parser/smt/generated/SmtLexer.h" +#include "parser/smt/generated/SmtParser.h" + +namespace CVC4 { +namespace parser { + +/* Use lookahead=2 */ +SmtInput::SmtInput(ExprManager* exprManager, const std::string& filename, bool useMmap) : + AntlrInput(exprManager,filename,2,useMmap) { + init(); +} + +SmtInput::SmtInput(ExprManager* exprManager, const std::string& input, const std::string& name) : + AntlrInput(exprManager,input,name,2) { + init(); +} + +void SmtInput::init() { + pANTLR3_INPUT_STREAM input = getInputStream(); + AlwaysAssert( input != NULL ); + + d_pSmtLexer = SmtLexerNew(input); + if( d_pSmtLexer == NULL ) { + throw ParserException("Failed to create SMT lexer."); + } + + setLexer( d_pSmtLexer->pLexer ); + + pANTLR3_COMMON_TOKEN_STREAM tokenStream = getTokenStream(); + AlwaysAssert( tokenStream != NULL ); + + d_pSmtParser = SmtParserNew(tokenStream); + if( d_pSmtParser == NULL ) { + throw ParserException("Failed to create SMT parser."); + } + + setParser(d_pSmtParser->pParser); + SetSmtInput(this); +} + + +SmtInput::~SmtInput() { + d_pSmtLexer->free(d_pSmtLexer); + d_pSmtParser->free(d_pSmtParser); +} + +Command* SmtInput::doParseCommand() throw (ParserException) { + return d_pSmtParser->parseCommand(d_pSmtParser); +} + +Expr SmtInput::doParseExpr() throw (ParserException) { + return d_pSmtParser->parseExpr(d_pSmtParser); +} + +pANTLR3_LEXER SmtInput::getLexer() { + return d_pSmtLexer->pLexer; +} + +} // namespace parser + +} // namespace CVC4 diff --git a/src/parser/smt/smt_input.h b/src/parser/smt/smt_input.h new file mode 100644 index 000000000..b3613d67b --- /dev/null +++ b/src/parser/smt/smt_input.h @@ -0,0 +1,47 @@ +/* + * smt_parser.h + * + * Created on: Mar 5, 2010 + * Author: chris + */ + +#ifndef SMT_PARSER_H_ +#define SMT_PARSER_H_ + +#include "parser/antlr_input.h" +#include "parser/smt/generated/SmtLexer.h" +#include "parser/smt/generated/SmtParser.h" + +// extern void SmtParserSetAntlrParser(CVC4::parser::AntlrParser* newAntlrParser); + +namespace CVC4 { + +class Command; +class Expr; +class ExprManager; + +namespace parser { + +class SmtInput : public AntlrInput { +public: + SmtInput(ExprManager* exprManager, const std::string& filename, bool useMmap); + SmtInput(ExprManager* exprManager, const std::string& input, const std::string& name); + ~SmtInput(); + +protected: + Command* doParseCommand() throw(ParserException); + Expr doParseExpr() throw(ParserException); + pANTLR3_LEXER getLexer(); + pANTLR3_LEXER createLexer(pANTLR3_INPUT_STREAM input); + pANTLR3_PARSER createParser(pANTLR3_COMMON_TOKEN_STREAM tokenStream); + +private: + void init(); + pSmtLexer d_pSmtLexer; + pSmtParser d_pSmtParser; +}; // class SmtInput +} // namespace parser + +} // namespace CVC4 + +#endif /* SMT_PARSER_H_ */ diff --git a/src/parser/smt/smt_lexer.g b/src/parser/smt/smt_lexer.g deleted file mode 100644 index d694cd93f..000000000 --- a/src/parser/smt/smt_lexer.g +++ /dev/null @@ -1,220 +0,0 @@ -/* ******************* */ -/* smt_lexer.g - ** Original author: dejan - ** Major contributors: cconway, mdeters - ** Minor contributors (to current version): none - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Lexer for SMT-LIB input language. - **/ - -options { - language = "Cpp"; // C++ output for antlr - namespaceStd = "std"; // Cosmetic option to get rid of long defines in generated code - namespaceAntlr = "antlr"; // Cosmetic option to get rid of long defines in generated code - namespace = "CVC4::parser"; // Wrap everything in the smtparser namespace -} - -/** - * AntlrSmtLexer class is a stream tokenizer (lexer) for the SMT-LIB benchmark - * language. - */ -class AntlrSmtLexer extends Lexer; - -options { - exportVocab = SmtVocabulary; // Name of the shared token vocabulary - testLiterals = false; // Do not check for literals by default - defaultErrorHandler = false; // Skip the default error handling, just break with exceptions - k = 2; -} - -tokens { - // Base SMT-LIB tokens - DISTINCT = "distinct"; - ITE = "ite"; - IF_THEN_ELSE = "if_then_else"; - TRUE = "true"; - FALSE = "false"; - NOT = "not"; - IMPLIES = "implies"; - AND = "and"; - OR = "or"; - XOR = "xor"; - IFF = "iff"; - EXISTS = "exists"; - FORALL = "forall"; - LET = "let"; - FLET = "flet"; - THEORY = "theory"; - LOGIC = "logic"; - SAT = "sat"; - UNSAT = "unsat"; - UNKNOWN = "unknown"; - BENCHMARK = "benchmark"; - // The SMT attribute tokens - LOGIC_ATTR = ":logic"; - ASSUMPTION_ATTR = ":assumption"; - FORMULA_ATTR = ":formula"; - STATUS_ATTR = ":status"; - EXTRASORTS_ATTR = ":extrasorts"; - EXTRAFUNS_ATTR = ":extrafuns"; - EXTRAPREDS_ATTR = ":extrapreds"; - C_NOTES = ":notes"; - // arithmetic symbols - EQUAL = "="; - LESS_THAN = "<"; - GREATER_THAN = ">"; - AMPERSAND = "&"; - AT = "@"; - POUND = "#"; - PLUS = "+"; - MINUS = "-"; - STAR = "*"; - DIV = "/"; - PERCENT = "%"; - PIPE = "|"; - TILDE = "~"; -} - -/** - * Matches any letter ('a'-'z' and 'A'-'Z'). - */ -protected -ALPHA options { paraphrase = "a letter"; } - : 'a'..'z' - | 'A'..'Z' - ; - -/** - * Matches the digits (0-9) - */ -protected -DIGIT options { paraphrase = "a digit"; } - : '0'..'9' - ; - -/** - * Matches an identifier from the input. An identifier is a sequence of letters, - * digits and "_", "'", "." symbols, starting with a letter. - */ -IDENTIFIER options { paraphrase = "an identifier"; testLiterals = true; } - : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* - ; - -/** - * Matches an identifier starting with a colon. An identifier is a sequence of letters, - * digits and "_", "'", "." symbols, starting with a colon. - */ -C_IDENTIFIER options { paraphrase = "an attribute (e.g., ':x')"; testLiterals = true; } - : ':' ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* - ; - -VAR_IDENTIFIER options { paraphrase = "a variable (e.g., '?x')"; testLiterals = false; } - : '?' IDENTIFIER - ; - -FUN_IDENTIFIER options { paraphrase = "a function variable (e.g, '$x')"; testLiterals = false; } - : '$' IDENTIFIER - ; - - -/** - * Matches the value of user-defined annotations or attributes. The only constraint imposed on a user-defined value is that it start with - * an open brace and end with closed brace. - */ -USER_VALUE - : '{' - ( '\n' { newline(); } - | ~('{' | '}' | '\n') - )* - '}' - ; - -/** - * Matches the question mark symbol ('?'). - */ -QUESTION_MARK options { paraphrase = "a question mark '?'"; } - : '?' - ; - -/** - * Matches the dollar sign ('$'). - */ -DOLLAR_SIGN options { paraphrase = "a dollar sign '$'"; } - : '$' - ; - -/** - * Matches the left bracket ('('). - */ -LPAREN options { paraphrase = "a left parenthesis '('"; } - : '('; - -/** - * Matches the right bracket ('('). - */ -RPAREN options { paraphrase = "a right parenthesis ')'"; } - : ')'; - -/** - * Matches and skips whitespace in the input. - */ -WHITESPACE options { paraphrase = "whitespace"; } - : (' ' | '\t' | '\f') { $setType(antlr::Token::SKIP); } - ; - -/** - * Matches and skips the newline symbols in the input. - */ -NEWLINE options { paraphrase = "a newline"; } - : ('\r' '\n' | '\r' | '\n') { $setType(antlr::Token::SKIP); newline(); } - ; - -/** - * Matches a numeral from the input (non-empty sequence of digits). - */ -NUMERAL options { paraphrase = "a numeral"; } - : (DIGIT)+ - ; - -/** - * Matches an allowed escaped character. - */ -protected ESCAPE - : '\\' ('"' | '\\' | 'n' | 't' | 'r') - ; - -/** - * Matches a double quoted string literal. Escaping is supported, and escape - * character '\' has to be escaped. - */ -STRING_LITERAL options { paraphrase = "a string literal"; } - : '"' (ESCAPE | ~('"'|'\\'))* '"' - ; - -/** - * Matches the comments and ignores them - */ -COMMENT options { paraphrase = "comment"; } - : ';' (~('\n' | '\r'))* { $setType(antlr::Token::SKIP); } - ; - -/* Arithmetic symbol tokens */ -EQUAL : "="; -LESS_THAN : "<"; -GREATER_THAN : ">"; -AMPERSAND : "&"; -AT : "@"; -POUND : "#"; -PLUS : "+"; -MINUS : "-"; -STAR : "*"; -DIV : "/"; -PERCENT : "%"; -PIPE : "|"; -TILDE : "~"; diff --git a/src/parser/smt/smt_parser.g b/src/parser/smt/smt_parser.g deleted file mode 100644 index b876e1649..000000000 --- a/src/parser/smt/smt_parser.g +++ /dev/null @@ -1,351 +0,0 @@ -/* ******************* */ -/* smt_parser.g - ** Original author: dejan - ** Major contributors: mdeters, cconway - ** Minor contributors (to current version): none - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Parser for SMT-LIB input language. - **/ - -header "post_include_hpp" { -#include "parser/antlr_parser.h" -#include "expr/command.h" -} - -header "post_include_cpp" { -#include "expr/type.h" -#include "util/output.h" -#include - -using namespace std; -using namespace CVC4; -using namespace CVC4::parser; -} - -options { - language = "Cpp"; // C++ output for antlr - namespaceStd = "std"; // Cosmetic option to get rid of long defines in generated code - namespaceAntlr = "antlr"; // Cosmetic option to get rid of long defines in generated code - namespace = "CVC4::parser"; // Wrap everything in the smtparser namespace -} - -/** - * AntlrSmtParser class is the parser for the SMT-LIB files. - */ -class AntlrSmtParser extends Parser("AntlrParser"); -options { - genHashLines = true; // Include line number information - importVocab = SmtVocabulary; // Import the common vocabulary - defaultErrorHandler = false; // Skip the default error handling, just break with exceptions - k = 2; -} - -/** - * Parses an expression. - * @return the parsed expression - */ -parseExpr returns [CVC4::Expr expr] - : expr = annotatedFormula - | EOF - ; - -/** - * Parses a command (the whole benchmark) - * @return the command of the benchmark - */ -parseCommand returns [CVC4::Command* cmd] - : cmd = benchmark - ; - -/** - * Matches the whole SMT-LIB benchmark. - * @return the sequence command containing the whole problem - */ -benchmark returns [Command* cmd] - : LPAREN BENCHMARK IDENTIFIER cmd = benchAttributes RPAREN - | EOF { cmd = 0; } - ; - -/** - * Matches a sequence of benchmark attributes and returns a pointer to a - * command sequence. - * @return the command sequence - */ -benchAttributes returns [CVC4::CommandSequence* cmd_seq = new CommandSequence()] -{ - Command* cmd; -} - : (cmd = benchAttribute { if (cmd) cmd_seq->addCommand(cmd); } )+ - ; - -/** - * Matches a benchmark attribute, sucha as ':logic', ':formula', and returns - * a corresponding command - * @retrurn a command corresponding to the attribute - */ -benchAttribute returns [Command* smt_command = 0] -{ - Expr formula; - string logic; - SetBenchmarkStatusCommand::BenchmarkStatus b_status = SetBenchmarkStatusCommand::SMT_UNKNOWN; -} - : LOGIC_ATTR logic = identifier - { setLogic(logic); - smt_command = new SetBenchmarkLogicCommand(logic); } - | ASSUMPTION_ATTR formula = annotatedFormula - { smt_command = new AssertCommand(formula); } - | FORMULA_ATTR formula = annotatedFormula - { smt_command = new CheckSatCommand(formula); } - | STATUS_ATTR b_status = status - { smt_command = new SetBenchmarkStatusCommand(b_status); } - | EXTRAFUNS_ATTR LPAREN (functionDeclaration)+ RPAREN - | EXTRAPREDS_ATTR LPAREN (predicateDeclaration)+ RPAREN - | EXTRASORTS_ATTR LPAREN (sortDeclaration)+ RPAREN - | NOTES_ATTR STRING_LITERAL - | annotation - ; - -/** - * Matches an annotated formula. - * @return the expression representing the formula - */ -annotatedFormula returns [CVC4::Expr formula] -{ - Debug("parser") << "annotated formula: " << LT(1)->getText() << endl; - Kind kind = CVC4::kind::UNDEFINED_KIND; - vector args; - std::string name; - Expr expr1, expr2, expr3; -} - : /* a built-in operator application */ - LPAREN kind = builtinOp annotatedFormulas[args] RPAREN - { checkArity(kind, args.size()); - if((kind == kind::AND || kind == kind::OR) && args.size() == 1) { - formula = args[0]; - } else { - formula = mkExpr(kind, args); - } - } - - | /* a "distinct" expr */ - /* TODO: Should there be a DISTINCT kind in the Expr package? */ - LPAREN DISTINCT annotatedFormulas[args] RPAREN - { formula = mkExpr(CVC4::kind::DISTINCT, args); } - - | /* An ite expression */ - LPAREN (ITE | IF_THEN_ELSE) - { Expr test, trueExpr, falseExpr; } - test = annotatedFormula - trueExpr = annotatedFormula - falseExpr = annotatedFormula - RPAREN - { formula = mkExpr(CVC4::kind::ITE, test, trueExpr, falseExpr); } - - | /* A let/flet binding */ - LPAREN (LET LPAREN name=variable[CHECK_UNDECLARED] - | FLET LPAREN name=function_var[CHECK_UNDECLARED] ) - expr1=annotatedFormula RPAREN - { defineVar(name,expr1); } - formula=annotatedFormula - { undefineVar(name); } - RPAREN - - | /* A non-built-in function application */ - { Expr f; } - LPAREN f = functionSymbol - { args.push_back(f); } - annotatedFormulas[args] RPAREN - // TODO: check arity - { formula = mkExpr(CVC4::kind::APPLY_UF, args); } - - | /* a variable */ - { std::string name; } - ( name = identifier[CHECK_DECLARED] - | name = variable[CHECK_DECLARED] - | name = function_var[CHECK_DECLARED] ) - { formula = getVariable(name); } - - /* constants */ - | TRUE { formula = getTrueExpr(); } - | FALSE { formula = getFalseExpr(); } - /* TODO: quantifiers, arithmetic constants */ - ; - -/** - * Matches a sequence of annotaed formulas and puts them into the formulas - * vector. - * @param formulas the vector to fill with formulas - */ -annotatedFormulas[std::vector& formulas] -{ - Expr f; -} - : ( f = annotatedFormula { formulas.push_back(f); } )+ - ; - -/** -* Matches on of the unary Boolean connectives. -* @return the kind of the Bollean connective -*/ -builtinOp returns [CVC4::Kind kind] -{ - Debug("parser") << "builtin: " << LT(1)->getText() << endl; -} - : NOT { kind = CVC4::kind::NOT; } - | IMPLIES { kind = CVC4::kind::IMPLIES; } - | AND { kind = CVC4::kind::AND; } - | OR { kind = CVC4::kind::OR; } - | XOR { kind = CVC4::kind::XOR; } - | IFF { kind = CVC4::kind::IFF; } - | EQUAL { kind = CVC4::kind::EQUAL; } - /* TODO: lt, gt, plus, minus, etc. */ - ; - -/** - * Matches a (possibly undeclared) predicate identifier (returning the string). - * @param check what kind of check to do with the symbol - */ -predicateName[DeclarationCheck check = CHECK_NONE] returns [std::string p] - : p = identifier[check] - ; - -/** - * Matches a (possibly undeclared) function identifier (returning the string) - * @param check what kind of check to do with the symbol - */ -functionName[DeclarationCheck check = CHECK_NONE] returns [std::string name] - : name = identifier[check,SYM_FUNCTION] - ; - -/** - * Matches an previously declared function symbol (returning an Expr) - */ -functionSymbol returns [CVC4::Expr fun] -{ - string name; -} - : name = functionName[CHECK_DECLARED] - { checkFunction(name); - fun = getFunction(name); } - ; - -/** - * Matches an attribute name from the input (:attribute_name). - */ -attribute - : C_IDENTIFIER - ; - -variable[DeclarationCheck check = CHECK_NONE] returns [std::string name] - : x:VAR_IDENTIFIER - { name = x->getText(); - checkDeclaration(name, check, SYM_VARIABLE); } - ; - -function_var[DeclarationCheck check = CHECK_NONE] returns [std::string name] - : x:FUN_IDENTIFIER - { name = x->getText(); - checkDeclaration(name, check, SYM_FUNCTION); } - ; - -/** - * Matches the sort symbol, which can be an arbitrary identifier. - * @param check the check to perform on the name - */ -sortName[DeclarationCheck check = CHECK_NONE] returns [std::string name] - : name = identifier[check,SYM_SORT] - ; - -sortSymbol returns [CVC4::Type* t] -{ - std::string name; -} - : name = sortName { t = getSort(name); } - ; - -functionDeclaration -{ - string name; - Type* t; - std::vector sorts; -} - : LPAREN name = functionName[CHECK_UNDECLARED] - t = sortSymbol // require at least one sort - { sorts.push_back(t); } - sortList[sorts] RPAREN - { t = functionType(sorts); - mkVar(name, t); } - ; - -/** - * Matches the declaration of a predicate and declares it - */ -predicateDeclaration -{ - string p_name; - std::vector p_sorts; - Type *t; -} - : LPAREN p_name = predicateName[CHECK_UNDECLARED] sortList[p_sorts] RPAREN - { t = predicateType(p_sorts); - mkVar(p_name, t); } - ; - -sortDeclaration -{ - string name; -} - : name = sortName[CHECK_UNDECLARED] - { newSort(name); } - ; - -/** - * Matches a sequence of sort symbols and fills them into the given vector. - */ -sortList[std::vector& sorts] -{ - Type* t; -} - : ( t = sortSymbol { sorts.push_back(t); })* - ; - -/** - * Matches the status of the benchmark, one of 'sat', 'unsat' or 'unknown'. - */ -status returns [ SetBenchmarkStatusCommand::BenchmarkStatus status ] - : SAT { status = SetBenchmarkStatusCommand::SMT_SATISFIABLE; } - | UNSAT { status = SetBenchmarkStatusCommand::SMT_UNSATISFIABLE; } - | UNKNOWN { status = SetBenchmarkStatusCommand::SMT_UNKNOWN; } - ; - -/** - * Matches an annotation, which is an attribute name, with an optional user - */ -annotation - : attribute (USER_VALUE)? - ; - -/** - * Matches an identifier and returns a string. - * @param check what kinds of check to do on the symbol - * @return the id string - */ -identifier[DeclarationCheck check = CHECK_NONE, - SymbolType type = SYM_VARIABLE] -returns [std::string id] -{ - Debug("parser") << "identifier: " << LT(1)->getText() - << " check? " << toString(check) - << " type? " << toString(type) << endl; -} - : x:IDENTIFIER - { id = x->getText(); - checkDeclaration(id, check, type); } - ; - -- cgit v1.2.3