diff options
author | Christopher L. Conway <christopherleeconway@gmail.com> | 2010-03-30 20:22:33 +0000 |
---|---|---|
committer | Christopher L. Conway <christopherleeconway@gmail.com> | 2010-03-30 20:22:33 +0000 |
commit | 8730e9320a833a9eb0e65074f9988950b7424c0c (patch) | |
tree | 1cb09404256743e208fece079ba473595e05edcd /src/parser/cvc/Cvc.g | |
parent | 8c87c05ac56a5f29b2ae1e658f2d7d3b7b588163 (diff) |
Merging from branches/antlr3 (r246:354)
Diffstat (limited to 'src/parser/cvc/Cvc.g')
-rw-r--r-- | src/parser/cvc/Cvc.g | 496 |
1 files changed, 496 insertions, 0 deletions
diff --git a/src/parser/cvc/Cvc.g b/src/parser/cvc/Cvc.g new file mode 100644 index 000000000..a9dff77bf --- /dev/null +++ b/src/parser/cvc/Cvc.g @@ -0,0 +1,496 @@ +/* ******************* */ +/* Cvc.g + ** Original author: cconway + ** Major contributors: dejan, mdeters + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Parser for CVC-LIB input language. + **/ + +grammar Cvc; + +options { + language = 'C'; // C output for antlr +// defaultErrorHandler = false; // Skip the default error handling, just break with exceptions + k = 2; +} + +@header { +/** + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + **/ +} + +@lexer::includes { +/* This improves performance by ~10 percent on big inputs. + * This option is only valid if we know the input is ASCII (or some 8-bit encoding). + * If we know the input is UTF-16, we can use ANTLR3_INLINE_INPUT_UTF16. + * Otherwise, we have to let the lexer detect the encoding at runtime. + */ +#define ANTLR3_INLINE_INPUT_ASCII +} + +@parser::includes { +#include "expr/command.h" +#include "parser/input.h" + +namespace CVC4 { + class Expr; +namespace parser { + class CvcInput; +} +} + +extern +void SetCvcInput(CVC4::parser::CvcInput* input); + +} + +@parser::postinclude { +#include "expr/expr.h" +#include "expr/kind.h" +#include "expr/type.h" +#include "parser/input.h" +#include "parser/cvc/cvc_input.h" +#include "util/output.h" +#include <vector> + +using namespace CVC4; +using namespace CVC4::parser; +} + +@members { +static CVC4::parser::CvcInput *input; + +extern +void SetCvcInput(CVC4::parser::CvcInput* _input) { + input = _input; +} +} + +/** + * Parses an expression. + * @return the parsed expression + */ +parseExpr returns [CVC4::Expr expr] + : formula[expr] + | EOF + ; + +/** + * Parses a command (the whole benchmark) + * @return the command of the benchmark + */ +parseCommand returns [CVC4::Command* cmd] + : c = command { $cmd = c; } + ; + +/** + * Matches a command of the input. If a declaration, it will return an empty + * command. + */ +command returns [CVC4::Command* cmd = 0] +@init { + Expr f; + Debug("parser-extra") << "command: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : ASSERT_TOK formula[f] SEMICOLON { cmd = new AssertCommand(f); } + | QUERY_TOK formula[f] SEMICOLON { cmd = new QueryCommand(f); } + | CHECKSAT_TOK formula[f] SEMICOLON { cmd = new CheckSatCommand(f); } + | CHECKSAT_TOK SEMICOLON { cmd = new CheckSatCommand(input->getTrueExpr()); } + | PUSH_TOK SEMICOLON { cmd = new PushCommand(); } + | POP_TOK SEMICOLON { cmd = new PopCommand(); } + | declaration[cmd] + | EOF + ; + +/** + * Match a declaration + */ + +declaration[CVC4::Command*& cmd] +@init { + std::vector<std::string> ids; + Type* t; + Debug("parser-extra") << "declaration: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : // FIXME: These could be type or function declarations, if that matters. + identifierList[ids, CHECK_UNDECLARED, SYM_VARIABLE] COLON declType[t,ids] SEMICOLON + { cmd = new DeclarationCommand(ids,t); } + ; + +/** Match the right-hand side of a declaration. Returns the type. */ +declType[CVC4::Type*& t, std::vector<std::string>& idList] +@init { + Debug("parser-extra") << "declType: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : /* A sort declaration (e.g., "T : TYPE") */ + TYPE_TOK { input->newSorts(idList); t = input->kindType(); } + | /* A variable declaration */ + type[t] { input->mkVars(idList,t); } + ; + +/** + * Match the type in a declaration and do the declaration binding. + * TODO: Actually parse sorts into Type objects. + */ +type[CVC4::Type*& t] +@init { + std::vector<Type*> typeList; + Debug("parser-extra") << "type: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : /* Simple type */ + baseType[t] + | /* Single-parameter function type */ + baseType[t] { typeList.push_back(t); } + ARROW_TOK baseType[t] + { t = input->functionType(typeList,t); } + | /* Multi-parameter function type */ + LPAREN baseType[t] + { typeList.push_back(t); } + (COMMA baseType[t] { typeList.push_back(t); } )+ + RPAREN ARROW_TOK baseType[t] + { t = input->functionType(typeList,t); } + ; + +/** + * Matches a list of identifiers separated by a comma and puts them in the + * given list. + * @param idList the list to fill with identifiers. + * @param check what kinds of check to perform on the symbols + */ +identifierList[std::vector<std::string>& idList, + CVC4::parser::DeclarationCheck check, + CVC4::parser::SymbolType type] +@init { + std::string id; +} + : identifier[id,check,type] { idList.push_back(id); } + (COMMA identifier[id,check,type] { idList.push_back(id); })* + ; + + +/** + * Matches an identifier and returns a string. + */ +identifier[std::string& id, + CVC4::parser::DeclarationCheck check, + CVC4::parser::SymbolType type] + : IDENTIFIER + { id = AntlrInput::tokenText($IDENTIFIER); + input->checkDeclaration(id, check, type); } + ; + +/** + * Matches a type. + * TODO: parse more types + */ +baseType[CVC4::Type*& t] +@init { + std::string id; + Debug("parser-extra") << "base type: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : BOOLEAN_TOK { t = input->booleanType(); } + | typeSymbol[t] + ; + +/** + * Matches a type identifier + */ +typeSymbol[CVC4::Type*& t] +@init { + std::string id; + Debug("parser-extra") << "type symbol: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : identifier[id,CHECK_DECLARED,SYM_SORT] + { t = input->getSort(id); } + ; + +/** + * Matches a CVC4 formula. + * @return the expression representing the formula + */ +formula[CVC4::Expr& formula] +@init { + Debug("parser-extra") << "formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : iffFormula[formula] + ; + +/** + * Matches a comma-separated list of formulas + */ +formulaList[std::vector<CVC4::Expr>& formulas] +@init { + Expr f; +} + : formula[f] { formulas.push_back(f); } + ( COMMA formula[f] + { formulas.push_back(f); } + )* + ; + +/** + * Matches a Boolean IFF formula (right-associative) + */ +iffFormula[CVC4::Expr& f] +@init { + Expr e; + Debug("parser-extra") << "<=> formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : impliesFormula[f] + ( IFF_TOK + iffFormula[e] + { f = input->mkExpr(CVC4::kind::IFF, f, e); } + )? + ; + +/** + * Matches a Boolean IMPLIES formula (right-associative) + */ +impliesFormula[CVC4::Expr& f] +@init { + Expr e; + Debug("parser-extra") << "=> Formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : orFormula[f] + ( IMPLIES_TOK impliesFormula[e] + { f = input->mkExpr(CVC4::kind::IMPLIES, f, e); } + )? + ; + +/** + * Matches a Boolean OR formula (left-associative) + */ +orFormula[CVC4::Expr& f] +@init { + std::vector<Expr> exprs; + Debug("parser-extra") << "OR Formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : xorFormula[f] + ( OR_TOK { exprs.push_back(f); } + xorFormula[f] { exprs.push_back(f); } )* + { + if( exprs.size() > 0 ) { + f = input->mkExpr(CVC4::kind::OR, exprs); + } + } + ; + +/** + * Matches a Boolean XOR formula (left-associative) + */ +xorFormula[CVC4::Expr& f] +@init { + Expr e; + Debug("parser-extra") << "XOR formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : andFormula[f] + ( XOR_TOK andFormula[e] + { f = input->mkExpr(CVC4::kind::XOR,f, e); } + )* + ; + +/** + * Matches a Boolean AND formula (left-associative) + */ +andFormula[CVC4::Expr& f] +@init { + std::vector<Expr> exprs; + Debug("parser-extra") << "AND Formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : notFormula[f] + ( AND_TOK { exprs.push_back(f); } + notFormula[f] { exprs.push_back(f); } )* + { + if( exprs.size() > 0 ) { + f = input->mkExpr(CVC4::kind::AND, exprs); + } + } + ; + +/** + * Parses a NOT formula. + * @return the expression representing the formula + */ +notFormula[CVC4::Expr& f] +@init { + Debug("parser-extra") << "NOT formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : /* negation */ + NOT_TOK notFormula[f] + { f = input->mkExpr(CVC4::kind::NOT, f); } + | /* a boolean atom */ + predFormula[f] + ; + +predFormula[CVC4::Expr& f] +@init { + Expr e; + Debug("parser-extra") << "predicate formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : term[f] + (EQUAL_TOK term[e] + { f = input->mkExpr(CVC4::kind::EQUAL, f, e); } + )? + ; // TODO: lt, gt, etc. + +/** + * Parses a term. + */ +term[CVC4::Expr& f] +@init { + std::string name; + std::vector<Expr> args; + Debug("parser-extra") << "term: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : /* function application */ + // { isFunction(AntlrInput::tokenText(LT(1))) }? + functionSymbol[f] + { args.push_back( f ); } + LPAREN formulaList[args] RPAREN + // TODO: check arity + { f = input->mkExpr(CVC4::kind::APPLY_UF, args); } + + | /* if-then-else */ + iteTerm[f] + + | /* parenthesized sub-formula */ + LPAREN formula[f] RPAREN + + /* constants */ + | TRUE_TOK { f = input->getTrueExpr(); } + | FALSE_TOK { f = input->getFalseExpr(); } + + | /* variable */ + identifier[name,CHECK_DECLARED,SYM_VARIABLE] + { f = input->getVariable(name); } + ; + +/** + * Parses an ITE term. + */ +iteTerm[CVC4::Expr& f] +@init { + std::vector<Expr> args; + Debug("parser-extra") << "ite: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : IF_TOK formula[f] { args.push_back(f); } + THEN_TOK formula[f] { args.push_back(f); } + iteElseTerm[f] { args.push_back(f); } + ENDIF_TOK + { f = input->mkExpr(CVC4::kind::ITE, args); } + ; + +/** + * Parses the else part of the ITE, i.e. ELSE f, or ELSIF b THEN f1 ... + */ +iteElseTerm[CVC4::Expr& f] +@init { + std::vector<Expr> args; + Debug("parser-extra") << "else: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : ELSE_TOK formula[f] + | ELSEIF_TOK iteCondition = formula[f] { args.push_back(f); } + THEN_TOK iteThen = formula[f] { args.push_back(f); } + iteElse = iteElseTerm[f] { args.push_back(f); } + { f = input->mkExpr(CVC4::kind::ITE, args); } + ; + +/** + * Parses a function symbol (an identifier). + * @param what kind of check to perform on the id + * @return the predicate symol + */ +functionSymbol[CVC4::Expr& f] +@init { + Debug("parser-extra") << "function symbol: " << AntlrInput::tokenText(LT(1)) << std::endl; + std::string name; +} + : identifier[name,CHECK_DECLARED,SYM_FUNCTION] + { input->checkFunction(name); + f = input->getFunction(name); } + ; + + +// Keywords + +AND_TOK : 'AND'; +ASSERT_TOK : 'ASSERT'; +BOOLEAN_TOK : 'BOOLEAN'; +CHECKSAT_TOK : 'CHECKSAT'; +ECHO_TOK : 'ECHO'; +ELSEIF_TOK : 'ELSIF'; +ELSE_TOK : 'ELSE'; +ENDIF_TOK : 'ENDIF'; +FALSE_TOK : 'FALSE'; +IF_TOK : 'IF'; +NOT_TOK : 'NOT'; +OR_TOK : 'OR'; +POPTO_TOK : 'POPTO'; +POP_TOK : 'POP'; +PRINT_TOK : 'PRINT'; +PUSH_TOK : 'PUSH'; +QUERY_TOK : 'QUERY'; +THEN_TOK : 'THEN'; +TRUE_TOK : 'TRUE'; +TYPE_TOK : 'TYPE'; +XOR_TOK : 'XOR'; + +// Symbols + +COLON : ':'; +COMMA : ','; +LPAREN : '('; +RPAREN : ')'; +SEMICOLON : ';'; + +// Operators + +IMPLIES_TOK : '=>'; +IFF_TOK : '<=>'; +ARROW_TOK : '->'; +EQUAL_TOK : '='; + +/** + * Matches an identifier from the input. An identifier is a sequence of letters, + * digits and "_", "'", "." symbols, starting with a letter. + */ +IDENTIFIER : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')*; + +/** + * Matches a numeral from the input (non-empty sequence of digits). + */ +NUMERAL: (DIGIT)+; + +/** + * Matches any letter ('a'-'z' and 'A'-'Z'). + */ +fragment ALPHA : 'a'..'z' | 'A'..'Z'; + +/** + * Matches the digits (0-9) + */ +fragment DIGIT : '0'..'9'; + +/** + * Matches and skips whitespace in the input and ignores it. + */ +WHITESPACE : (' ' | '\t' | '\f' | '\r' | '\n') { $channel = HIDDEN;; }; + +/** + * Matches the comments and ignores them + */ +COMMENT : '%' (~('\n' | '\r'))* { $channel = HIDDEN;; }; + |