diff options
Diffstat (limited to 'src/parser/smt/Smt.g')
-rw-r--r-- | src/parser/smt/Smt.g | 552 |
1 files changed, 552 insertions, 0 deletions
diff --git a/src/parser/smt/Smt.g b/src/parser/smt/Smt.g new file mode 100644 index 000000000..7095c7269 --- /dev/null +++ b/src/parser/smt/Smt.g @@ -0,0 +1,552 @@ +/* ******************* */ +/* Smt.g + ** Original author: cconway + ** Major contributors: dejan, mdeters + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Parser for SMT-LIB input language. + **/ + +grammar Smt; + +options { + language = 'C'; // C output for antlr +// defaultErrorHandler = false; // Skip the default error handling, just break with exceptions + k = 2; +} + +@header { +/** + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + **/ +} + +@lexer::includes { +/* This improves performance by ~10 percent on big inputs. + * This option is only valid if we know the input is ASCII (or some 8-bit encoding). + * If we know the input is UTF-16, we can use ANTLR3_INLINE_INPUT_UTF16. + * Otherwise, we have to let the lexer detect the encoding at runtime. + */ +#define ANTLR3_INLINE_INPUT_ASCII +} + +@parser::includes { +#include "expr/command.h" +#include "parser/input.h" + +namespace CVC4 { + class Expr; +namespace parser { + class SmtInput; +} +} + +extern +void SetSmtInput(CVC4::parser::SmtInput* smt); + +} + +@parser::postinclude { +#include "expr/expr.h" +#include "expr/kind.h" +#include "expr/type.h" +#include "parser/input.h" +#include "parser/smt/smt_input.h" +#include "util/output.h" +#include <vector> + +using namespace CVC4; +using namespace CVC4::parser; +} + +@members { +static CVC4::parser::SmtInput *input; + +extern +void SetSmtInput(CVC4::parser::SmtInput* _input) { + input = _input; +} +} + +/** + * Parses an expression. + * @return the parsed expression + */ +parseExpr returns [CVC4::Expr expr] + : annotatedFormula[expr] + | EOF + ; + +/** + * Parses a command (the whole benchmark) + * @return the command of the benchmark + */ +parseCommand returns [CVC4::Command* cmd] + : b = benchmark { $cmd = b; } + ; + +/** + * Matches the whole SMT-LIB benchmark. + * @return the sequence command containing the whole problem + */ +benchmark returns [CVC4::Command* cmd] + : LPAREN_TOK BENCHMARK_TOK IDENTIFIER c = benchAttributes RPAREN_TOK + { $cmd = c; } + | EOF { $cmd = 0; } + ; + +/** + * Matches a sequence of benchmark attributes and returns a pointer to a + * command sequence. + * @return the command sequence + */ +benchAttributes returns [CVC4::CommandSequence* cmd_seq] +@init { + cmd_seq = new CommandSequence(); +} + : (cmd = benchAttribute { if (cmd) cmd_seq->addCommand(cmd); } )+ + ; + +/** + * Matches a benchmark attribute, sucha as ':logic', ':formula', and returns + * a corresponding command + * @return a command corresponding to the attribute + */ +benchAttribute returns [CVC4::Command* smt_command] +@declarations { + std::string name; + BenchmarkStatus b_status; + Expr expr; +} + : LOGIC_TOK identifier[name,CHECK_NONE,SYM_VARIABLE] + { input->setLogic(name); + smt_command = new SetBenchmarkLogicCommand(name); } + | ASSUMPTION_TOK annotatedFormula[expr] + { smt_command = new AssertCommand(expr); } + | FORMULA_TOK annotatedFormula[expr] + { smt_command = new CheckSatCommand(expr); } + | STATUS_TOK status[b_status] + { smt_command = new SetBenchmarkStatusCommand(b_status); } + | EXTRAFUNS_TOK LPAREN_TOK (functionDeclaration)+ RPAREN_TOK + | EXTRAPREDS_TOK LPAREN_TOK (predicateDeclaration)+ RPAREN_TOK + | EXTRASORTS_TOK LPAREN_TOK sortDeclaration+ RPAREN_TOK + | NOTES_TOK STRING_LITERAL + | annotation + ; + +/** + * Matches an annotated formula. + * @return the expression representing the formula + */ +annotatedFormula[CVC4::Expr& expr] +@init { + Debug("parser") << "annotated formula: " << AntlrInput::tokenText(LT(1)) << std::endl; + Kind kind; + std::string name; + std::vector<Expr> args; /* = getExprVector(); */ +} + : /* a built-in operator application */ + LPAREN_TOK builtinOp[kind] annotatedFormulas[args,expr] RPAREN_TOK + { input->checkArity(kind, args.size()); + if((kind == CVC4::kind::AND || kind == CVC4::kind::OR) && args.size() == 1) { + /* Unary AND/OR can be replaced with the argument. + It just so happens expr should already by the only argument. */ + Assert( expr == args[0] ); + } else { + expr = input->mkExpr(kind, args); + } + } + + | /* A non-built-in function application */ + + // Semantic predicate not necessary if parenthesized subexpressions + // are disallowed + // { isFunction(LT(2)->getText()) }? + + LPAREN_TOK + functionSymbol[expr] + { args.push_back(expr); } + annotatedFormulas[args,expr] RPAREN_TOK + // TODO: check arity + { expr = input->mkExpr(CVC4::kind::APPLY_UF,args); } + + | /* An ite expression */ + LPAREN_TOK (ITE_TOK | IF_THEN_ELSE_TOK) + annotatedFormula[expr] + { args.push_back(expr); } + annotatedFormula[expr] + { args.push_back(expr); } + annotatedFormula[expr] + { args.push_back(expr); } + RPAREN_TOK + { expr = input->mkExpr(CVC4::kind::ITE, args); } + + | /* a let/flet binding */ + LPAREN_TOK + (LET_TOK LPAREN_TOK var_identifier[name,CHECK_UNDECLARED] + | FLET_TOK LPAREN_TOK fun_identifier[name,CHECK_UNDECLARED] ) + annotatedFormula[expr] RPAREN_TOK + { input->defineVar(name,expr); } + annotatedFormula[expr] + RPAREN_TOK + { input->undefineVar(name); } + + | /* a variable */ + ( identifier[name,CHECK_DECLARED,SYM_VARIABLE] + | var_identifier[name,CHECK_DECLARED] + | fun_identifier[name,CHECK_DECLARED] ) + { expr = input->getVariable(name); } + + /* constants */ + | TRUE_TOK { expr = input->getTrueExpr(); } + | FALSE_TOK { expr = input->getFalseExpr(); } + /* TODO: let, flet, quantifiers, arithmetic constants */ + ; + +/** + * Matches a sequence of annotated formulas and puts them into the formulas + * vector. + * @param formulas the vector to fill with formulas + * @param expr an Expr reference for the elements of the sequence + */ +/* NOTE: We pass an Expr in here just to avoid allocating a fresh Expr every + * time through this rule. */ +annotatedFormulas[std::vector<CVC4::Expr>& formulas, CVC4::Expr& expr] + : ( annotatedFormula[expr] { formulas.push_back(expr); } )+ + ; + +/** +* Matches on of the unary Boolean connectives. +* @return the kind of the Bollean connective +*/ +builtinOp[CVC4::Kind& kind] +@init { + Debug("parser") << "builtin: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : NOT_TOK { $kind = CVC4::kind::NOT; } + | IMPLIES_TOK { $kind = CVC4::kind::IMPLIES; } + | AND_TOK { $kind = CVC4::kind::AND; } + | OR_TOK { $kind = CVC4::kind::OR; } + | XOR_TOK { $kind = CVC4::kind::XOR; } + | IFF_TOK { $kind = CVC4::kind::IFF; } + | EQUAL_TOK { $kind = CVC4::kind::EQUAL; } + | DISTINCT_TOK { $kind = CVC4::kind::DISTINCT; } + /* TODO: lt, gt, plus, minus, etc. */ + ; + +/** + * Matches a (possibly undeclared) predicate identifier (returning the string). + * @param check what kind of check to do with the symbol + */ +predicateName[std::string& name, CVC4::parser::DeclarationCheck check] + : functionName[name,check] + ; + +/** + * Matches a (possibly undeclared) function identifier (returning the string) + * @param check what kind of check to do with the symbol + */ +functionName[std::string& name, CVC4::parser::DeclarationCheck check] + : identifier[name,check,SYM_FUNCTION] + ; + +/** + * Matches an previously declared function symbol (returning an Expr) + */ +functionSymbol[CVC4::Expr& fun] +@declarations { + std::string name; +} + : functionName[name,CHECK_DECLARED] + { input->checkFunction(name); + fun = input->getFunction(name); } + ; + +/** + * Matches an attribute name from the input (:attribute_name). + */ +attribute + : ATTR_IDENTIFIER + ; + + + +functionDeclaration +@declarations { + std::string name; + std::vector<Type*> sorts; +} + : LPAREN_TOK functionName[name,CHECK_UNDECLARED] + t = sortSymbol // require at least one sort + { sorts.push_back(t); } + sortList[sorts] RPAREN_TOK + { t = input->functionType(sorts); + input->mkVar(name, t); } + ; + +/** + * Matches the declaration of a predicate and declares it + */ +predicateDeclaration +@declarations { + std::string name; + std::vector<Type*> p_sorts; +} + : LPAREN_TOK predicateName[name,CHECK_UNDECLARED] sortList[p_sorts] RPAREN_TOK + { Type *t = input->predicateType(p_sorts); + input->mkVar(name, t); } + ; + +sortDeclaration +@declarations { + std::string name; +} + : sortName[name,CHECK_UNDECLARED] + { Debug("parser") << "sort decl: '" << name << "'" << std::endl; + input->newSort(name); } + ; + +/** + * Matches a sequence of sort symbols and fills them into the given vector. + */ +sortList[std::vector<CVC4::Type*>& sorts] + : ( t = sortSymbol { sorts.push_back(t); })* + ; + +/** + * Matches the sort symbol, which can be an arbitrary identifier. + * @param check the check to perform on the name + */ +sortName[std::string& name, CVC4::parser::DeclarationCheck check] + : identifier[name,check,SYM_SORT] + ; + +sortSymbol returns [CVC4::Type* t] +@declarations { + std::string name; +} + : sortName[name,CHECK_NONE] + { $t = input->getSort(name); } + ; + +/** + * Matches the status of the benchmark, one of 'sat', 'unsat' or 'unknown'. + */ +status[ CVC4::BenchmarkStatus& status ] + : SAT_TOK { $status = SMT_SATISFIABLE; } + | UNSAT_TOK { $status = SMT_UNSATISFIABLE; } + | UNKNOWN_TOK { $status = SMT_UNKNOWN; } + ; + +/** + * Matches an annotation, which is an attribute name, with an optional user + */ +annotation + : attribute (USER_VALUE)? + ; + +/** + * Matches an identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + * @param type the intended namespace for the identifier + */ +identifier[std::string& id, + CVC4::parser::DeclarationCheck check, + CVC4::parser::SymbolType type] + : IDENTIFIER + { id = AntlrInput::tokenText($IDENTIFIER); + Debug("parser") << "identifier: " << id + << " check? " << toString(check) + << " type? " << toString(type) << std::endl; + input->checkDeclaration(id, check, type); } + ; + +/** + * Matches an variable identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + */ +var_identifier[std::string& id, + CVC4::parser::DeclarationCheck check] + : VAR_IDENTIFIER + { id = AntlrInput::tokenText($VAR_IDENTIFIER); + Debug("parser") << "var_identifier: " << id + << " check? " << toString(check) << std::endl; + input->checkDeclaration(id, check, SYM_VARIABLE); } + ; + +/** + * Matches an function identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + */ +fun_identifier[std::string& id, + CVC4::parser::DeclarationCheck check] + : FUN_IDENTIFIER + { id = AntlrInput::tokenText($FUN_IDENTIFIER); + Debug("parser") << "fun_identifier: " << id + << " check? " << toString(check) << std::endl; + input->checkDeclaration(id, check, SYM_FUNCTION); } + ; + + +// Base SMT-LIB tokens +DISTINCT_TOK : 'distinct'; +ITE_TOK : 'ite'; +IF_THEN_ELSE_TOK : 'if_then_else'; +TRUE_TOK : 'true'; +FALSE_TOK : 'false'; +NOT_TOK : 'not'; +IMPLIES_TOK : 'implies'; +AND_TOK : 'and'; +OR_TOK : 'or'; +XOR_TOK : 'xor'; +IFF_TOK : 'iff'; +EXISTS_TOK : 'exists'; +FORALL_TOK : 'forall'; +LET_TOK : 'let'; +FLET_TOK : 'flet'; +THEORY_TOK : 'theory'; +SAT_TOK : 'sat'; +UNSAT_TOK : 'unsat'; +UNKNOWN_TOK : 'unknown'; +BENCHMARK_TOK : 'benchmark'; + +// The SMT attribute tokens +LOGIC_TOK : ':logic'; +ASSUMPTION_TOK : ':assumption'; +FORMULA_TOK : ':formula'; +STATUS_TOK : ':status'; +EXTRASORTS_TOK : ':extrasorts'; +EXTRAFUNS_TOK : ':extrafuns'; +EXTRAPREDS_TOK : ':extrapreds'; +NOTES_TOK : ':notes'; + +// arithmetic symbols +EQUAL_TOK : '='; +LESS_THAN_TOK : '<'; +GREATER_THAN_TOK : '>'; +AMPERSAND_TOK : '&'; +AT_TOK : '@'; +POUND_TOK : '#'; +PLUS_TOK : '+'; +MINUS_TOK : '-'; +STAR_TOK : '*'; +DIV_TOK : '/'; +PERCENT_TOK : '%'; +PIPE_TOK : '|'; +TILDE_TOK : '~'; + +// Language meta-symbols +//QUESTION_TOK : '?'; +//DOLLAR_TOK : '$'; +LPAREN_TOK : '('; +RPAREN_TOK : ')'; + +/** + * Matches an identifier from the input. An identifier is a sequence of letters, + * digits and "_", "'", "." symbols, starting with a letter. + */ +IDENTIFIER /*options { paraphrase = 'an identifier'; testLiterals = true; }*/ + : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* + ; + +/** + * Matches an identifier starting with a colon. + */ +ATTR_IDENTIFIER /*options { paraphrase = 'an identifier starting with a colon'; testLiterals = true; }*/ + : ':' IDENTIFIER + ; + +/** + * Matches an identifier starting with a question mark. + */ +VAR_IDENTIFIER + : '?' IDENTIFIER + ; + +/** + * Matches an identifier starting with a dollar sign. + */ +FUN_IDENTIFIER + : '$' IDENTIFIER + ; + +/** + * Matches the value of user-defined annotations or attributes. The only constraint imposed on a user-defined value is that it start with + * an open brace and end with closed brace. + */ +USER_VALUE + : '{' + ( ~('{' | '}') )* + '}' + ; + + +/** + * Matches and skips whitespace in the input. + */ +WHITESPACE /*options { paraphrase = 'whitespace'; }*/ + : (' ' | '\t' | '\f' | '\r' | '\n')+ { $channel = HIDDEN;; } + ; + +/** + * Matches a numeral from the input (non-empty sequence of digits). + */ +NUMERAL_TOK /*options { paraphrase = 'a numeral'; }*/ + : (DIGIT)+ + ; + +/** + * Matches a double quoted string literal. Escaping is supported, and escape + * character '\' has to be escaped. + */ +STRING_LITERAL /*options { paraphrase = 'a string literal'; }*/ + : '"' (ESCAPE | ~('"'|'\\'))* '"' + ; + +/** + * Matches the comments and ignores them + */ +COMMENT /*options { paraphrase = 'comment'; }*/ + : ';' (~('\n' | '\r'))* { $channel = HIDDEN;; } + ; + + +/** + * Matches any letter ('a'-'z' and 'A'-'Z'). + */ +fragment +ALPHA /*options { paraphrase = 'a letter'; }*/ + : 'a'..'z' + | 'A'..'Z' + ; + +/** + * Matches the digits (0-9) + */ +fragment +DIGIT /*options { paraphrase = 'a digit'; }*/ + : '0'..'9' + ; + + +/** + * Matches an allowed escaped character. + */ +fragment ESCAPE + : '\\' ('"' | '\\' | 'n' | 't' | 'r') + ; + |