/* ******************* */ /*! \file Tptp.g ** \verbatim ** Original author: bobot ** Major contributors: none ** Minor contributors (to current version): mdeters ** This file is part of the CVC4 prototype. ** Copyright (c) 2009-2012 New York University and The University of Iowa ** See the file COPYING in the top-level source directory for licensing ** information.\endverbatim ** ** \brief Parser for TPTP input language. ** ** Parser for TPTP input language. ** cf. http://www.cs.miami.edu/~tptp/cgi-bin/SeeTPTP?Category=Documents&File=SyntaxBNF **/ grammar Tptp; options { // C output for antlr language = 'C'; // Skip the default error handling, just break with exceptions // defaultErrorHandler = false; // Only lookahead of <= k requested (disable for LL* parsing) // Note that CVC4's BoundedTokenBuffer requires a fixed k ! // If you change this k, change it also in tptp_input.cpp ! k = 1; }/* options */ @header { /** ** This file is part of CVC4. ** Copyright (c) 2009-2013 New York University and The University of Iowa ** See the file COPYING in the top-level source directory for licensing ** information. **/ }/* @header */ @lexer::includes { /** This suppresses warnings about the redefinition of token symbols between * different parsers. The redefinitions should be harmless as long as no * client: (a) #include's the lexer headers for two grammars AND (b) uses the * token symbol definitions. */ #pragma GCC system_header /* This improves performance by ~10 percent on big inputs. * This option is only valid if we know the input is ASCII (or some 8-bit encoding). * If we know the input is UTF-16, we can use ANTLR3_INLINE_INPUT_UTF16. * Otherwise, we have to let the lexer detect the encoding at runtime. */ #define ANTLR3_INLINE_INPUT_ASCII #include "parser/antlr_tracing.h" }/* @lexer::includes */ @lexer::postinclude { #include #include "parser/tptp/tptp.h" #include "parser/antlr_input.h" using namespace CVC4; using namespace CVC4::parser; /* These need to be macros so they can refer to the PARSER macro, which will be defined * by ANTLR *after* this section. (If they were functions, PARSER would be undefined.) */ #undef PARSER_STATE #define PARSER_STATE ((Tptp*)LEXER->super) #undef EXPR_MANAGER #define EXPR_MANAGER PARSER_STATE->getExprManager() #undef MK_EXPR #define MK_EXPR EXPR_MANAGER->mkExpr #undef MK_CONST #define MK_CONST EXPR_MANAGER->mkConst #define UNSUPPORTED PARSER_STATE->unimplementedFeature }/* @lexer::postinclude */ @parser::includes { #include "expr/command.h" #include "parser/parser.h" #include "parser/tptp/tptp.h" #include "parser/antlr_tracing.h" }/* @parser::includes */ @parser::postinclude { #include "expr/expr.h" #include "expr/kind.h" #include "expr/type.h" #include "parser/antlr_input.h" #include "parser/parser.h" #include "parser/tptp/tptp.h" #include "util/integer.h" #include "util/output.h" #include "util/rational.h" #include using namespace CVC4; using namespace CVC4::parser; /* These need to be macros so they can refer to the PARSER macro, which will be defined * by ANTLR *after* this section. (If they were functions, PARSER would be undefined.) */ #undef PARSER_STATE #define PARSER_STATE ((Tptp*)PARSER->super) #undef EXPR_MANAGER #define EXPR_MANAGER PARSER_STATE->getExprManager() #undef MK_EXPR #define MK_EXPR EXPR_MANAGER->mkExpr #undef MK_CONST #define MK_CONST EXPR_MANAGER->mkConst #define UNSUPPORTED PARSER_STATE->unimplementedFeature }/* parser::postinclude */ /** * Parses an expression. * @return the parsed expression, or the Null Expr if we've reached the end of the input */ parseExpr returns [CVC4::parser::tptp::myExpr expr] : cnfFormula[expr] | EOF ; /** * Parses a command * @return the parsed command, or NULL if we've reached the end of the input */ parseCommand returns [CVC4::Command* cmd = NULL] @declarations { Expr expr; Tptp::FormulaRole fr; std::string name; std::string incl_args; } // : LPAREN_TOK c = command RPAREN_TOK { $cmd = c; } : CNF_TOK LPAREN_TOK nameN[name] COMMA_TOK formulaRole[fr] COMMA_TOK { PARSER_STATE->cnf=true; PARSER_STATE->pushScope(); } cnfFormula[expr] { PARSER_STATE->popScope(); std::vector bvl = PARSER_STATE->getFreeVar(); if(!bvl.empty()){ expr = MK_EXPR(kind::FORALL,MK_EXPR(kind::BOUND_VAR_LIST,bvl),expr); }; } (COMMA_TOK anything*)? RPAREN_TOK DOT_TOK { cmd = PARSER_STATE->makeCommand(fr,expr); } | FOF_TOK LPAREN_TOK nameN[name] COMMA_TOK formulaRole[fr] COMMA_TOK { PARSER_STATE->cnf=false; } fofFormula[expr] (COMMA_TOK anything*)? RPAREN_TOK DOT_TOK { cmd = PARSER_STATE->makeCommand(fr,expr); } | INCLUDE_TOK LPAREN_TOK unquotedFileName[name] ( COMMA_TOK LBRACK_TOK nameN[incl_args] ( COMMA_TOK nameN[incl_args] )* RBRACK_TOK )? RPAREN_TOK DOT_TOK { PARSER_STATE->includeFile(name); // The command of the included file will be produce at the new parseCommand call cmd = new EmptyCommand("include::" + name); } | EOF { PARSER_STATE->preemptCommand(new CheckSatCommand(MK_CONST(bool(true)))); cmd = NULL; } ; /* Parse a formula Role */ formulaRole[CVC4::parser::Tptp::FormulaRole & role] : LOWER_WORD { std::string r = AntlrInput::tokenText($LOWER_WORD); if (r == "axiom") role = Tptp::FR_AXIOM; else if (r == "hypothesis") role = Tptp::FR_HYPOTHESIS; else if (r == "definition") role = Tptp::FR_DEFINITION; else if (r == "assumption") role = Tptp::FR_ASSUMPTION; else if (r == "lemma") role = Tptp::FR_LEMMA; else if (r == "theorem") role = Tptp::FR_THEOREM; else if (r == "negated_conjecture") role = Tptp::FR_NEGATED_CONJECTURE; else if (r == "conjecture") role = Tptp::FR_CONJECTURE; else if (r == "unknown") role = Tptp::FR_UNKNOWN; else if (r == "plain") role = Tptp::FR_PLAIN; else if (r == "fi_domain") role = Tptp::FR_FI_DOMAIN; else if (r == "fi_functor") role = Tptp::FR_FI_FUNCTORS; else if (r == "fi_predicate") role = Tptp::FR_FI_PREDICATES; else if (r == "type") role = Tptp::FR_TYPE; else PARSER_STATE->parseError("Invalid formula role: " + r); } ; /*******/ /* CNF */ /* It can parse a little more than the cnf grammar: false and true can appear. Normally only false can appear and only at top level */ cnfFormula[CVC4::Expr& expr] : LPAREN_TOK disjunction[expr] RPAREN_TOK | disjunction[expr] //| FALSE_TOK { expr = MK_CONST(bool(false)); } ; disjunction[CVC4::Expr& expr] @declarations { std::vector args; } : literal[expr] {args.push_back(expr); } ( OR_TOK literal[expr] {args.push_back(expr); } )* { if(args.size() > 1){ expr = MK_EXPR(kind::OR,args); } // else its already in the expr } ; literal[CVC4::Expr& expr] : atomicFormula[expr] | NOT_TOK atomicFormula[expr] { expr = MK_EXPR(kind::NOT,expr); } // | folInfixUnary[expr] ; atomicFormula[CVC4::Expr& expr] @declarations { Expr expr2; std::string name; std::vector args; bool equal; } : atomicWord[name] (LPAREN_TOK arguments[args] RPAREN_TOK)? ( ( equalOp[equal] //equality/disequality between terms { PARSER_STATE->makeApplication(expr,name,args,true); } term[expr2] { expr = MK_EXPR(kind::EQUAL, expr, expr2); if(!equal) expr = MK_EXPR(kind::NOT,expr); } ) | //predicate { PARSER_STATE->makeApplication(expr,name,args,false); } ) | simpleTerm[expr] equalOp[equal] term[expr2] { expr = MK_EXPR(kind::EQUAL, expr, expr2); if(!equal) expr = MK_EXPR(kind::NOT,expr); } | definedProp[expr] ; //%----Using removes a reduce/reduce ambiguity in lex/yacc. //%----Note: "defined" means a word starting with one $ and "system" means $$. definedProp[CVC4::Expr& expr] : TRUE_TOK { expr = MK_CONST(bool(true)); } | FALSE_TOK { expr = MK_CONST(bool(false)); } ; //%----Pure CNF should not use $true or $false in problems, and use $false only //%----at the roots of a refutation. equalOp[bool & equal] : EQUAL_TOK {equal = true;} | DISEQUAL_TOK {equal = false;} ; term[CVC4::Expr& expr] : functionTerm[expr] | simpleTerm[expr] // | conditionalTerm // | let_term ; /* Not an application */ simpleTerm[CVC4::Expr& expr] : variable[expr] | NUMBER { expr = PARSER_STATE->d_tmp_expr; } | DISTINCT_OBJECT { expr = PARSER_STATE->convertStrToUnsorted( MK_CONST(AntlrInput::tokenText($DISTINCT_OBJECT))); } ; functionTerm[CVC4::Expr& expr] : plainTerm[expr] // | | ; plainTerm[CVC4::Expr& expr] @declarations{ std::string name; std::vector args; } : atomicWord[name] (LPAREN_TOK arguments[args] RPAREN_TOK)? { PARSER_STATE->makeApplication(expr,name,args,true); } ; arguments[std::vector & args] @declarations{ Expr expr; } : term[expr] { args.push_back(expr); } ( COMMA_TOK term[expr] { args.push_back(expr); } )* ; variable[CVC4::Expr & expr] : UPPER_WORD { std::string name = AntlrInput::tokenText($UPPER_WORD); if(!PARSER_STATE->cnf || PARSER_STATE->isDeclared(name)){ expr = PARSER_STATE->getVariable(name); } else { expr = PARSER_STATE->mkBoundVar(name, PARSER_STATE->d_unsorted); if(PARSER_STATE->cnf) PARSER_STATE->addFreeVar(expr); } } ; /*******/ /* FOF */ fofFormula[CVC4::Expr & expr] : fofLogicFormula[expr] ; fofLogicFormula[CVC4::Expr & expr] @declarations{ tptp::NonAssoc na; std::vector< Expr > args; Expr expr2; } : fofUnitaryFormula[expr] ( //Non Assoc <=> <~> ~& ~| ( fofBinaryNonAssoc[na] fofUnitaryFormula[expr2] { switch(na){ case tptp::NA_IFF: expr = MK_EXPR(kind::IFF,expr,expr2); break; case tptp::NA_REVIFF: expr = MK_EXPR(kind::XOR,expr,expr2); break; case tptp::NA_IMPLIES: expr = MK_EXPR(kind::IMPLIES,expr,expr2); break; case tptp::NA_REVIMPLIES: expr = MK_EXPR(kind::IMPLIES,expr2,expr); break; case tptp::NA_REVOR: expr = MK_EXPR(kind::NOT,MK_EXPR(kind::OR,expr,expr2)); break; case tptp::NA_REVAND: expr = MK_EXPR(kind::NOT,MK_EXPR(kind::AND,expr,expr2)); break; } } ) | //And & ( { args.push_back(expr); } ( AND_TOK fofUnitaryFormula[expr] { args.push_back(expr); } )+ { expr = MK_EXPR(kind::AND,args); } ) | //Or | ( { args.push_back(expr); } ( OR_TOK fofUnitaryFormula[expr] { args.push_back(expr); } )+ { expr = MK_EXPR(kind::OR,args); } ) )? ; fofUnitaryFormula[CVC4::Expr & expr] @declarations{ Kind kind; std::vector< Expr > bv; } : atomicFormula[expr] | LPAREN_TOK fofLogicFormula[expr] RPAREN_TOK | NOT_TOK fofUnitaryFormula[expr] { expr = MK_EXPR(kind::NOT,expr); } | // Quantified folQuantifier[kind] LBRACK_TOK {PARSER_STATE->pushScope();} ( bindvariable[expr] { bv.push_back(expr); } ( COMMA_TOK bindvariable[expr] { bv.push_back(expr); } )* ) RBRACK_TOK COLON_TOK fofUnitaryFormula[expr] { PARSER_STATE->popScope(); expr = MK_EXPR(kind, MK_EXPR(kind::BOUND_VAR_LIST, bv), expr); } ; bindvariable[CVC4::Expr & expr] : UPPER_WORD { std::string name = AntlrInput::tokenText($UPPER_WORD); expr = PARSER_STATE->mkBoundVar(name, PARSER_STATE->d_unsorted); } ; fofBinaryNonAssoc[CVC4::parser::tptp::NonAssoc & na] : IFF_TOK { na = tptp::NA_IFF; } | REVIFF_TOK { na = tptp::NA_REVIFF; } | REVOR_TOK { na = tptp::NA_REVOR; } | REVAND_TOK { na = tptp::NA_REVAND; } | IMPLIES_TOK { na = tptp::NA_IMPLIES; } | REVIMPLIES_TOK { na = tptp::NA_REVIMPLIES; } ; folQuantifier[CVC4::Kind & kind] : BANG_TOK { kind = kind::FORALL; } | MARK_TOK { kind = kind::EXISTS; } ; /***********************************************/ /* Anything well parenthesis */ anything : LPAREN_TOK anything* RPAREN_TOK | LBRACK_TOK anything* RBRACK_TOK | COMMA_TOK | DOT_TOK | COLON_TOK | OR_TOK | NOT_TOK | BANG_TOK | MARK_TOK | AND_TOK | IFF_TOK | IMPLIES_TOK | REVIMPLIES_TOK | REVIFF_TOK | REVOR_TOK | REVAND_TOK | TIMES_TOK | PLUS_TOK | MINUS_TOK | TRUE_TOK | FALSE_TOK | EQUAL_TOK | DISEQUAL_TOK | CNF_TOK | FOF_TOK | THF_TOK | TFF_TOK | INCLUDE_TOK | DISTINCT_OBJECT | UPPER_WORD | LOWER_WORD | LOWER_WORD_SINGLE_QUOTED | SINGLE_QUOTED | NUMBER | DEFINED_SYMBOL ; /*********/ //punctuation COMMA_TOK : ','; DOT_TOK : '.'; LPAREN_TOK : '('; RPAREN_TOK : ')'; LBRACK_TOK : '['; RBRACK_TOK : ']'; COLON_TOK : ':'; //operator OR_TOK : '|'; NOT_TOK : '~'; BANG_TOK : '!'; MARK_TOK : '?'; AND_TOK : '&'; IFF_TOK : '<=>'; IMPLIES_TOK : '=>'; REVIMPLIES_TOK : '<='; REVIFF_TOK : '<~>'; REVOR_TOK : '~|'; REVAND_TOK : '~&'; TIMES_TOK : '*'; PLUS_TOK : '+'; MINUS_TOK : '-'; //predicate TRUE_TOK : '$true'; FALSE_TOK : '$false'; EQUAL_TOK : '='; DISEQUAL_TOK : '!='; //KEYWORD CNF_TOK : 'cnf'; FOF_TOK : 'fof'; THF_TOK : 'thf'; TFF_TOK : 'tff'; INCLUDE_TOK : 'include'; //Other defined symbols, must be defined after all the other DEFINED_SYMBOL : '$' LOWER_WORD; /*********/ /* Token */ /* * Matches and skips whitespace in the input. */ WHITESPACE : (' ' | '\t' | '\f' | '\r' | '\n')+ { SKIP(); } ; /** * Matches a double or single quoted string literal. Escaping is supported, and * escape character '\' has to be escaped. */ DISTINCT_OBJECT : '"' (DO_CHAR)* '"' ; fragment DO_CHAR : ' '..'!'| '#'..'[' | ']'..'~' | '\\"' | '\\\\'; //The order of this two rules is important LOWER_WORD_SINGLE_QUOTED : '\'' LOWER_WORD '\'' ; SINGLE_QUOTED : '\'' (SQ_CHAR)* '\'' ; fragment SQ_CHAR : ' '..'&' | '('..'[' | ']'..'~' | '\\\'' | '\\\\'; /* Define upper (variable) and lower (everything else) word */ fragment NUMERIC : '0'..'9'; fragment LOWER_ALPHA : 'a'..'z'; fragment UPPER_ALPHA : 'A'..'Z'; fragment ALPHA_NUMERIC : LOWER_ALPHA | UPPER_ALPHA | NUMERIC | '_'; UPPER_WORD : UPPER_ALPHA ALPHA_NUMERIC*; LOWER_WORD : LOWER_ALPHA ALPHA_NUMERIC*; /* filename */ unquotedFileName[std::string & name] /* Beware fileName identifier is used by the backend ... */ : (s=LOWER_WORD_SINGLE_QUOTED | s=SINGLE_QUOTED) { name = AntlrInput::tokenText($s); name = name.substr(1, name.size() - 2 ); }; /* axiom name */ nameN[std::string & name] : atomicWord[name] | NUMBER { name = AntlrInput::tokenText($NUMBER); } ; /* atomic word everything (fof, cnf, thf, tff, include must not be keyword at this position ) */ atomicWord[std::string & name] : FOF_TOK { name = "fof"; } | CNF_TOK { name = "cnf"; } | THF_TOK { name = "thf"; } | TFF_TOK { name = "tff"; } | INCLUDE_TOK { name = "include"; } | LOWER_WORD { name = AntlrInput::tokenText($LOWER_WORD); } | LOWER_WORD_SINGLE_QUOTED //the lower word single quoted are considered without quote { /* strip off the quotes */ name = AntlrInput::tokenTextSubstr($LOWER_WORD_SINGLE_QUOTED,1, ($LOWER_WORD_SINGLE_QUOTED->stop - $LOWER_WORD_SINGLE_QUOTED->start) - 1); } | SINGLE_QUOTED {name = AntlrInput::tokenText($SINGLE_QUOTED); }; //for the others the quote remains /** I don't understand how is made the difference between rational and real in SyntaxBNF. So I put all in rational */ /* Rational */ fragment DOT : '.'; fragment EXPONENT : 'E'|'e'; fragment SLASH : '/'; fragment DECIMAL : NUMERIC+; /* Currently we put all in the rationnal type */ fragment SIGN[bool & pos] : PLUS_TOK | MINUS_TOK { pos = false; } ; NUMBER @declarations{ bool pos = true; bool posE = true; } : ( SIGN[pos]? num=DECIMAL { Integer i (AntlrInput::tokenText($num)); Rational r = ( pos ? i : -i ); PARSER_STATE->d_tmp_expr = MK_CONST(r); } | SIGN[pos]? num=DECIMAL DOT den=DECIMAL (EXPONENT SIGN[posE]? e=DECIMAL)? { std::string snum = AntlrInput::tokenText($num); std::string sden = AntlrInput::tokenText($den); /* compute the numerator */ Integer inum( snum + sden ); // The sign inum = pos ? inum : - inum; // The exponent size_t exp = ($e == NULL ? 0 : AntlrInput::tokenToUnsigned($e)); // Decimal part size_t dec = sden.size(); /* multiply it by 10 raised to the exponent reduced by the * number of decimal place in den (dec) */ Rational r; if( !posE ) r = Rational(inum, Integer(10).pow(exp + dec)); else if( exp == dec ) r = Rational(inum); else if( exp > dec ) r = Rational(inum * Integer(10).pow(exp - dec)); else r = Rational(inum, Integer(10).pow(dec - exp)); PARSER_STATE->d_tmp_expr = MK_CONST( r ); } | SIGN[pos]? num=DECIMAL SLASH den=DECIMAL { Integer inum(AntlrInput::tokenText($num)); Integer iden(AntlrInput::tokenText($den)); PARSER_STATE->d_tmp_expr = MK_CONST( Rational(pos ? inum : -inum, iden)); } ) { //Put a convertion around it PARSER_STATE->d_tmp_expr = PARSER_STATE->convertRatToUnsorted( PARSER_STATE->d_tmp_expr ); } ; /** * Matches the comments and ignores them */ COMMENT : '%' (~('\n' | '\r'))* { SKIP(); } //comment line | '/*' ( options {greedy=false;} : . )* '*/' { SKIP(); } //comment block ;