diff options
author | Christopher L. Conway <christopherleeconway@gmail.com> | 2010-03-30 20:22:33 +0000 |
---|---|---|
committer | Christopher L. Conway <christopherleeconway@gmail.com> | 2010-03-30 20:22:33 +0000 |
commit | 8730e9320a833a9eb0e65074f9988950b7424c0c (patch) | |
tree | 1cb09404256743e208fece079ba473595e05edcd /src/parser | |
parent | 8c87c05ac56a5f29b2ae1e658f2d7d3b7b588163 (diff) |
Merging from branches/antlr3 (r246:354)
Diffstat (limited to 'src/parser')
32 files changed, 3418 insertions, 1951 deletions
diff --git a/src/parser/.gitignore b/src/parser/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/src/parser/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index 5ac1c9e35..ee0a23c98 100644 --- a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -34,10 +34,17 @@ libcvc4parser_noinst_la_LIBADD = \ libcvc4parser_la_SOURCES = libcvc4parser_noinst_la_SOURCES = \ - parser.h \ - parser.cpp \ - parser_exception.h \ - symbol_table.h \ - antlr_parser.h \ - antlr_parser.cpp \ - memory_mapped_input_buffer.h + antlr_input.h \ + antlr_input.cpp \ + bounded_token_buffer.h \ + bounded_token_buffer.cpp \ + bounded_token_factory.h \ + bounded_token_factory.cpp \ + input.h \ + input.cpp \ + memory_mapped_input_buffer.h \ + memory_mapped_input_buffer.cpp \ + parser_options.h \ + parser_exception.h \ + symbol_table.h + diff --git a/src/parser/antlr_input.cpp b/src/parser/antlr_input.cpp new file mode 100644 index 000000000..02e07bc8f --- /dev/null +++ b/src/parser/antlr_input.cpp @@ -0,0 +1,317 @@ +/********************* */ +/** antlr_parser.cpp + ** Original author: dejan + ** Major contributors: cconway + ** Minor contributors (to current version): mdeters + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** A super-class for ANTLR-generated input language parsers + **/ + +/* + * antlr_parser.cpp + * + * Created on: Nov 30, 2009 + * Author: dejan + */ + +#include <iostream> +#include <limits.h> +#include <antlr3.h> + +#include "util/output.h" +#include "util/Assert.h" +#include "expr/command.h" +#include "expr/type.h" +#include "parser/antlr_input.h" +#include "parser/bounded_token_buffer.h" +#include "parser/bounded_token_factory.h" +#include "parser/memory_mapped_input_buffer.h" +#include "parser/parser_exception.h" + +using namespace std; +using namespace CVC4; +using namespace CVC4::parser; +using namespace CVC4::kind; + +namespace CVC4 { +namespace parser { + +AntlrInput::AntlrInput(ExprManager* exprManager, const std::string& filename, unsigned int lookahead, bool useMmap) : + Input(exprManager, filename), + d_lookahead(lookahead), + d_lexer(NULL), + d_parser(NULL), + d_tokenStream(NULL) { + + if( useMmap ) { + d_input = MemoryMappedInputBufferNew(filename); + } else { + d_input = antlr3AsciiFileStreamNew((pANTLR3_UINT8) filename.c_str()); + } + if( d_input == NULL ) { + throw ParserException("Couldn't open file: " + filename); + } +} + +/* +AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead) + Parser(exprManager,name), + d_lookahead(lookahead) { + +} +*/ + +AntlrInput::AntlrInput(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) : + Input(exprManager,name), + d_lookahead(lookahead), + d_lexer(NULL), + d_parser(NULL), + d_tokenStream(NULL) { + char* inputStr = strdup(input.c_str()); + char* nameStr = strdup(name.c_str()); + if( inputStr==NULL || nameStr==NULL ) { + throw ParserException("Couldn't initialize string input: '" + input + "'"); + } + d_input = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr); + if( d_input == NULL ) { + throw ParserException("Couldn't create input stream for string: '" + input + "'"); + } +} + +AntlrInput::~AntlrInput() { + d_tokenStream->free(d_tokenStream); + d_input->close(d_input); +} + +pANTLR3_INPUT_STREAM AntlrInput::getInputStream() { + return d_input; +} + +pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() { + return d_tokenStream; +} + +void AntlrInput::parseError(const std::string& message) + throw (ParserException) { + Debug("parser") << "Throwing exception: " << getFilename() << ":" + << d_lexer->getLine(d_lexer) << "." + << d_lexer->getCharPositionInLine(d_lexer) << ": " + << message << endl; + throw ParserException(message, getFilename(), d_lexer->getLine(d_lexer), + d_lexer->getCharPositionInLine(d_lexer)); +} + +void AntlrInput::reportError(pANTLR3_BASE_RECOGNIZER recognizer) { + pANTLR3_EXCEPTION ex = recognizer->state->exception; + pANTLR3_UINT8 * tokenNames = recognizer->state->tokenNames; + stringstream ss; +// std::string msg; + + // Signal we are in error recovery now + recognizer->state->errorRecovery = ANTLR3_TRUE; + + // Indicate this recognizer had an error while processing. + recognizer->state->errorCount++; + + // Call the builtin error formatter + // recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames); + + /* This switch statement is adapted from antlr3baserecognizer.c:displayRecognitionError in libantlr3c. + * TODO: Make error messages more useful, maybe by including more expected tokens and information + * about the current token. */ + switch(ex->type) { + case ANTLR3_UNWANTED_TOKEN_EXCEPTION: + + // Indicates that the recognizer was fed a token which seems to be + // spurious input. We can detect this when the token that follows + // this unwanted token would normally be part of the syntactically + // correct stream. Then we can see that the token we are looking at + // is just something that should not be there and throw this exception. + // + if(tokenNames == NULL) { + ss << "Unexpected token." ; + } else { + if(ex->expecting == ANTLR3_TOKEN_EOF) { + ss << "Expected end of file."; + } else { + ss << "Expected " << tokenNames[ex->expecting] << "."; + } + } + break; + + case ANTLR3_MISSING_TOKEN_EXCEPTION: + + // Indicates that the recognizer detected that the token we just + // hit would be valid syntactically if preceded by a particular + // token. Perhaps a missing ';' at line end or a missing ',' in an + // expression list, and such like. + // + if(tokenNames == NULL) { + ss << "Missing token (" << ex->expecting << ")."; + } else { + if(ex->expecting == ANTLR3_TOKEN_EOF) { + ss << "Missing end of file marker."; + } else { + ss << "Missing " << tokenNames[ex->expecting] << "."; + } + } + break; + + case ANTLR3_RECOGNITION_EXCEPTION: + + // Indicates that the recognizer received a token + // in the input that was not predicted. This is the basic exception type + // from which all others are derived. So we assume it was a syntax error. + // You may get this if there are not more tokens and more are needed + // to complete a parse for instance. + // + ss <<"Syntax error."; + break; + + case ANTLR3_MISMATCHED_TOKEN_EXCEPTION: + + // We were expecting to see one thing and got another. This is the + // most common error if we could not detect a missing or unwanted token. + // Here you can spend your efforts to + // derive more useful error messages based on the expected + // token set and the last token and so on. The error following + // bitmaps do a good job of reducing the set that we were looking + // for down to something small. Knowing what you are parsing may be + // able to allow you to be even more specific about an error. + // + if(tokenNames == NULL) { + ss << "Syntax error."; + } else { + if(ex->expecting == ANTLR3_TOKEN_EOF) { + ss << "Expected end of file."; + } else { + ss << "Expected " << tokenNames[ex->expecting] << "."; + } + } + break; + + case ANTLR3_NO_VIABLE_ALT_EXCEPTION: + + // We could not pick any alt decision from the input given + // so god knows what happened - however when you examine your grammar, + // you should. It means that at the point where the current token occurred + // that the DFA indicates nowhere to go from here. + // + ss << "Cannot match to any predicted input."; + + break; + + case ANTLR3_MISMATCHED_SET_EXCEPTION: + + { + ANTLR3_UINT32 count; + ANTLR3_UINT32 bit; + ANTLR3_UINT32 size; + ANTLR3_UINT32 numbits; + pANTLR3_BITSET errBits; + + // This means we were able to deal with one of a set of + // possible tokens at this point, but we did not see any + // member of that set. + // + ss << "Unexpected input. Expected one of : "; + + // What tokens could we have accepted at this point in the + // parse? + // + count = 0; + errBits = antlr3BitsetLoad(ex->expectingSet); + numbits = errBits->numBits(errBits); + size = errBits->size(errBits); + + if(size > 0) { + // However many tokens we could have dealt with here, it is usually + // not useful to print ALL of the set here. I arbitrarily chose 8 + // here, but you should do whatever makes sense for you of course. + // No token number 0, so look for bit 1 and on. + // + for(bit = 1; bit < numbits && count < 8 && count < size; bit++) { + // TODO: This doesn;t look right - should be asking if the bit is set!! + // + if(tokenNames[bit]) { + if( count++ > 0 ) { + ss << ", "; + } + ss << tokenNames[bit]; + } + } + } else { + Unreachable("Parse error with empty set of expected tokens."); + } + } + break; + + case ANTLR3_EARLY_EXIT_EXCEPTION: + + // We entered a loop requiring a number of token sequences + // but found a token that ended that sequence earlier than + // we should have done. + // + ss << "Missing elements."; + break; + + default: + + // We don't handle any other exceptions here, but you can + // if you wish. If we get an exception that hits this point + // then we are just going to report what we know about the + // token. + // + Unhandled("Unexpected exception in parser."); + break; + } + + // Now get ready to throw an exception + pANTLR3_PARSER parser = (pANTLR3_PARSER)(recognizer->super); + AlwaysAssert(parser!=NULL); + AntlrInput *input = (AntlrInput*)(parser->super); + AlwaysAssert(input!=NULL); + + // Call the error display routine + input->parseError(ss.str()); +} + +void AntlrInput::setLexer(pANTLR3_LEXER pLexer) { + d_lexer = pLexer; + + pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory; + if( pTokenFactory != NULL ) { + pTokenFactory->close(pTokenFactory); + } + + /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */ + pTokenFactory = BoundedTokenFactoryNew(d_input, 2*d_lookahead); + if( pTokenFactory == NULL ) { + throw ParserException("Couldn't create token factory."); + } + d_lexer->rec->state->tokFactory = pTokenFactory; + + pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource); + if( buffer == NULL ) { + throw ParserException("Couldn't create token buffer."); + } + + d_tokenStream = buffer->commonTstream; +} + +void AntlrInput::setParser(pANTLR3_PARSER pParser) { + d_parser = pParser; + // ANTLR isn't using super, AFAICT. + d_parser->super = this; + d_parser->rec->reportError = &reportError; +} + + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ diff --git a/src/parser/antlr_input.h b/src/parser/antlr_input.h new file mode 100644 index 000000000..01ebe8339 --- /dev/null +++ b/src/parser/antlr_input.h @@ -0,0 +1,93 @@ +/********************* */ +/** antlr_parser.h + ** Original author: dejan + ** Major contributors: cconway + ** Minor contributors (to current version): mdeters + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Base for ANTLR parser classes. + **/ + +#include "cvc4parser_private.h" + +#ifndef __CVC4__PARSER__ANTLR_PARSER_H +#define __CVC4__PARSER__ANTLR_PARSER_H + +#include <vector> +#include <string> +#include <iostream> +#include <antlr3.h> + +#include "expr/expr.h" +#include "expr/expr_manager.h" +#include "parser/input.h" +#include "parser/symbol_table.h" +#include "util/Assert.h" + +namespace CVC4 { + +class Command; +class Type; +class FunctionType; + +namespace parser { + +/** + * Wrapper for an ANTLR parser that includes convenience methods to set up input and token streams. + */ +class AntlrInput : public Input { + + unsigned int d_lookahead; + pANTLR3_LEXER d_lexer; + pANTLR3_PARSER d_parser; + pANTLR3_COMMON_TOKEN_STREAM d_tokenStream; + pANTLR3_INPUT_STREAM d_input; + + static void reportError(pANTLR3_BASE_RECOGNIZER recognizer); + +public: + AntlrInput(ExprManager* exprManager, const std::string& filename, unsigned int lookahead, bool useMmap); + // AntlrParser(ExprManager* em, std::istream& input, const std::string& name, unsigned int lookahead); + AntlrInput(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead); + ~AntlrInput(); + + inline static std::string tokenText(pANTLR3_COMMON_TOKEN token); + +protected: + + /** + * Throws a semantic exception with the given message. + */ + void parseError(const std::string& msg) throw (ParserException); + + /** Get the lexer */ + pANTLR3_LEXER getLexer(); + /** Retrieve the input stream for this parser. */ + pANTLR3_INPUT_STREAM getInputStream(); + /** Retrieve the token stream for this parser. Must not be called before <code>setLexer()</code>. */ + pANTLR3_COMMON_TOKEN_STREAM getTokenStream(); + /** Set the ANTLR lexer for this parser. */ + void setLexer(pANTLR3_LEXER pLexer); + /** Set the ANTLR parser implementation for this parser. */ + void setParser(pANTLR3_PARSER pParser); +}; + +std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) { + ANTLR3_MARKER start = token->getStartIndex(token); + ANTLR3_MARKER end = token->getStopIndex(token); + std::string txt( (const char *)start, end-start+1 ); + Debug("parser-extra") << "tokenText: start=" << start << std::endl + << "end=" << end << std::endl + << "txt='" << txt << "'" << std::endl; + return txt; +} + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ + +#endif /* __CVC4__PARSER__ANTLR_PARSER_H */ diff --git a/src/parser/antlr_parser.h b/src/parser/antlr_parser.h deleted file mode 100644 index e970ebd52..000000000 --- a/src/parser/antlr_parser.h +++ /dev/null @@ -1,361 +0,0 @@ -/********************* */ -/** antlr_parser.h - ** Original author: dejan - ** Major contributors: cconway - ** Minor contributors (to current version): mdeters - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Base for ANTLR parser classes. - **/ - -#include "cvc4parser_private.h" - -#ifndef __CVC4__PARSER__ANTLR_PARSER_H -#define __CVC4__PARSER__ANTLR_PARSER_H - -#include <vector> -#include <string> -#include <iostream> - -#include <antlr/LLkParser.hpp> -#include <antlr/SemanticException.hpp> - -#include "expr/expr.h" -#include "expr/expr_manager.h" -#include "util/Assert.h" -#include "parser/symbol_table.h" - -namespace CVC4 { - -class Command; -class Type; -class FunctionType; - -namespace parser { - -/** - * Wrapper of the ANTLR parser that includes convenience methods that interacts - * with the expression manager. The grammars should have as little C++ code as - * possible and all the state and actual functionality (besides parsing) should - * go into this class. - */ -class AntlrParser : public antlr::LLkParser { - -public: - - /** Types of check for the symols */ - enum DeclarationCheck { - /** Enforce that the symbol has been declared */ - CHECK_DECLARED, - /** Enforce that the symbol has not been declared */ - CHECK_UNDECLARED, - /** Don't check anything */ - CHECK_NONE - }; - - /** - * Sets the expression manager to use when creating/managing expression. - * @param expr_manager the expression manager - */ - void setExpressionManager(ExprManager* expr_manager); - - /** - * Sets the logic for the current benchmark. Declares any logic symbols. - * - * @param name the name of the logic (e.g., QF_UF, AUFLIA) - */ - void setLogic(const std::string& name); - - /** - * Parse a command. - * @return a command - */ - virtual Command* parseCommand() = 0; - - /** - * Parse an expression. - * @return the expression - */ - virtual Expr parseExpr() = 0; - - /** Enable semantic checks during parsing. */ - void enableChecks(); - - /** Disable semantic checks during parsing. Disabling checks may lead to crashes on bad inputs. */ - void disableChecks(); - -protected: - - /** - * Create a parser with the given input state and token lookahead. - * - * @param state the shared input state - * @param k lookahead - */ - AntlrParser(const antlr::ParserSharedInputState& state, int k); - - /** - * Create a parser with the given token buffer and lookahead. - * - * @param tokenBuf the token buffer to use in parsing - * @param k lookahead - */ - AntlrParser(antlr::TokenBuffer& tokenBuf, int k); - - /** - * Create a parser with the given token stream and lookahead. - * - * @param lexer the lexer to use in parsing - * @param k lookahead - */ - AntlrParser(antlr::TokenStream& lexer, int k); - - /** - * Throws an ANTLR semantic exception with the given message. - */ - void parseError(const std::string& msg) throw (antlr::SemanticException); - - /** - * Returns a variable, given a name and a type. - * @param var_name the name of the variable - * @return the variable expression - */ - Expr getVariable(const std::string& var_name); - - /** - * Returns a function, given a name and a type. - * @param name the name of the function - * @return the function expression - */ - Expr getFunction(const std::string& name); - - /** - * Returns a sort, given a name - */ - Type* getSort(const std::string& sort_name); - - /** - * Types of symbols. Used to define namespaces. - */ - enum SymbolType { - /** Variables */ - SYM_VARIABLE, - /** Functions */ - SYM_FUNCTION, - /** Sorts */ - SYM_SORT - }; - - /** - * Checks if a symbol has been declared. - * @param name the symbol name - * @param type the symbol type - * @return true iff the symbol has been declared with the given type - */ - bool isDeclared(const std::string& name, SymbolType type = SYM_VARIABLE); - - /** - * Checks if the declaration policy we want to enforce holds - * for the given symbol. - * @param name the symbol to check - * @param check the kind of check to perform - * @param type the type of the symbol - * @throws SemanticException if checks are enabled and the check fails - */ - void checkDeclaration(const std::string& name, - DeclarationCheck check, - SymbolType type = SYM_VARIABLE) - throw (antlr::SemanticException); - - /** - * Checks whether the given name is bound to a function. - * @param name the name to check - * @throws SemanticException if checks are enabled and name is not bound to a function - */ - void checkFunction(const std::string& name) throw (antlr::SemanticException); - - /** - * Check that <code>kind</code> can accept <code>numArgs</codes> arguments. - * @param kind the built-in operator to check - * @param numArgs the number of actual arguments - * @throws SemanticException if checks are enabled and the operator <code>kind</code> cannot be - * applied to <code>numArgs</code> arguments. - */ - void checkArity(Kind kind, unsigned int numArgs) throw (antlr::SemanticException); - - /** - * Returns the type for the variable with the given name. - * @param name the symbol to lookup - * @param type the (namespace) type of the symbol - */ - Type* getType(const std::string& var_name, - SymbolType type = SYM_VARIABLE); - - /** - * Returns the true expression. - * @return the true expression - */ - Expr getTrueExpr() const; - - /** - * Returns the false expression. - * @return the false expression - */ - Expr getFalseExpr() const; - - /** - * Creates a new unary CVC4 expression using the expression manager. - * @param kind the kind of the expression - * @param child the child - * @return the new unary expression - */ - Expr mkExpr(Kind kind, const Expr& child); - - /** - * Creates a new binary CVC4 expression using the expression manager. - * @param kind the kind of the expression - * @param child1 the first child - * @param child2 the second child - * @return the new binary expression - */ - Expr mkExpr(Kind kind, const Expr& child_1, const Expr& child_2); - - /** - * Creates a new ternary CVC4 expression using the expression manager. - * @param kind the kind of the expression - * @param child_1 the first child - * @param child_2 the second child - * @param child_3 - * @return the new ternary expression - */ - Expr mkExpr(Kind kind, const Expr& child_1, const Expr& child_2, - const Expr& child_3); - - /** - * Creates a new CVC4 expression using the expression manager. - * @param kind the kind of the expression - * @param children the children of the expression - */ - Expr mkExpr(Kind kind, const std::vector<Expr>& children); - - /** Create a new CVC4 variable expression of the given type. */ - Expr mkVar(const std::string& name, Type* type); - - /** Create a set of new CVC4 variable expressions of the given - type. */ - const std::vector<Expr> - mkVars(const std::vector<std::string> names, - Type* type); - - /** Create a new variable definition (e.g., from a let binding). */ - void defineVar(const std::string& name, const Expr& val); - /** Remove a variable definition (e.g., from a let binding). */ - void undefineVar(const std::string& name); - - /** Returns a function type over the given domain and range types. */ - Type* functionType(Type* domain, Type* range); - - /** Returns a function type over the given types. argTypes must be - non-empty. */ - Type* functionType(const std::vector<Type*>& argTypes, - Type* rangeType); - - /** - * Returns a function type over the given types. If types has only - * one element, then the type is just types[0]. - * - * @param types a non-empty list of input and output types. - */ - Type* functionType(const std::vector<Type*>& types); - - /** - * Returns a predicate type over the given sorts. If sorts is empty, - * then the type is just BOOLEAN. - - * @param sorts a list of input types - */ - Type* predicateType(const std::vector<Type*>& sorts); - - /** - * Creates a new sort with the given name. - */ - Type* newSort(const std::string& name); - - /** - * Creates a new sorts with the given names. - */ - const std::vector<Type*> - newSorts(const std::vector<std::string>& names); - - /** Is the symbol bound to a boolean variable? */ - bool isBoolean(const std::string& name); - - /** Is the symbol bound to a function? */ - bool isFunction(const std::string& name); - - /** Is the symbol bound to a predicate? */ - bool isPredicate(const std::string& name); - - /** Returns the boolean type. */ - BooleanType* booleanType(); - - /** Returns the kind type (i.e., the type of types). */ - KindType* kindType(); - - /** Returns the minimum arity of the given kind. */ - static unsigned int minArity(Kind kind); - - /** Returns the maximum arity of the given kind. */ - static unsigned int maxArity(Kind kind); - - /** Returns a string representation of the given object (for - debugging). */ - inline std::string toString(DeclarationCheck check) { - switch(check) { - case CHECK_NONE: return "CHECK_NONE"; - case CHECK_DECLARED: return "CHECK_DECLARED"; - case CHECK_UNDECLARED: return "CHECK_UNDECLARED"; - default: Unreachable(); - } - } - - /** Returns a string representation of the given object (for - debugging). */ - inline std::string toString(SymbolType type) { - switch(type) { - case SYM_VARIABLE: return "SYM_VARIABLE"; - case SYM_FUNCTION: return "SYM_FUNCTION"; - case SYM_SORT: return "SYM_SORT"; - default: Unreachable(); - } - } - -// bool checksEnabled(); - -private: - - /** The expression manager */ - ExprManager* d_exprManager; - - /** The symbol table lookup */ - SymbolTable<Expr> d_varSymbolTable; - - /** The sort table */ - SymbolTable<Type*> d_sortTable; - - /** Are semantic checks enabled during parsing? */ - bool d_checksEnabled; - - /** Lookup a symbol in the given namespace. */ - Expr getSymbol(const std::string& var_name, SymbolType type); -}; - -}/* CVC4::parser namespace */ -}/* CVC4 namespace */ - -#endif /* __CVC4__PARSER__ANTLR_PARSER_H */ diff --git a/src/parser/bounded_token_buffer.cpp b/src/parser/bounded_token_buffer.cpp new file mode 100644 index 000000000..f53b6d548 --- /dev/null +++ b/src/parser/bounded_token_buffer.cpp @@ -0,0 +1,640 @@ +/// \file +/// Default implementation of CommonTokenStream +/// + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3commontoken.h> +#include <antlr3lexer.h> +#include <antlr3tokenstream.h> + +#include "bounded_token_buffer.h" +#include "cvc4_config.h" +#include "util/Assert.h" + +namespace CVC4 { +namespace parser { + +#ifdef ANTLR3_WINDOWS +#pragma warning( disable : 4100 ) +#endif + +// COMMON_TOKEN_STREAM API +// +static void setTokenTypeChannel (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 ttype, ANTLR3_UINT32 channel); +static void discardTokenType (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_INT32 ttype); +static void discardOffChannel (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_BOOLEAN discard); +static pANTLR3_VECTOR getTokens (pANTLR3_COMMON_TOKEN_STREAM cts); +static pANTLR3_LIST getTokenRange (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop); +static pANTLR3_LIST getTokensSet (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_BITSET types); +static pANTLR3_LIST getTokensList (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_LIST list); +static pANTLR3_LIST getTokensType (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, ANTLR3_UINT32 type); + +// TOKEN_STREAM API +// +static pANTLR3_COMMON_TOKEN tokLT (pANTLR3_TOKEN_STREAM ts, ANTLR3_INT32 k); +static pANTLR3_COMMON_TOKEN dbgTokLT (pANTLR3_TOKEN_STREAM ts, ANTLR3_INT32 k); +static pANTLR3_COMMON_TOKEN get (pANTLR3_TOKEN_STREAM ts, ANTLR3_UINT32 i); +static pANTLR3_TOKEN_SOURCE getTokenSource (pANTLR3_TOKEN_STREAM ts); +static void setTokenSource (pANTLR3_TOKEN_STREAM ts, pANTLR3_TOKEN_SOURCE tokenSource); +static pANTLR3_STRING toString (pANTLR3_TOKEN_STREAM ts); +static pANTLR3_STRING toStringSS (pANTLR3_TOKEN_STREAM ts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop); +static pANTLR3_STRING toStringTT (pANTLR3_TOKEN_STREAM ts, pANTLR3_COMMON_TOKEN start, pANTLR3_COMMON_TOKEN stop); +static void setDebugListener (pANTLR3_TOKEN_STREAM ts, pANTLR3_DEBUG_EVENT_LISTENER debugger); + +// INT STREAM API +// +static void consume (pANTLR3_INT_STREAM is); +static void dbgConsume (pANTLR3_INT_STREAM is); +static ANTLR3_UINT32 _LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i); +static ANTLR3_UINT32 dbgLA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i); +static ANTLR3_MARKER mark (pANTLR3_INT_STREAM is); +static ANTLR3_MARKER dbgMark (pANTLR3_INT_STREAM is); +static void release (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark); +static ANTLR3_UINT32 size (pANTLR3_INT_STREAM is); +static ANTLR3_MARKER tindex (pANTLR3_INT_STREAM is); +static void rewindStream (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker); +static void dbgRewindStream (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker); +static void rewindLast (pANTLR3_INT_STREAM is); +static void dbgRewindLast (pANTLR3_INT_STREAM is); +static void seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index); +static void dbgSeek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index); +static pANTLR3_STRING getSourceName (pANTLR3_INT_STREAM is); +static pANTLR3_COMMON_TOKEN LB (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_INT32 i); + +static pANTLR3_COMMON_TOKEN nextToken(pBOUNDED_TOKEN_BUFFER buffer); +static pANTLR3_COMMON_TOKEN simpleEmit (pANTLR3_LEXER lexer); + +void +BoundedTokenBufferFree(pBOUNDED_TOKEN_BUFFER buffer) { + buffer->commonTstream->free(buffer->commonTstream); + ANTLR3_FREE(buffer->tokenBuffer); + ANTLR3_FREE(buffer); +} + +/*ANTLR3_API pANTLR3_COMMON_TOKEN_STREAM +antlr3CommonTokenDebugStreamSourceNew(ANTLR3_UINT32 hint, pANTLR3_TOKEN_SOURCE source, pANTLR3_DEBUG_EVENT_LISTENER debugger) +{ + pANTLR3_COMMON_TOKEN_STREAM stream; + + // Create a standard token stream + // + stream = antlr3CommonTokenStreamSourceNew(hint, source); + + // Install the debugger object + // + stream->tstream->debugger = debugger; + + // Override standard token stream methods with debugging versions + // + stream->tstream->initialStreamState = ANTLR3_FALSE; + + stream->tstream->_LT = dbgTokLT; + + stream->tstream->istream->consume = dbgConsume; + stream->tstream->istream->_LA = dbgLA; + stream->tstream->istream->mark = dbgMark; + stream->tstream->istream->rewind = dbgRewindStream; + stream->tstream->istream->rewindLast = dbgRewindLast; + stream->tstream->istream->seek = dbgSeek; + + return stream; +}*/ + +pBOUNDED_TOKEN_BUFFER +BoundedTokenBufferSourceNew(ANTLR3_UINT32 k, pANTLR3_TOKEN_SOURCE source) +{ + pBOUNDED_TOKEN_BUFFER buffer; + pANTLR3_COMMON_TOKEN_STREAM stream; + + + AlwaysAssert( k > 0 ); + + /* Memory for the interface structure + */ + buffer = (pBOUNDED_TOKEN_BUFFER) ANTLR3_MALLOC(sizeof(BOUNDED_TOKEN_BUFFER_struct)); + + if (buffer == NULL) + { + return NULL; + } + + buffer->tokenBuffer = (pANTLR3_COMMON_TOKEN*) ANTLR3_MALLOC(2*k*sizeof(pANTLR3_COMMON_TOKEN)); + buffer->currentIndex = 0; + buffer->maxIndex = 0; + buffer->k = k; + buffer->bufferSize = 2*k; + buffer->empty = ANTLR3_TRUE; + buffer->done = ANTLR3_FALSE; + + stream = antlr3CommonTokenStreamSourceNew(k,source); + if (stream == NULL) + { + return NULL; + } + + stream->super = buffer; + buffer->commonTstream = stream; + + /* Defaults + */ + stream->p = -1; + + /* Install the token stream API + */ + stream->tstream->_LT = tokLT; + stream->tstream->get = get; + stream->tstream->getTokenSource = getTokenSource; + stream->tstream->setTokenSource = setTokenSource; + stream->tstream->toString = toString; + stream->tstream->toStringSS = toStringSS; + stream->tstream->toStringTT = toStringTT; + stream->tstream->setDebugListener = setDebugListener; + + /* Install INT_STREAM interface + */ + stream->tstream->istream->_LA = _LA; + stream->tstream->istream->mark = mark; + stream->tstream->istream->release = release; + stream->tstream->istream->size = size; + stream->tstream->istream->index = tindex; + stream->tstream->istream->rewind = rewindStream; + stream->tstream->istream->rewindLast= rewindLast; + stream->tstream->istream->seek = seek; + stream->tstream->istream->consume = consume; + stream->tstream->istream->getSourceName = getSourceName; + + return buffer; +} + +// Install a debug listener adn switch to debug mode methods +// +static void +setDebugListener (pANTLR3_TOKEN_STREAM ts, pANTLR3_DEBUG_EVENT_LISTENER debugger) +{ + // Install the debugger object + // + ts->debugger = debugger; + + // Override standard token stream methods with debugging versions + // + ts->initialStreamState = ANTLR3_FALSE; + + ts->_LT = dbgTokLT; + + ts->istream->consume = dbgConsume; + ts->istream->_LA = dbgLA; + ts->istream->mark = dbgMark; + ts->istream->rewind = dbgRewindStream; + ts->istream->rewindLast = dbgRewindLast; + ts->istream->seek = dbgSeek; +} + +/** Get the ith token from the current position 1..n where k=1 is the +* first symbol of lookahead. +*/ +static pANTLR3_COMMON_TOKEN tokLT(pANTLR3_TOKEN_STREAM ts, ANTLR3_INT32 k) { + pANTLR3_COMMON_TOKEN_STREAM cts; + pBOUNDED_TOKEN_BUFFER buffer; + + cts = (pANTLR3_COMMON_TOKEN_STREAM) ts->super; + buffer = (pBOUNDED_TOKEN_BUFFER) cts->super; + + /* k must be in the range [-buffer->k..buffer->k] */ + AlwaysAssert( k <= (ANTLR3_INT32)buffer->k + && -k <= (ANTLR3_INT32)buffer->k ); + + if(k == 0) { + return NULL; + } + + /* Initialize the buffer on our first call. */ + if( EXPECT_FALSE(buffer->empty == ANTLR3_TRUE) ) { + AlwaysAssert( buffer->tokenBuffer != NULL ); + buffer->tokenBuffer[ 0 ] = nextToken(buffer); + buffer->maxIndex = 0; + buffer->currentIndex = 0; + buffer->empty = ANTLR3_FALSE; + } + + ANTLR3_UINT32 kIndex; + if( k > 0 ) { + /* look-ahead token k is at offset k-1 */ + kIndex = buffer->currentIndex + k - 1; + } else { + /* Can't look behind more tokens than we've consumed. */ + AlwaysAssert( -k <= (ANTLR3_INT32)buffer->currentIndex ); + /* look-behind token k is at offset -k */ + kIndex = buffer->currentIndex + k; + } + + while( kIndex > buffer->maxIndex ) { + buffer->maxIndex++; + buffer->tokenBuffer[ buffer->maxIndex % buffer->bufferSize ] = nextToken(buffer); + } + + return buffer->tokenBuffer[ kIndex % buffer->bufferSize ]; +} + + +/// As per the normal tokLT but sends information to the debugger +/// +static pANTLR3_COMMON_TOKEN +dbgTokLT (pANTLR3_TOKEN_STREAM ts, ANTLR3_INT32 k) +{ + return tokLT(ts, k); +} + +#ifdef ANTLR3_WINDOWS + /* When fully optimized VC7 complains about non reachable code. + * Not yet sure if this is an optimizer bug, or a bug in the flow analysis + */ +#pragma warning( disable : 4702 ) +#endif + +static pANTLR3_COMMON_TOKEN +LB(pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_INT32 k) +{ + AlwaysAssert(false); +} + +static pANTLR3_COMMON_TOKEN +get (pANTLR3_TOKEN_STREAM ts, ANTLR3_UINT32 i) +{ + AlwaysAssert(false); +} + +static pANTLR3_TOKEN_SOURCE +getTokenSource (pANTLR3_TOKEN_STREAM ts) +{ + return ts->tokenSource; +} + +static void +setTokenSource ( pANTLR3_TOKEN_STREAM ts, + pANTLR3_TOKEN_SOURCE tokenSource) +{ + ts->tokenSource = tokenSource; +} + +static pANTLR3_STRING +toString (pANTLR3_TOKEN_STREAM ts) +{ + AlwaysAssert(false); +} + +static pANTLR3_STRING +toStringSS(pANTLR3_TOKEN_STREAM ts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop) +{ + AlwaysAssert(false); +} + +static pANTLR3_STRING +toStringTT (pANTLR3_TOKEN_STREAM ts, pANTLR3_COMMON_TOKEN start, pANTLR3_COMMON_TOKEN stop) +{ + AlwaysAssert(false); +} + +/** Move the input pointer to the next incoming token. The stream + * must become active with LT(1) available. consume() simply + * moves the input pointer so that LT(1) points at the next + * input symbol. Consume at least one token. + * + * Walk past any token not on the channel the parser is listening to. + */ +static void +consume (pANTLR3_INT_STREAM is) +{ + pANTLR3_COMMON_TOKEN_STREAM cts; + pANTLR3_TOKEN_STREAM ts; + pBOUNDED_TOKEN_BUFFER buffer; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + cts = (pANTLR3_COMMON_TOKEN_STREAM) ts->super; + buffer = (pBOUNDED_TOKEN_BUFFER) cts->super; + + buffer->currentIndex++; +} + + +/// As per ordinary consume but notifies the debugger about hidden +/// tokens and so on. +/// +static void +dbgConsume (pANTLR3_INT_STREAM is) +{ + consume(is); +} + +/** A simple filter mechanism whereby you can tell this token stream + * to force all tokens of type ttype to be on channel. For example, + * when interpreting, we cannot execute actions so we need to tell + * the stream to force all WS and NEWLINE to be a different, ignored, + * channel. + */ +static void +setTokenTypeChannel (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 ttype, ANTLR3_UINT32 channel) +{ + if (tokenStream->channelOverrides == NULL) + { + tokenStream->channelOverrides = antlr3ListNew(10); + } + + /* We add one to the channel so we can distinguish NULL as being no entry in the + * table for a particular token type. + */ + tokenStream->channelOverrides->put(tokenStream->channelOverrides, ttype, ANTLR3_FUNC_PTR((ANTLR3_UINT32)channel + 1), NULL); +} + +static void +discardTokenType (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_INT32 ttype) +{ + if (tokenStream->discardSet == NULL) + { + tokenStream->discardSet = antlr3ListNew(31); + } + + /* We add one to the channel so we can distinguish NULL as being no entry in the + * table for a particular token type. We could use bitsets for this I suppose too. + */ + tokenStream->discardSet->put(tokenStream->discardSet, ttype, ANTLR3_FUNC_PTR((ANTLR3_UINT32)ttype + 1), NULL); +} + +static void +discardOffChannel (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_BOOLEAN discard) +{ + tokenStream->discardOffChannel = discard; +} + +static pANTLR3_VECTOR +getTokens (pANTLR3_COMMON_TOKEN_STREAM tokenStream) +{ + AlwaysAssert(false); +} + +static pANTLR3_LIST +getTokenRange (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop) +{ + AlwaysAssert(false); +} +/** Given a start and stop index, return a List of all tokens in + * the token type BitSet. Return null if no tokens were found. This + * method looks at both on and off channel tokens. + */ +static pANTLR3_LIST +getTokensSet (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_BITSET types) +{ + AlwaysAssert(false); +} + +static pANTLR3_LIST +getTokensList (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_LIST list) +{ + AlwaysAssert(false); +} + +static pANTLR3_LIST +getTokensType (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, ANTLR3_UINT32 type) +{ + AlwaysAssert(false); +} + +static ANTLR3_UINT32 +_LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i) +{ + pANTLR3_TOKEN_STREAM ts; + pANTLR3_COMMON_TOKEN tok; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + + tok = ts->_LT(ts, i); + + if (tok != NULL) + { + return tok->getType(tok); + } + else + { + return ANTLR3_TOKEN_INVALID; + } +} + +/// As per _LA() but for debug mode. +/// +static ANTLR3_UINT32 +dbgLA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i) +{ + AlwaysAssert(false); +} + +static ANTLR3_MARKER +mark (pANTLR3_INT_STREAM is) +{ + AlwaysAssert(false); +} + +/// As per mark() but with a call to tell the debugger we are doing this +/// +static ANTLR3_MARKER +dbgMark (pANTLR3_INT_STREAM is) +{ + AlwaysAssert(false); +} + +static void +release (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark) +{ + return; +} + +static ANTLR3_UINT32 +size (pANTLR3_INT_STREAM is) +{ + AlwaysAssert(false); +} + +static ANTLR3_MARKER +tindex (pANTLR3_INT_STREAM is) +{ + pANTLR3_COMMON_TOKEN_STREAM cts; + pANTLR3_TOKEN_STREAM ts; + pBOUNDED_TOKEN_BUFFER buffer; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + cts = (pANTLR3_COMMON_TOKEN_STREAM) ts->super; + buffer = (pBOUNDED_TOKEN_BUFFER) cts->super; + + return buffer->currentIndex; +} + +static void +dbgRewindLast (pANTLR3_INT_STREAM is) +{ + AlwaysAssert(false); +} +static void +rewindLast (pANTLR3_INT_STREAM is) +{ + AlwaysAssert(false); +} +static void +rewindStream (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker) +{ + AlwaysAssert(false); +} +static void +dbgRewindStream (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker) +{ + AlwaysAssert(false); +} + +static void +seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index) +{ + AlwaysAssert(false); +} +static void +dbgSeek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index) +{ + AlwaysAssert(false); +} + +static pANTLR3_COMMON_TOKEN nextToken(pBOUNDED_TOKEN_BUFFER buffer) { + pANTLR3_COMMON_TOKEN_STREAM tokenStream; + pANTLR3_COMMON_TOKEN tok; + ANTLR3_BOOLEAN discard; + void * channelI; + + tokenStream = buffer->commonTstream; + + if( buffer->done == ANTLR3_TRUE ) { + return &(tokenStream->tstream->tokenSource->eofToken); + } + + /* Pick out the next token from the token source + * Remember we just get a pointer (reference if you like) here + * and so if we store it anywhere, we don't set any pointers to auto free it. + */ + do { + tok + = tokenStream->tstream->tokenSource->nextToken( + tokenStream->tstream->tokenSource); + if(tok == NULL || tok->type == ANTLR3_TOKEN_EOF) { + buffer->done = ANTLR3_TRUE; + break; + } + + discard = ANTLR3_FALSE; /* Assume we are not discarding */ + + /* I employ a bit of a trick, or perhaps hack here. Rather than + * store a pointer to a structure in the override map and discard set + * we store the value + 1 cast to a void *. Hence on systems where NULL = (void *)0 + * we can distinguish "not being there" from "being channel or type 0" + */ + + if(tokenStream->discardSet != NULL + && tokenStream->discardSet->get(tokenStream->discardSet, + tok->getType(tok)) != NULL) { + discard = ANTLR3_TRUE; + } else if(tok->getChannel(tok) != tokenStream->channel) { + discard = ANTLR3_TRUE; + } + + } while(discard == ANTLR3_TRUE); + + return tok; +} + + +/// Return a string that represents the name assoicated with the input source +/// +/// /param[in] is The ANTLR3_INT_STREAM interface that is representing this token stream. +/// +/// /returns +/// /implements ANTLR3_INT_STREAM_struct::getSourceName() +/// +static pANTLR3_STRING +getSourceName (pANTLR3_INT_STREAM is) +{ + // Slightly convoluted as we must trace back to the lexer's input source + // via the token source. The streamName that is here is not initialized + // because this is a token stream, not a file or string stream, which are the + // only things that have a context for a source name. + // + return ((pANTLR3_TOKEN_STREAM)(is->super))->tokenSource->fileName; +} + +static pANTLR3_COMMON_TOKEN +simpleEmit (pANTLR3_LEXER lexer) +{ + pANTLR3_COMMON_TOKEN token; + + /* We could check pointers to token factories and so on, but + * we are in code that we want to run as fast as possible + * so we are not checking any errors. So make sure you have installed an input stream before + * trying to emit a new token. + */ + token = antlr3CommonTokenNew( lexer->rec->state->type ); + // lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory); + + /* Install the supplied information, and some other bits we already know + * get added automatically, such as the input stream it is associated with + * (though it can all be overridden of course) + */ + // token->type = lexer->rec->state->type; + token->channel = lexer->rec->state->channel; + token->start = lexer->rec->state->tokenStartCharIndex; + token->stop = lexer->getCharIndex(lexer) - 1; + token->line = lexer->rec->state->tokenStartLine; + token->charPosition = lexer->rec->state->tokenStartCharPositionInLine; + + if (lexer->rec->state->text != NULL) + { + token->textState = ANTLR3_TEXT_STRING; + token->tokText.text = lexer->rec->state->text; + } + else + { + token->textState = ANTLR3_TEXT_NONE; + } + token->lineStart = lexer->input->currentLine; + token->user1 = lexer->rec->state->user1; + token->user2 = lexer->rec->state->user2; + token->user3 = lexer->rec->state->user3; + token->custom = lexer->rec->state->custom; + + lexer->rec->state->token = token; + + return token; +} + + +} +} diff --git a/src/parser/bounded_token_buffer.h b/src/parser/bounded_token_buffer.h new file mode 100644 index 000000000..9c18ec3de --- /dev/null +++ b/src/parser/bounded_token_buffer.h @@ -0,0 +1,73 @@ +/** \file + * Defines the interface for an ANTLR3 common token stream. Custom token streams should create + * one of these and then override any functions by installing their own pointers + * to implement the various functions. + */ +#ifndef __CVC4__PARSER__BOUNDED_TOKEN_BUFFER_H +#define __CVC4__PARSER__BOUNDED_TOKEN_BUFFER_H + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3defs.h> + +namespace CVC4 { +namespace parser { + +#ifdef __cplusplus +extern "C" { +#endif + + +/** A "super" structure for COMMON_TOKEN_STREAM. */ +typedef struct BOUNDED_TOKEN_BUFFER_struct +{ + pANTLR3_COMMON_TOKEN_STREAM commonTstream; + pANTLR3_COMMON_TOKEN* tokenBuffer; + // tokenNeg1, token1, token2; + ANTLR3_UINT32 currentIndex, maxIndex, k, bufferSize; + ANTLR3_BOOLEAN empty, done; +} BOUNDED_TOKEN_BUFFER, *pBOUNDED_TOKEN_BUFFER; + +pBOUNDED_TOKEN_BUFFER +BoundedTokenBufferSourceNew(ANTLR3_UINT32 k, pANTLR3_TOKEN_SOURCE source); + +void +BoundedTokenBufferFree(pBOUNDED_TOKEN_BUFFER buffer); + +#ifdef __cplusplus +} +#endif + +} +} + + +#endif /* __CVC4__PARSER__BOUNDED_TOKEN_BUFFER_H */ + diff --git a/src/parser/bounded_token_factory.cpp b/src/parser/bounded_token_factory.cpp new file mode 100644 index 000000000..dae2f46e5 --- /dev/null +++ b/src/parser/bounded_token_factory.cpp @@ -0,0 +1,134 @@ +/* + * bounded_token_factory.cpp + * + * Created on: Mar 2, 2010 + * Author: dejan + */ + +#include <antlr3input.h> +#include <antlr3commontoken.h> +#include <antlr3interfaces.h> +#include "bounded_token_factory.h" + +namespace CVC4 { +namespace parser { + +static pANTLR3_COMMON_TOKEN +newPoolToken(pANTLR3_TOKEN_FACTORY factory); + +static void +setInputStream (pANTLR3_TOKEN_FACTORY factory, pANTLR3_INPUT_STREAM input); + +static void +factoryClose (pANTLR3_TOKEN_FACTORY factory); + +pANTLR3_TOKEN_FACTORY +BoundedTokenFactoryNew(pANTLR3_INPUT_STREAM input,ANTLR3_UINT32 size) +{ + pANTLR3_TOKEN_FACTORY factory; + pANTLR3_COMMON_TOKEN tok; + int i; + + /* allocate memory + */ + factory = (pANTLR3_TOKEN_FACTORY) ANTLR3_MALLOC(sizeof(ANTLR3_TOKEN_FACTORY)); + + if (factory == NULL) + { + return NULL; + } + + /* Install factory API + */ + factory->newToken = newPoolToken; + factory->close = factoryClose; + factory->setInputStream = setInputStream; + + /* Allocate the initial pool + */ + factory->thisPool = size; + factory->nextToken = 0; + factory->pools = (pANTLR3_COMMON_TOKEN*) ANTLR3_MALLOC(sizeof(pANTLR3_COMMON_TOKEN)); + factory->pools[0] = + (pANTLR3_COMMON_TOKEN) + ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_COMMON_TOKEN) * size)); + + /* Set up the tokens once and for all */ + for( i=0; i < size; i++ ) { + tok = factory->pools[0] + i; + antlr3SetTokenAPI(tok); + + /* It is factory made, and we need to copy the string factory pointer + */ + tok->factoryMade = ANTLR3_TRUE; + tok->strFactory = input == NULL ? NULL : input->strFactory; + tok->input = input; + } + + /* Factory space is good, we now want to initialize our cheating token + * which one it is initialized is the model for all tokens we manufacture + */ + antlr3SetTokenAPI(&factory->unTruc); + + /* Set some initial variables for future copying + */ + factory->unTruc.factoryMade = ANTLR3_TRUE; + + // Input stream + // + setInputStream(factory, input); + + return factory; + +} + +static pANTLR3_COMMON_TOKEN +newPoolToken(pANTLR3_TOKEN_FACTORY factory) +{ + ANTLR3_UINT32 size = factory->thisPool; + pANTLR3_COMMON_TOKEN tok = factory->pools[0] + (factory->nextToken % size); + if(factory->nextToken >= size && tok->custom != NULL && tok->freeCustom != NULL) { + tok->freeCustom(tok->custom); + tok->custom = NULL; + } + factory->nextToken++; + + return tok; +} + +static void +factoryClose (pANTLR3_TOKEN_FACTORY factory) +{ + ANTLR3_UINT32 i; + ANTLR3_UINT32 size = factory->thisPool; + pANTLR3_COMMON_TOKEN tok; + + for(i = 0; i < size && i < factory->nextToken; i++) { + tok = factory->pools[0] + i; + if(tok->custom != NULL && tok->freeCustom != NULL) { + tok->freeCustom(tok->custom); + tok->custom = NULL; + } + } + ANTLR3_FREE(factory->pools[0]); + ANTLR3_FREE(factory->pools); + ANTLR3_FREE(factory); + +} + +static void +setInputStream (pANTLR3_TOKEN_FACTORY factory, pANTLR3_INPUT_STREAM input) +{ + factory->input = input; + factory->unTruc.input = input; + if (input != NULL) + { + factory->unTruc.strFactory = input->strFactory; + } + else + { + factory->unTruc.strFactory = NULL; + } +} +} +} diff --git a/src/parser/bounded_token_factory.h b/src/parser/bounded_token_factory.h new file mode 100644 index 000000000..6f8729c19 --- /dev/null +++ b/src/parser/bounded_token_factory.h @@ -0,0 +1,38 @@ +/* + * bounded_token_factory.h + * + * Created on: Mar 2, 2010 + * Author: dejan + */ + +#ifndef BOUNDED_TOKEN_FACTORY_H_ +#define BOUNDED_TOKEN_FACTORY_H_ + +namespace CVC4 { +namespace parser { + +#ifdef __cplusplus +extern "C" { +#endif + +/** Create a token factory with a pool of exactly <code>size</code> tokens, + * attached to the input stream <code>input</code>. <code>size</code> must be + * greater than the maximum lookahead in the parser, or tokens will be prematurely re-used. + * + * We abuse <code>pANTLR3_TOKEN_FACTORY</code> fields for our own purposes: + * <code>pANTLR3_COMMON_TOKEN *pools</code>: a pointer to a single-element array, a single pool of tokens + * <code>ANTLR3_INT32 thisPool</code>: the size of the pool + * <code>ANTLR3_UINT32 nextToken</code>: the index of the next token to be returned + * */ +pANTLR3_TOKEN_FACTORY +BoundedTokenFactoryNew(pANTLR3_INPUT_STREAM input,ANTLR3_UINT32 size); + +#ifdef __cplusplus +} +#endif + +} +} + + +#endif /* BOUNDED_TOKEN_FACTORY_H_ */ diff --git a/src/parser/cvc/.gitignore b/src/parser/cvc/.gitignore new file mode 100644 index 000000000..7fd0cf319 --- /dev/null +++ b/src/parser/cvc/.gitignore @@ -0,0 +1,4 @@ +/.deps +/stamp-generated +/generated +/Makefile.in diff --git a/src/parser/cvc/Cvc.g b/src/parser/cvc/Cvc.g new file mode 100644 index 000000000..a9dff77bf --- /dev/null +++ b/src/parser/cvc/Cvc.g @@ -0,0 +1,496 @@ +/* ******************* */ +/* Cvc.g + ** Original author: cconway + ** Major contributors: dejan, mdeters + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Parser for CVC-LIB input language. + **/ + +grammar Cvc; + +options { + language = 'C'; // C output for antlr +// defaultErrorHandler = false; // Skip the default error handling, just break with exceptions + k = 2; +} + +@header { +/** + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + **/ +} + +@lexer::includes { +/* This improves performance by ~10 percent on big inputs. + * This option is only valid if we know the input is ASCII (or some 8-bit encoding). + * If we know the input is UTF-16, we can use ANTLR3_INLINE_INPUT_UTF16. + * Otherwise, we have to let the lexer detect the encoding at runtime. + */ +#define ANTLR3_INLINE_INPUT_ASCII +} + +@parser::includes { +#include "expr/command.h" +#include "parser/input.h" + +namespace CVC4 { + class Expr; +namespace parser { + class CvcInput; +} +} + +extern +void SetCvcInput(CVC4::parser::CvcInput* input); + +} + +@parser::postinclude { +#include "expr/expr.h" +#include "expr/kind.h" +#include "expr/type.h" +#include "parser/input.h" +#include "parser/cvc/cvc_input.h" +#include "util/output.h" +#include <vector> + +using namespace CVC4; +using namespace CVC4::parser; +} + +@members { +static CVC4::parser::CvcInput *input; + +extern +void SetCvcInput(CVC4::parser::CvcInput* _input) { + input = _input; +} +} + +/** + * Parses an expression. + * @return the parsed expression + */ +parseExpr returns [CVC4::Expr expr] + : formula[expr] + | EOF + ; + +/** + * Parses a command (the whole benchmark) + * @return the command of the benchmark + */ +parseCommand returns [CVC4::Command* cmd] + : c = command { $cmd = c; } + ; + +/** + * Matches a command of the input. If a declaration, it will return an empty + * command. + */ +command returns [CVC4::Command* cmd = 0] +@init { + Expr f; + Debug("parser-extra") << "command: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : ASSERT_TOK formula[f] SEMICOLON { cmd = new AssertCommand(f); } + | QUERY_TOK formula[f] SEMICOLON { cmd = new QueryCommand(f); } + | CHECKSAT_TOK formula[f] SEMICOLON { cmd = new CheckSatCommand(f); } + | CHECKSAT_TOK SEMICOLON { cmd = new CheckSatCommand(input->getTrueExpr()); } + | PUSH_TOK SEMICOLON { cmd = new PushCommand(); } + | POP_TOK SEMICOLON { cmd = new PopCommand(); } + | declaration[cmd] + | EOF + ; + +/** + * Match a declaration + */ + +declaration[CVC4::Command*& cmd] +@init { + std::vector<std::string> ids; + Type* t; + Debug("parser-extra") << "declaration: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : // FIXME: These could be type or function declarations, if that matters. + identifierList[ids, CHECK_UNDECLARED, SYM_VARIABLE] COLON declType[t,ids] SEMICOLON + { cmd = new DeclarationCommand(ids,t); } + ; + +/** Match the right-hand side of a declaration. Returns the type. */ +declType[CVC4::Type*& t, std::vector<std::string>& idList] +@init { + Debug("parser-extra") << "declType: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : /* A sort declaration (e.g., "T : TYPE") */ + TYPE_TOK { input->newSorts(idList); t = input->kindType(); } + | /* A variable declaration */ + type[t] { input->mkVars(idList,t); } + ; + +/** + * Match the type in a declaration and do the declaration binding. + * TODO: Actually parse sorts into Type objects. + */ +type[CVC4::Type*& t] +@init { + std::vector<Type*> typeList; + Debug("parser-extra") << "type: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : /* Simple type */ + baseType[t] + | /* Single-parameter function type */ + baseType[t] { typeList.push_back(t); } + ARROW_TOK baseType[t] + { t = input->functionType(typeList,t); } + | /* Multi-parameter function type */ + LPAREN baseType[t] + { typeList.push_back(t); } + (COMMA baseType[t] { typeList.push_back(t); } )+ + RPAREN ARROW_TOK baseType[t] + { t = input->functionType(typeList,t); } + ; + +/** + * Matches a list of identifiers separated by a comma and puts them in the + * given list. + * @param idList the list to fill with identifiers. + * @param check what kinds of check to perform on the symbols + */ +identifierList[std::vector<std::string>& idList, + CVC4::parser::DeclarationCheck check, + CVC4::parser::SymbolType type] +@init { + std::string id; +} + : identifier[id,check,type] { idList.push_back(id); } + (COMMA identifier[id,check,type] { idList.push_back(id); })* + ; + + +/** + * Matches an identifier and returns a string. + */ +identifier[std::string& id, + CVC4::parser::DeclarationCheck check, + CVC4::parser::SymbolType type] + : IDENTIFIER + { id = AntlrInput::tokenText($IDENTIFIER); + input->checkDeclaration(id, check, type); } + ; + +/** + * Matches a type. + * TODO: parse more types + */ +baseType[CVC4::Type*& t] +@init { + std::string id; + Debug("parser-extra") << "base type: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : BOOLEAN_TOK { t = input->booleanType(); } + | typeSymbol[t] + ; + +/** + * Matches a type identifier + */ +typeSymbol[CVC4::Type*& t] +@init { + std::string id; + Debug("parser-extra") << "type symbol: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : identifier[id,CHECK_DECLARED,SYM_SORT] + { t = input->getSort(id); } + ; + +/** + * Matches a CVC4 formula. + * @return the expression representing the formula + */ +formula[CVC4::Expr& formula] +@init { + Debug("parser-extra") << "formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : iffFormula[formula] + ; + +/** + * Matches a comma-separated list of formulas + */ +formulaList[std::vector<CVC4::Expr>& formulas] +@init { + Expr f; +} + : formula[f] { formulas.push_back(f); } + ( COMMA formula[f] + { formulas.push_back(f); } + )* + ; + +/** + * Matches a Boolean IFF formula (right-associative) + */ +iffFormula[CVC4::Expr& f] +@init { + Expr e; + Debug("parser-extra") << "<=> formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : impliesFormula[f] + ( IFF_TOK + iffFormula[e] + { f = input->mkExpr(CVC4::kind::IFF, f, e); } + )? + ; + +/** + * Matches a Boolean IMPLIES formula (right-associative) + */ +impliesFormula[CVC4::Expr& f] +@init { + Expr e; + Debug("parser-extra") << "=> Formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : orFormula[f] + ( IMPLIES_TOK impliesFormula[e] + { f = input->mkExpr(CVC4::kind::IMPLIES, f, e); } + )? + ; + +/** + * Matches a Boolean OR formula (left-associative) + */ +orFormula[CVC4::Expr& f] +@init { + std::vector<Expr> exprs; + Debug("parser-extra") << "OR Formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : xorFormula[f] + ( OR_TOK { exprs.push_back(f); } + xorFormula[f] { exprs.push_back(f); } )* + { + if( exprs.size() > 0 ) { + f = input->mkExpr(CVC4::kind::OR, exprs); + } + } + ; + +/** + * Matches a Boolean XOR formula (left-associative) + */ +xorFormula[CVC4::Expr& f] +@init { + Expr e; + Debug("parser-extra") << "XOR formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : andFormula[f] + ( XOR_TOK andFormula[e] + { f = input->mkExpr(CVC4::kind::XOR,f, e); } + )* + ; + +/** + * Matches a Boolean AND formula (left-associative) + */ +andFormula[CVC4::Expr& f] +@init { + std::vector<Expr> exprs; + Debug("parser-extra") << "AND Formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : notFormula[f] + ( AND_TOK { exprs.push_back(f); } + notFormula[f] { exprs.push_back(f); } )* + { + if( exprs.size() > 0 ) { + f = input->mkExpr(CVC4::kind::AND, exprs); + } + } + ; + +/** + * Parses a NOT formula. + * @return the expression representing the formula + */ +notFormula[CVC4::Expr& f] +@init { + Debug("parser-extra") << "NOT formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : /* negation */ + NOT_TOK notFormula[f] + { f = input->mkExpr(CVC4::kind::NOT, f); } + | /* a boolean atom */ + predFormula[f] + ; + +predFormula[CVC4::Expr& f] +@init { + Expr e; + Debug("parser-extra") << "predicate formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : term[f] + (EQUAL_TOK term[e] + { f = input->mkExpr(CVC4::kind::EQUAL, f, e); } + )? + ; // TODO: lt, gt, etc. + +/** + * Parses a term. + */ +term[CVC4::Expr& f] +@init { + std::string name; + std::vector<Expr> args; + Debug("parser-extra") << "term: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : /* function application */ + // { isFunction(AntlrInput::tokenText(LT(1))) }? + functionSymbol[f] + { args.push_back( f ); } + LPAREN formulaList[args] RPAREN + // TODO: check arity + { f = input->mkExpr(CVC4::kind::APPLY_UF, args); } + + | /* if-then-else */ + iteTerm[f] + + | /* parenthesized sub-formula */ + LPAREN formula[f] RPAREN + + /* constants */ + | TRUE_TOK { f = input->getTrueExpr(); } + | FALSE_TOK { f = input->getFalseExpr(); } + + | /* variable */ + identifier[name,CHECK_DECLARED,SYM_VARIABLE] + { f = input->getVariable(name); } + ; + +/** + * Parses an ITE term. + */ +iteTerm[CVC4::Expr& f] +@init { + std::vector<Expr> args; + Debug("parser-extra") << "ite: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : IF_TOK formula[f] { args.push_back(f); } + THEN_TOK formula[f] { args.push_back(f); } + iteElseTerm[f] { args.push_back(f); } + ENDIF_TOK + { f = input->mkExpr(CVC4::kind::ITE, args); } + ; + +/** + * Parses the else part of the ITE, i.e. ELSE f, or ELSIF b THEN f1 ... + */ +iteElseTerm[CVC4::Expr& f] +@init { + std::vector<Expr> args; + Debug("parser-extra") << "else: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : ELSE_TOK formula[f] + | ELSEIF_TOK iteCondition = formula[f] { args.push_back(f); } + THEN_TOK iteThen = formula[f] { args.push_back(f); } + iteElse = iteElseTerm[f] { args.push_back(f); } + { f = input->mkExpr(CVC4::kind::ITE, args); } + ; + +/** + * Parses a function symbol (an identifier). + * @param what kind of check to perform on the id + * @return the predicate symol + */ +functionSymbol[CVC4::Expr& f] +@init { + Debug("parser-extra") << "function symbol: " << AntlrInput::tokenText(LT(1)) << std::endl; + std::string name; +} + : identifier[name,CHECK_DECLARED,SYM_FUNCTION] + { input->checkFunction(name); + f = input->getFunction(name); } + ; + + +// Keywords + +AND_TOK : 'AND'; +ASSERT_TOK : 'ASSERT'; +BOOLEAN_TOK : 'BOOLEAN'; +CHECKSAT_TOK : 'CHECKSAT'; +ECHO_TOK : 'ECHO'; +ELSEIF_TOK : 'ELSIF'; +ELSE_TOK : 'ELSE'; +ENDIF_TOK : 'ENDIF'; +FALSE_TOK : 'FALSE'; +IF_TOK : 'IF'; +NOT_TOK : 'NOT'; +OR_TOK : 'OR'; +POPTO_TOK : 'POPTO'; +POP_TOK : 'POP'; +PRINT_TOK : 'PRINT'; +PUSH_TOK : 'PUSH'; +QUERY_TOK : 'QUERY'; +THEN_TOK : 'THEN'; +TRUE_TOK : 'TRUE'; +TYPE_TOK : 'TYPE'; +XOR_TOK : 'XOR'; + +// Symbols + +COLON : ':'; +COMMA : ','; +LPAREN : '('; +RPAREN : ')'; +SEMICOLON : ';'; + +// Operators + +IMPLIES_TOK : '=>'; +IFF_TOK : '<=>'; +ARROW_TOK : '->'; +EQUAL_TOK : '='; + +/** + * Matches an identifier from the input. An identifier is a sequence of letters, + * digits and "_", "'", "." symbols, starting with a letter. + */ +IDENTIFIER : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')*; + +/** + * Matches a numeral from the input (non-empty sequence of digits). + */ +NUMERAL: (DIGIT)+; + +/** + * Matches any letter ('a'-'z' and 'A'-'Z'). + */ +fragment ALPHA : 'a'..'z' | 'A'..'Z'; + +/** + * Matches the digits (0-9) + */ +fragment DIGIT : '0'..'9'; + +/** + * Matches and skips whitespace in the input and ignores it. + */ +WHITESPACE : (' ' | '\t' | '\f' | '\r' | '\n') { $channel = HIDDEN;; }; + +/** + * Matches the comments and ignores them + */ +COMMENT : '%' (~('\n' | '\r'))* { $channel = HIDDEN;; }; + diff --git a/src/parser/cvc/Makefile.am b/src/parser/cvc/Makefile.am index c1b5f752e..f02c9345c 100644 --- a/src/parser/cvc/Makefile.am +++ b/src/parser/cvc/Makefile.am @@ -1,30 +1,30 @@ AM_CPPFLAGS = \ -D__BUILDING_CVC4PARSERLIB \ - -I@srcdir@/../../include -I@srcdir@/../.. + -I@srcdir@/../../include -I@srcdir@/../.. $(ANTLR_INCLUDES) AM_CXXFLAGS = -Wall -fvisibility=hidden +# Compile generated C files using C++ compiler +CC=$(CXX) noinst_LTLIBRARIES = libparsercvc.la ANTLR_TOKEN_STUFF = \ - @srcdir@/generated/CvcVocabularyTokenTypes.hpp \ - @srcdir@/generated/CvcVocabularyTokenTypes.txt \ - @srcdir@/generated/AntlrCvcParserTokenTypes.hpp \ - @srcdir@/generated/AntlrCvcParserTokenTypes.txt + @srcdir@/generated/Cvc.tokens ANTLR_LEXER_STUFF = \ - @srcdir@/generated/AntlrCvcLexer.hpp \ - @srcdir@/generated/AntlrCvcLexer.cpp \ - $(ANTLR_TOKEN_STUFF) + @srcdir@/generated/CvcLexer.h \ + @srcdir@/generated/CvcLexer.c \ + $(ANTLR_TOKEN_STUFF) ANTLR_PARSER_STUFF = \ - @srcdir@/generated/AntlrCvcParser.hpp \ - @srcdir@/generated/AntlrCvcParser.cpp + @srcdir@/generated/CvcParser.h \ + @srcdir@/generated/CvcParser.c ANTLR_STUFF = \ - $(ANTLR_LEXER_STUFF) \ - $(ANTLR_PARSER_STUFF) + $(ANTLR_LEXER_STUFF) \ + $(ANTLR_PARSER_STUFF) libparsercvc_la_SOURCES = \ - cvc_lexer.g \ - cvc_parser.g \ - $(ANTLR_STUFF) + Cvc.g \ + cvc_input.h \ + cvc_input.cpp \ + $(ANTLR_STUFF) BUILT_SOURCES = $(ANTLR_STUFF) dist-hook: $(ANTLR_STUFF) @@ -36,16 +36,14 @@ maintainer-clean-local: @srcdir@/stamp-generated: mkdir -p @srcdir@/generated touch @srcdir@/stamp-generated + # antlr doesn't overwrite output files, it just leaves them. So we have to delete them first. -@srcdir@/generated/AntlrCvcLexer.hpp: cvc_lexer.g @srcdir@/stamp-generated - $(AM_V_at)-rm -f $(ANTLR_LEXER_STUFF) - $(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/cvc_lexer.g" -@srcdir@/generated/AntlrCvcLexer.cpp $(ANTLR_TOKEN_STUFF): @srcdir@/generated/AntlrCvcLexer.hpp -# doesn't actually depend on the lexer, but if we're doing parallel +@srcdir@/generated/CvcLexer.h: Cvc.g @srcdir@/stamp-generated + -rm -f $(ANTLR_STUFF) + $(ANTLR) -o "@srcdir@/generated" "@srcdir@/Cvc.g" + +# These don't actually depend on CvcLexer.h, but if we're doing parallel # make and the lexer needs to be rebuilt, we have to keep the rules # from running in parallel (since the token files will be deleted & # recreated) -@srcdir@/generated/AntlrCvcParser.hpp: cvc_parser.g cvc_lexer.g $(ANTLR_TOKEN_STUFF) @srcdir@/stamp-generated - $(AM_V_at)-rm -f $(ANTLR_PARSER_STUFF) - $(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/cvc_parser.g" -@srcdir@/generated/AntlrCvcParser.cpp: @srcdir@/generated/AntlrCvcParser.hpp +@srcdir@/generated/CvcLexer.c @srcdir@/generated/CvcParser.h @srcdir@/generated/CvcParser.c $(ANTLR_TOKEN_STUFF): @srcdir@/generated/CvcLexer.h diff --git a/src/parser/cvc/cvc_input.cpp b/src/parser/cvc/cvc_input.cpp new file mode 100644 index 000000000..9de608aae --- /dev/null +++ b/src/parser/cvc/cvc_input.cpp @@ -0,0 +1,73 @@ +/* + * cvc_parser.cpp + * + * Created on: Mar 5, 2010 + * Author: chris + */ + +#include <antlr3.h> + +#include "expr/expr_manager.h" +#include "parser/parser_exception.h" +#include "parser/cvc/cvc_input.h" +#include "parser/cvc/generated/CvcLexer.h" +#include "parser/cvc/generated/CvcParser.h" + +namespace CVC4 { +namespace parser { + +/* Use lookahead=2 */ +CvcInput::CvcInput(ExprManager* exprManager, const std::string& filename, bool useMmap) : + AntlrInput(exprManager,filename,2,useMmap) { + init(); +} + +CvcInput::CvcInput(ExprManager* exprManager, const std::string& input, const std::string& name) : + AntlrInput(exprManager,input,name,2) { + init(); +} + +void CvcInput::init() { + pANTLR3_INPUT_STREAM input = getInputStream(); + AlwaysAssert( input != NULL ); + + d_pCvcLexer = CvcLexerNew(input); + if( d_pCvcLexer == NULL ) { + throw ParserException("Failed to create CVC lexer."); + } + + setLexer( d_pCvcLexer->pLexer ); + + pANTLR3_COMMON_TOKEN_STREAM tokenStream = getTokenStream(); + AlwaysAssert( tokenStream != NULL ); + + d_pCvcParser = CvcParserNew(tokenStream); + if( d_pCvcParser == NULL ) { + throw ParserException("Failed to create CVC parser."); + } + + setParser(d_pCvcParser->pParser); + SetCvcInput(this); +} + + +CvcInput::~CvcInput() { + d_pCvcLexer->free(d_pCvcLexer); + d_pCvcParser->free(d_pCvcParser); +} + +Command* CvcInput::doParseCommand() throw (ParserException) { + return d_pCvcParser->parseCommand(d_pCvcParser); +} + +Expr CvcInput::doParseExpr() throw (ParserException) { + return d_pCvcParser->parseExpr(d_pCvcParser); +} + +pANTLR3_LEXER CvcInput::getLexer() { + return d_pCvcLexer->pLexer; +} + +} // namespace parser + +} // namespace CVC4 diff --git a/src/parser/cvc/cvc_input.h b/src/parser/cvc/cvc_input.h new file mode 100644 index 000000000..659123401 --- /dev/null +++ b/src/parser/cvc/cvc_input.h @@ -0,0 +1,48 @@ +/* + * cvc_parser.h + * + * Created on: Mar 5, 2010 + * Author: chris + */ + +#ifndef CVC_PARSER_H_ +#define CVC_PARSER_H_ + +#include "parser/antlr_input.h" +#include "parser/cvc/generated/CvcLexer.h" +#include "parser/cvc/generated/CvcParser.h" + +// extern void CvcParserSetAntlrParser(CVC4::parser::AntlrParser* newAntlrParser); + +namespace CVC4 { + +class Command; +class Expr; +class ExprManager; + +namespace parser { + +class CvcInput : public AntlrInput { +public: + CvcInput(ExprManager* exprManager, const std::string& filename, bool useMmap); + CvcInput(ExprManager* exprManager, const std::string& input, const std::string& name); + ~CvcInput(); + +protected: + Command* doParseCommand() throw(ParserException); + Expr doParseExpr() throw(ParserException); + pANTLR3_LEXER getLexer(); + pANTLR3_LEXER createLexer(pANTLR3_INPUT_STREAM input); + pANTLR3_PARSER createParser(pANTLR3_COMMON_TOKEN_STREAM tokenStream); + +private: + void init(); + pCvcLexer d_pCvcLexer; + pCvcParser d_pCvcParser; +}; // class CvcInput + +} // namespace parser + +} // namespace CVC4 + +#endif /* CVC_PARSER_H_ */ diff --git a/src/parser/cvc/cvc_lexer.g b/src/parser/cvc/cvc_lexer.g deleted file mode 100644 index b5bf90015..000000000 --- a/src/parser/cvc/cvc_lexer.g +++ /dev/null @@ -1,151 +0,0 @@ -/* ******************* */ -/* cvc_lexer.g - ** Original author: dejan - ** Major contributors: mdeters - ** Minor contributors (to current version): cconway - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Lexer for CVC presentation language. - **/ - -options { - language = "Cpp"; // C++ output for antlr - namespaceStd = "std"; // Cosmetic option to get rid of long defines in generated code - namespaceAntlr = "antlr"; // Cosmetic option to get rid of long defines in generated code - namespace = "CVC4::parser"; // Wrap everything in the smtparser namespace -} - -/** - * AntlrCvcLexer class is a stream tokenizer (lexer) for the CVC language. - */ -class AntlrCvcLexer extends Lexer; - -options { - exportVocab = CvcVocabulary; // Name of the shared token vocabulary - testLiterals = false; // Do not check for literals by default - defaultErrorHandler = false; // Skip the defaul error handling, just break with exceptions - k = 2; -} - -/* - * The tokens that might match with the identifiers go here. Otherwise define - * them below. - */ -tokens { - // Types - BOOLEAN = "BOOLEAN"; - TYPE = "TYPE"; - // Boolean oparators - AND = "AND"; - IF = "IF"; - THEN = "THEN"; - ELSE = "ELSE"; - ELSEIF = "ELSIF"; - ENDIF = "ENDIF"; - NOT = "NOT"; - OR = "OR"; - TRUE = "TRUE"; - FALSE = "FALSE"; - XOR = "XOR"; - // Commands - ASSERT = "ASSERT"; - QUERY = "QUERY"; - CHECKSAT = "CHECKSAT"; - PRINT = "PRINT"; - ECHO = "ECHO"; - - PUSH = "PUSH"; - POP = "POP"; - POPTO = "POPTO"; -} - -// Operators -COMMA : ','; -IMPLIES : "=>"; -IFF : "<=>"; -RIGHT_ARROW : "->"; -EQUAL : "="; - -/** - * Matches any letter ('a'-'z' and 'A'-'Z'). - */ -protected -ALPHA options { paraphrase = "a letter"; } - : 'a'..'z' - | 'A'..'Z' - ; - -/** - * Matches the digits (0-9) - */ -protected -DIGIT options { paraphrase = "a digit"; } - : '0'..'9' - ; - -/** - * Matches the ':' - */ -COLON options { paraphrase = "a colon"; } - : ':' - ; - -/** - * Matches the 'l' - */ -SEMICOLON options { paraphrase = "a semicolon"; } - : ';' - ; - -/** - * Matches an identifier from the input. An identifier is a sequence of letters, - * digits and "_", "'", "." symbols, starting with a letter. - */ -IDENTIFIER options { paraphrase = "an identifier"; testLiterals = true; } - : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* - ; - -/** - * Matches the left bracket ('('). - */ -LPAREN options { paraphrase = "a left parenthesis '('"; } - : '('; - -/** - * Matches the right bracket ('('). - */ -RPAREN options { paraphrase = "a right parenthesis ')'"; } - : ')'; - -/** - * Matches and skips whitespace in the input and ignores it. - */ -WHITESPACE options { paraphrase = "whitespace"; } - : (' ' | '\t' | '\f') { $setType(antlr::Token::SKIP); } - ; - -/** - * Matches and skips the newline symbols in the input. - */ -NEWLINE options { paraphrase = "a newline"; } - : ('\r' '\n' | '\r' | '\n') { $setType(antlr::Token::SKIP); newline(); } - ; - -/** - * Matches the comments and ignores them - */ -COMMENT options { paraphrase = "comment"; } - : '%' (~('\n' | '\r'))* { $setType(antlr::Token::SKIP); } - ; - -/** - * Matches a numeral from the input (non-empty sequence of digits). - */ -NUMERAL options { paraphrase = "a numeral"; } - : (DIGIT)+ - ; diff --git a/src/parser/cvc/cvc_parser.g b/src/parser/cvc/cvc_parser.g deleted file mode 100644 index acf0394d0..000000000 --- a/src/parser/cvc/cvc_parser.g +++ /dev/null @@ -1,386 +0,0 @@ -/* ******************* */ -/* cvc_parser.g - ** Original author: dejan - ** Major contributors: cconway - ** Minor contributors (to current version): mdeters - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Parser for CVC presentation language. - **/ - -header "post_include_hpp" { -#include "parser/antlr_parser.h" -#include "expr/command.h" -#include "expr/type.h" -#include "util/output.h" -} - -header "post_include_cpp" { -#include <vector> - -using namespace std; -using namespace CVC4; -using namespace CVC4::parser; -} - -options { - language = "Cpp"; // C++ output for antlr - namespaceStd = "std"; // Cosmetic option to get rid of long defines in generated code - namespaceAntlr = "antlr"; // Cosmetic option to get rid of long defines in generated code - namespace = "CVC4::parser"; // Wrap everything in the smtparser namespace -} - -/** - * AntlrCvcParser class is the parser for the files in CVC format. - */ -class AntlrCvcParser extends Parser("AntlrParser"); -options { - genHashLines = true; // Include line number information - importVocab = CvcVocabulary; // Import the common vocabulary - defaultErrorHandler = false; // Skip the defaul error handling, just break with exceptions - k = 2; -} - -/** - * Parses the next command. - * @return command or 0 if EOF - */ -parseCommand returns [CVC4::Command* cmd] - : cmd = command - ; - -/** - * Parses the next expression. - * @return the parsed expression (null expression if EOF) - */ -parseExpr returns [CVC4::Expr expr] - : expr = formula - | EOF - ; - -/** - * Matches a command of the input. If a declaration, it will return an empty - * command. - */ -command returns [CVC4::Command* cmd = 0] -{ - Expr f; - Debug("parser") << "command: " << LT(1)->getText() << endl; -} - : ASSERT f = formula SEMICOLON { cmd = new AssertCommand(f); } - | QUERY f = formula SEMICOLON { cmd = new QueryCommand(f); } - | CHECKSAT f = formula SEMICOLON { cmd = new CheckSatCommand(f); } - | CHECKSAT SEMICOLON { cmd = new CheckSatCommand(getTrueExpr()); } - | PUSH SEMICOLON { cmd = new PushCommand(); } - | POP SEMICOLON { cmd = new PopCommand(); } - | cmd = declaration - | EOF - ; - -/** - * Match a declaration - */ - -declaration returns [CVC4::DeclarationCommand* cmd] -{ - vector<string> ids; - Type* t; - Debug("parser") << "declaration: " << LT(1)->getText() << endl; -} - : identifierList[ids, CHECK_UNDECLARED] COLON t = declType[ids] SEMICOLON - { cmd = new DeclarationCommand(ids,t); } - ; - -/** Match the right-hand side of a declaration. Returns the type. */ -declType[std::vector<std::string>& idList] returns [CVC4::Type* t] -{ - Debug("parser") << "declType: " << LT(1)->getText() << endl; -} - : /* A sort declaration (e.g., "T : TYPE") */ - TYPE { newSorts(idList); t = kindType(); } - | /* A variable declaration */ - t = type { mkVars(idList,t); } - ; - -/** - * Match the type in a declaration and do the declaration binding. - * TODO: Actually parse sorts into Type objects. - */ -type returns [CVC4::Type* t] -{ - Type *t1, *t2; - Debug("parser") << "type: " << LT(1)->getText() << endl; -} - : /* Simple type */ - t = baseType - | /* Single-parameter function type */ - t1 = baseType RIGHT_ARROW t2 = baseType - { t = functionType(t1,t2); } - | /* Multi-parameter function type */ - LPAREN t1 = baseType - { std::vector<Type*> argTypes; - argTypes.push_back(t1); } - (COMMA t1 = baseType { argTypes.push_back(t1); } )+ - RPAREN RIGHT_ARROW t2 = baseType - { t = functionType(argTypes,t2); } - ; - -/** - * Matches a list of identifiers separated by a comma and puts them in the - * given list. - * @param idList the list to fill with identifiers. - * @param check what kinds of check to perform on the symbols - */ -identifierList[std::vector<std::string>& idList, - DeclarationCheck check = CHECK_NONE] -{ - string id; -} - : id = identifier[check] { idList.push_back(id); } - (COMMA id = identifier[check] { idList.push_back(id); })* - ; - - -/** - * Matches an identifier and returns a string. - */ -identifier[DeclarationCheck check = CHECK_NONE, - SymbolType type = SYM_VARIABLE] -returns [std::string id] - : x:IDENTIFIER - { id = x->getText(); - checkDeclaration(id, check, type); } - ; - -/** - * Matches a type. - * TODO: parse more types - */ -baseType returns [CVC4::Type* t] -{ - std::string id; - Debug("parser") << "base type: " << LT(1)->getText() << endl; -} - : BOOLEAN { t = booleanType(); } - | t = typeSymbol - ; - -/** - * Matches a type identifier - */ -typeSymbol returns [CVC4::Type* t] -{ - std::string id; - Debug("parser") << "type symbol: " << LT(1)->getText() << endl; -} - : id = identifier[CHECK_DECLARED,SYM_SORT] - { t = getSort(id); } - ; - -/** - * Matches a CVC4 formula. - * @return the expression representing the formula - */ -formula returns [CVC4::Expr formula] -{ - Debug("parser") << "formula: " << LT(1)->getText() << endl; -} - : formula = iffFormula - ; - -/** - * Matches a comma-separated list of formulas - */ -formulaList[std::vector<CVC4::Expr>& formulas] -{ - Expr f; -} - : f = formula { formulas.push_back(f); } - ( COMMA f = formula - { formulas.push_back(f); } - )* - ; - -/** - * Matches a Boolean IFF formula (right-associative) - */ -iffFormula returns [CVC4::Expr f] -{ - Expr e; - Debug("parser") << "<=> formula: " << LT(1)->getText() << endl; -} - : f = impliesFormula - ( IFF e = iffFormula - { f = mkExpr(CVC4::kind::IFF, f, e); } - )? - ; - -/** - * Matches a Boolean IMPLIES formula (right-associative) - */ -impliesFormula returns [CVC4::Expr f] -{ - Expr e; - Debug("parser") << "=> Formula: " << LT(1)->getText() << endl; -} - : f = orFormula - ( IMPLIES e = impliesFormula - { f = mkExpr(CVC4::kind::IMPLIES, f, e); } - )? - ; - -/** - * Matches a Boolean OR formula (left-associative) - */ -orFormula returns [CVC4::Expr f] -{ - Expr e; - vector<Expr> exprs; - Debug("parser") << "OR Formula: " << LT(1)->getText() << endl; -} - : e = xorFormula { exprs.push_back(e); } - ( OR e = xorFormula { exprs.push_back(e); } )* - { - f = (exprs.size() > 1 ? mkExpr(CVC4::kind::OR, exprs) : exprs[0]); - } - ; - -/** - * Matches a Boolean XOR formula (left-associative) - */ -xorFormula returns [CVC4::Expr f] -{ - Expr e; - Debug("parser") << "XOR formula: " << LT(1)->getText() << endl; -} - : f = andFormula - ( XOR e = andFormula - { f = mkExpr(CVC4::kind::XOR,f, e); } - )* - ; - -/** - * Matches a Boolean AND formula (left-associative) - */ -andFormula returns [CVC4::Expr f] -{ - Expr e; - vector<Expr> exprs; - Debug("parser") << "AND Formula: " << LT(1)->getText() << endl; -} - : e = notFormula { exprs.push_back(e); } - ( AND e = notFormula { exprs.push_back(e); } )* - { - f = (exprs.size() > 1 ? mkExpr(CVC4::kind::AND, exprs) : exprs[0]); - } - ; - -/** - * Parses a NOT formula. - * @return the expression representing the formula - */ -notFormula returns [CVC4::Expr f] -{ - Debug("parser") << "NOT formula: " << LT(1)->getText() << endl; -} - : /* negation */ - NOT f = notFormula - { f = mkExpr(CVC4::kind::NOT, f); } - | /* a boolean atom */ - f = predFormula - ; - -predFormula returns [CVC4::Expr f] -{ - Debug("parser") << "predicate formula: " << LT(1)->getText() << endl; -} - : { Expr e; } - f = term - (EQUAL e = term - { f = mkExpr(CVC4::kind::EQUAL, f, e); } - )? - ; // TODO: lt, gt, etc. - -/** - * Parses a term. - */ -term returns [CVC4::Expr t] -{ - std::string name; - Debug("parser") << "term: " << LT(1)->getText() << endl; -} - : /* function application */ - // { isFunction(LT(1)->getText()) }? - { Expr f; - std::vector<CVC4::Expr> args; } - f = functionSymbol[CHECK_DECLARED] - { args.push_back( f ); } - - LPAREN formulaList[args] RPAREN - // TODO: check arity - { t = mkExpr(CVC4::kind::APPLY_UF, args); } - - | /* if-then-else */ - t = iteTerm - - | /* parenthesized sub-formula */ - LPAREN t = formula RPAREN - - /* constants */ - | TRUE { t = getTrueExpr(); } - | FALSE { t = getFalseExpr(); } - - | /* variable */ - name = identifier[CHECK_DECLARED] - { t = getVariable(name); } - ; - -/** - * Parses an ITE term. - */ -iteTerm returns [CVC4::Expr t] -{ - Expr iteCondition, iteThen, iteElse; - Debug("parser") << "ite: " << LT(1)->getText() << endl; -} - : IF iteCondition = formula - THEN iteThen = formula - iteElse = iteElseTerm - ENDIF - { t = mkExpr(CVC4::kind::ITE, iteCondition, iteThen, iteElse); } - ; - -/** - * Parses the else part of the ITE, i.e. ELSE f, or ELSIF b THEN f1 ... - */ -iteElseTerm returns [CVC4::Expr t] -{ - Expr iteCondition, iteThen, iteElse; - Debug("parser") << "else: " << LT(1)->getText() << endl; -} - : ELSE t = formula - | ELSEIF iteCondition = formula - THEN iteThen = formula - iteElse = iteElseTerm - { t = mkExpr(CVC4::kind::ITE, iteCondition, iteThen, iteElse); } - ; - -/** - * Parses a function symbol (an identifier). - * @param what kind of check to perform on the id - * @return the predicate symol - */ -functionSymbol[DeclarationCheck check = CHECK_NONE] returns [CVC4::Expr f] -{ - Debug("parser") << "function symbol: " << LT(1)->getText() << endl; - std::string name; -} - : name = identifier[check,SYM_FUNCTION] - { checkFunction(name); - f = getFunction(name); } - ; diff --git a/src/parser/antlr_parser.cpp b/src/parser/input.cpp index 98fde0556..24ad93d05 100644 --- a/src/parser/antlr_parser.cpp +++ b/src/parser/input.cpp @@ -1,8 +1,8 @@ /********************* */ -/** antlr_parser.cpp - ** Original author: dejan - ** Major contributors: cconway - ** Minor contributors (to current version): mdeters +/** parser.cpp + ** Original author: mdeters + ** Major contributors: dejan + ** Minor contributors (to current version): cconway ** This file is part of the CVC4 prototype. ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) ** Courant Institute of Mathematical Sciences @@ -10,46 +10,153 @@ ** See the file COPYING in the top-level source directory for licensing ** information. ** - ** A super-class for ANTLR-generated input language parsers + ** Parser implementation. **/ -/* - * antlr_parser.cpp - * - * Created on: Nov 30, 2009 - * Author: dejan - */ - #include <iostream> -#include <limits.h> +#include <fstream> +#include <stdint.h> -#include "antlr_parser.h" -#include "util/output.h" -#include "util/Assert.h" +#include "input.h" #include "expr/command.h" +#include "expr/expr.h" +#include "expr/kind.h" #include "expr/type.h" +#include "util/output.h" +#include "util/Assert.h" +#include "parser/parser_exception.h" +#include "parser/cvc/cvc_input.h" +#include "parser/smt/smt_input.h" using namespace std; -using namespace CVC4; -using namespace CVC4::parser; using namespace CVC4::kind; namespace CVC4 { namespace parser { -AntlrParser::AntlrParser(const antlr::ParserSharedInputState& state, int k) : - antlr::LLkParser(state, k), d_checksEnabled(true) { +void Input::setDone(bool done) { + d_done = done; } -AntlrParser::AntlrParser(antlr::TokenBuffer& tokenBuf, int k) : - antlr::LLkParser(tokenBuf, k), d_checksEnabled(true) { +bool Input::done() const { + return d_done; } -AntlrParser::AntlrParser(antlr::TokenStream& lexer, int k) : - antlr::LLkParser(lexer, k), d_checksEnabled(true) { +Command* Input::parseNextCommand() throw (ParserException) { + Debug("parser") << "parseNextCommand()" << std::endl; + Command* cmd = NULL; + if(!done()) { + try { + cmd = doParseCommand(); + if(cmd == NULL) { + setDone(); + } + } catch(ParserException& e) { + setDone(); + throw ParserException(e.toString()); + } + } + Debug("parser") << "parseNextCommand() => " << cmd << std::endl; + return cmd; +} + +Expr Input::parseNextExpression() throw (ParserException) { + Debug("parser") << "parseNextExpression()" << std::endl; + Expr result; + if(!done()) { + try { + result = doParseExpr(); + if(result.isNull()) + setDone(); + } catch(ParserException& e) { + setDone(); + throw ParserException(e.toString()); + } + } + Debug("parser") << "parseNextExpression() => " << result << std::endl; + return result; +} + +Input::~Input() { } -Expr AntlrParser::getSymbol(const std::string& name, SymbolType type) { +Input::Input(ExprManager* exprManager, const std::string& filename) : + d_exprManager(exprManager), + d_filename(filename), + d_done(false), + d_checksEnabled(true) { +} + +Input* Input::newFileParser(ExprManager* em, InputLanguage lang, + const std::string& filename, bool useMmap) { + + Input* parser = 0; + + switch(lang) { + case LANG_CVC4: + parser = new CvcInput(em,filename,useMmap); + break; + case LANG_SMTLIB: + parser = new SmtInput(em,filename,useMmap); + break; + + default: + Unhandled(lang); + } + + return parser; +} + +/* +Parser* Parser::getNewParser(ExprManager* em, InputLanguage lang, + istream& input, string filename) { + antlr::CharBuffer* inputBuffer = new CharBuffer(input); + return getNewParser(em, lang, inputBuffer, filename); +} +*/ + +/* +Parser* Parser::getNewParser(ExprManager* em, InputLanguage lang, + std::istream& input, const std::string& name) { + Parser* parser = 0; + + switch(lang) { + case LANG_CVC4: { + antlrLexer = new AntlrCvcLexer(*inputBuffer); + antlrParser = new AntlrCvcParser(*antlrLexer); + break; + } + case LANG_SMTLIB: + parser = new Smt(em,input,name); + break; + + default: + Unhandled("Unknown Input language!"); + } + return parser; +} +*/ + +Input* Input::newStringParser(ExprManager* em, InputLanguage lang, + const std::string& input, const std::string& name) { + Input* parser = 0; + + switch(lang) { + case LANG_CVC4: { + parser = new CvcInput(em,input,name); + break; + } + case LANG_SMTLIB: + parser = new SmtInput(em,input,name); + break; + + default: + Unhandled(lang); + } + return parser; +} + +Expr Input::getSymbol(const std::string& name, SymbolType type) { Assert( isDeclared(name, type) ); @@ -64,87 +171,92 @@ Expr AntlrParser::getSymbol(const std::string& name, SymbolType type) { } } -Expr AntlrParser::getVariable(const std::string& name) { + +Expr Input::getVariable(const std::string& name) { return getSymbol(name, SYM_VARIABLE); } -Expr AntlrParser::getFunction(const std::string& name) { +Expr Input::getFunction(const std::string& name) { return getSymbol(name, SYM_FUNCTION); } -Type* AntlrParser::getType(const std::string& var_name, - SymbolType type) { +Type* +Input::getType(const std::string& var_name, + SymbolType type) { Assert( isDeclared(var_name, type) ); - Type* t = getSymbol(var_name, type).getType(); + Type* t = getSymbol(var_name,type).getType(); return t; } -Type* AntlrParser::getSort(const std::string& name) { +Type* Input::getSort(const std::string& name) { Assert( isDeclared(name, SYM_SORT) ); Type* t = d_sortTable.getObject(name); return t; } /* Returns true if name is bound to a boolean variable. */ -bool AntlrParser::isBoolean(const std::string& name) { +bool Input::isBoolean(const std::string& name) { return isDeclared(name, SYM_VARIABLE) && getType(name)->isBoolean(); } /* Returns true if name is bound to a function. */ -bool AntlrParser::isFunction(const std::string& name) { +bool Input::isFunction(const std::string& name) { return isDeclared(name, SYM_FUNCTION) && getType(name)->isFunction(); } /* Returns true if name is bound to a function returning boolean. */ -bool AntlrParser::isPredicate(const std::string& name) { +bool Input::isPredicate(const std::string& name) { return isDeclared(name, SYM_FUNCTION) && getType(name)->isPredicate(); } -Expr AntlrParser::getTrueExpr() const { +Expr Input::getTrueExpr() const { return d_exprManager->mkExpr(TRUE); } -Expr AntlrParser::getFalseExpr() const { +Expr Input::getFalseExpr() const { return d_exprManager->mkExpr(FALSE); } -Expr AntlrParser::mkExpr(Kind kind, const Expr& child) { +Expr Input::mkExpr(Kind kind, const Expr& child) { Expr result = d_exprManager->mkExpr(kind, child); Debug("parser") << "mkExpr() => " << result << std::endl; return result; } -Expr AntlrParser::mkExpr(Kind kind, const Expr& child_1, const Expr& child_2) { +Expr Input::mkExpr(Kind kind, const Expr& child_1, const Expr& child_2) { Expr result = d_exprManager->mkExpr(kind, child_1, child_2); Debug("parser") << "mkExpr() => " << result << std::endl; return result; } -Expr AntlrParser::mkExpr(Kind kind, const Expr& child_1, const Expr& child_2, +Expr Input::mkExpr(Kind kind, const Expr& child_1, const Expr& child_2, const Expr& child_3) { Expr result = d_exprManager->mkExpr(kind, child_1, child_2, child_3); Debug("parser") << "mkExpr() => " << result << std::endl; return result; } -Expr AntlrParser::mkExpr(Kind kind, const std::vector<Expr>& children) { +Expr Input::mkExpr(Kind kind, const std::vector<Expr>& children) { Expr result = d_exprManager->mkExpr(kind, children); Debug("parser") << "mkExpr() => " << result << std::endl; return result; } -Type* AntlrParser::functionType(Type* domainType, - Type* rangeType) { - return d_exprManager->mkFunctionType(domainType, rangeType); +Type* +Input::functionType(Type* domainType, + Type* rangeType) { + return d_exprManager->mkFunctionType(domainType,rangeType); } -Type* AntlrParser::functionType(const std::vector<Type*>& argTypes, - Type* rangeType) { +Type* +Input::functionType(const std::vector<Type*>& argTypes, + Type* rangeType) { Assert( argTypes.size() > 0 ); - return d_exprManager->mkFunctionType(argTypes, rangeType); + return d_exprManager->mkFunctionType(argTypes,rangeType); } -Type* AntlrParser::functionType(const std::vector<Type*>& sorts) { +Type* +Input::functionType(const std::vector<Type*>& sorts) { Assert( sorts.size() > 0 ); if( sorts.size() == 1 ) { return sorts[0]; @@ -152,11 +264,11 @@ Type* AntlrParser::functionType(const std::vector<Type*>& sorts) { std::vector<Type*> argTypes(sorts); Type* rangeType = argTypes.back(); argTypes.pop_back(); - return functionType(argTypes, rangeType); + return functionType(argTypes,rangeType); } } -Type* AntlrParser::predicateType(const std::vector<Type*>& sorts) { +Type* Input::predicateType(const std::vector<Type*>& sorts) { if(sorts.size() == 0) { return d_exprManager->booleanType(); } else { @@ -164,15 +276,16 @@ Type* AntlrParser::predicateType(const std::vector<Type*>& sorts) { } } -Expr AntlrParser::mkVar(const std::string& name, Type* type) { +Expr +Input::mkVar(const std::string& name, Type* type) { Debug("parser") << "mkVar(" << name << "," << *type << ")" << std::endl; Expr expr = d_exprManager->mkVar(type, name); - defineVar(name, expr); + defineVar(name,expr); return expr; } const std::vector<Expr> -AntlrParser::mkVars(const std::vector<std::string> names, +Input::mkVars(const std::vector<std::string> names, Type* type) { std::vector<Expr> vars; for(unsigned i = 0; i < names.size(); ++i) { @@ -181,30 +294,41 @@ AntlrParser::mkVars(const std::vector<std::string> names, return vars; } -void AntlrParser::defineVar(const std::string& name, const Expr& val) { +void +Input::defineVar(const std::string& name, const Expr& val) { Assert(!isDeclared(name)); - d_varSymbolTable.bindName(name, val); + d_varSymbolTable.bindName(name,val); Assert(isDeclared(name)); } -void AntlrParser::undefineVar(const std::string& name) { +void +Input::undefineVar(const std::string& name) { Assert(isDeclared(name)); d_varSymbolTable.unbindName(name); Assert(!isDeclared(name)); } +void +Input::setLogic(const std::string& name) { + if( name == "QF_UF" ) { + newSort("U"); + } else { + Unhandled(name); + } +} -Type* AntlrParser::newSort(const std::string& name) { +Type* +Input::newSort(const std::string& name) { Debug("parser") << "newSort(" << name << ")" << std::endl; - Assert( !isDeclared(name, SYM_SORT) ); + Assert( !isDeclared(name, SYM_SORT) ) ; Type* type = d_exprManager->mkSort(name); d_sortTable.bindName(name, type); - Assert( isDeclared(name, SYM_SORT) ); + Assert( isDeclared(name, SYM_SORT) ) ; return type; } const std::vector<Type*> -AntlrParser::newSorts(const std::vector<std::string>& names) { +Input::newSorts(const std::vector<std::string>& names) { std::vector<Type*> types; for(unsigned i = 0; i < names.size(); ++i) { types.push_back(newSort(names[i])); @@ -212,23 +336,15 @@ AntlrParser::newSorts(const std::vector<std::string>& names) { return types; } -void AntlrParser::setLogic(const std::string& name) { - if( name == "QF_UF" ) { - newSort("U"); - } else { - Unhandled(name); - } -} - -BooleanType* AntlrParser::booleanType() { +BooleanType* Input::booleanType() { return d_exprManager->booleanType(); } -KindType* AntlrParser::kindType() { +KindType* Input::kindType() { return d_exprManager->kindType(); } -unsigned int AntlrParser::minArity(Kind kind) { +unsigned int Input::minArity(Kind kind) { switch(kind) { case FALSE: case SKOLEM: @@ -242,6 +358,7 @@ unsigned int AntlrParser::minArity(Kind kind) { return 1; case APPLY_UF: + case DISTINCT: case EQUAL: case IFF: case IMPLIES: @@ -257,7 +374,7 @@ unsigned int AntlrParser::minArity(Kind kind) { } } -unsigned int AntlrParser::maxArity(Kind kind) { +unsigned int Input::maxArity(Kind kind) { switch(kind) { case FALSE: case SKOLEM: @@ -279,6 +396,7 @@ unsigned int AntlrParser::maxArity(Kind kind) { case AND: case APPLY_UF: + case DISTINCT: case PLUS: case OR: return UINT_MAX; @@ -288,11 +406,7 @@ unsigned int AntlrParser::maxArity(Kind kind) { } } -void AntlrParser::setExpressionManager(ExprManager* em) { - d_exprManager = em; -} - -bool AntlrParser::isDeclared(const std::string& name, SymbolType type) { +bool Input::isDeclared(const std::string& name, SymbolType type) { switch(type) { case SYM_VARIABLE: // Functions share var namespace case SYM_FUNCTION: @@ -304,17 +418,10 @@ bool AntlrParser::isDeclared(const std::string& name, SymbolType type) { } } -void AntlrParser::parseError(const std::string& message) - throw (antlr::SemanticException) { - throw antlr::SemanticException(message, getFilename(), - LT(1).get()->getLine(), - LT(1).get()->getColumn()); -} - -void AntlrParser::checkDeclaration(const std::string& varName, +void Input::checkDeclaration(const std::string& varName, DeclarationCheck check, SymbolType type) - throw (antlr::SemanticException) { + throw (ParserException) { if(!d_checksEnabled) { return; } @@ -340,15 +447,15 @@ void AntlrParser::checkDeclaration(const std::string& varName, } } -void AntlrParser::checkFunction(const std::string& name) - throw (antlr::SemanticException) { +void Input::checkFunction(const std::string& name) + throw (ParserException) { if( d_checksEnabled && !isFunction(name) ) { parseError("Expecting function symbol, found '" + name + "'"); } } -void AntlrParser::checkArity(Kind kind, unsigned int numArgs) - throw (antlr::SemanticException) { +void Input::checkArity(Kind kind, unsigned int numArgs) + throw (ParserException) { if(!d_checksEnabled) { return; } @@ -370,11 +477,11 @@ void AntlrParser::checkArity(Kind kind, unsigned int numArgs) } } -void AntlrParser::enableChecks() { +void Input::enableChecks() { d_checksEnabled = true; } -void AntlrParser::disableChecks() { +void Input::disableChecks() { d_checksEnabled = false; } diff --git a/src/parser/input.h b/src/parser/input.h new file mode 100644 index 000000000..68db0f5dd --- /dev/null +++ b/src/parser/input.h @@ -0,0 +1,394 @@ +/********************* */ +/** parser.h + ** Original author: mdeters + ** Major contributors: dejan + ** Minor contributors (to current version): cconway + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Parser abstraction. + **/ + +#ifndef __CVC4__PARSER__PARSER_H +#define __CVC4__PARSER__PARSER_H + +#include <string> +#include <iostream> + +#include "cvc4_config.h" +#include "expr/expr.h" +#include "expr/kind.h" +#include "parser/parser_exception.h" +#include "parser/parser_options.h" +#include "parser/symbol_table.h" +#include "util/Assert.h" + +namespace CVC4 { + +// Forward declarations +class BooleanType; +class ExprManager; +class Command; +class FunctionType; +class KindType; +class Type; + +namespace parser { + +/** Types of check for the symols */ +enum DeclarationCheck { + /** Enforce that the symbol has been declared */ + CHECK_DECLARED, + /** Enforce that the symbol has not been declared */ + CHECK_UNDECLARED, + /** Don't check anything */ + CHECK_NONE +}; + +/** Returns a string representation of the given object (for + debugging). */ +inline std::string toString(DeclarationCheck check) { + switch(check) { + case CHECK_NONE: return "CHECK_NONE"; + case CHECK_DECLARED: return "CHECK_DECLARED"; + case CHECK_UNDECLARED: return "CHECK_UNDECLARED"; + default: Unreachable(); + } +} + +/** + * Types of symbols. Used to define namespaces. + */ +enum SymbolType { + /** Variables */ + SYM_VARIABLE, + /** Functions */ + SYM_FUNCTION, + /** Sorts */ + SYM_SORT +}; + +/** Returns a string representation of the given object (for + debugging). */ +inline std::string toString(SymbolType type) { + switch(type) { + case SYM_VARIABLE: return "SYM_VARIABLE"; + case SYM_FUNCTION: return "SYM_FUNCTION"; + case SYM_SORT: return "SYM_SORT"; + default: Unreachable(); + } +} + +/** + * The parser. The parser should be obtained by calling the static methods + * getNewParser, and should be deleted when done. + * + * This class includes convenience methods for interacting with the ExprManager + * from within a grammar. + */ +class CVC4_PUBLIC Input { + +public: + /** Create a parser for the given file. + * + * @param exprManager the ExprManager for creating expressions from the input + * @param lang the input language + * @param filename the input filename + */ + static Input* newFileParser(ExprManager* exprManager, InputLanguage lang, const std::string& filename, bool useMmap=false); + + /** Create a parser for the given input stream. + * + * @param exprManager the ExprManager for creating expressions from the input + * @param lang the input language + * @param input the input stream + * @param name the name of the stream, for use in error messages + */ + //static Parser* getNewParser(ExprManager* exprManager, InputLanguage lang, std::istream& input, const std::string& name); + + /** Create a parser for the given input string + * + * @param exprManager the ExprManager for creating expressions from the input + * @param lang the input language + * @param input the input string + * @param name the name of the stream, for use in error messages + */ + static Input* newStringParser(ExprManager* exprManager, InputLanguage lang, const std::string& input, const std::string& name); + + /** + * Destructor. + */ + ~Input(); + + /** + * Parse the next command of the input. If EOF is encountered a EmptyCommand + * is returned and done flag is set. + */ + Command* parseNextCommand() throw(ParserException); + + /** + * Parse the next expression of the stream. If EOF is encountered a null + * expression is returned and done flag is set. + * @return the parsed expression + */ + Expr parseNextExpression() throw(ParserException); + + /** + * Check if we are done -- either the end of input has been reached, or some + * error has been encountered. + * @return true if parser is done + */ + bool done() const; + + /** Enable semantic checks during parsing. */ + void enableChecks(); + + /** Disable semantic checks during parsing. Disabling checks may lead to crashes on bad inputs. */ + void disableChecks(); + + /** Get the name of the input file. */ + inline const std::string getFilename() { + return d_filename; + } + + /** + * Sets the logic for the current benchmark. Declares any logic symbols. + * + * @param name the name of the logic (e.g., QF_UF, AUFLIA) + */ + void setLogic(const std::string& name); + + /** + * Returns a variable, given a name and a type. + * @param var_name the name of the variable + * @return the variable expression + */ + Expr getVariable(const std::string& var_name); + + /** + * Returns a function, given a name and a type. + * @param name the name of the function + * @return the function expression + */ + Expr getFunction(const std::string& name); + + /** + * Returns a sort, given a name + */ + Type* getSort(const std::string& sort_name); + + /** + * Checks if a symbol has been declared. + * @param name the symbol name + * @param type the symbol type + * @return true iff the symbol has been declared with the given type + */ + bool isDeclared(const std::string& name, SymbolType type = SYM_VARIABLE); + + /** + * Checks if the declaration policy we want to enforce holds + * for the given symbol. + * @param name the symbol to check + * @param check the kind of check to perform + * @param type the type of the symbol + * @throws ParserException if checks are enabled and the check fails + */ + void checkDeclaration(const std::string& name, + DeclarationCheck check, + SymbolType type = SYM_VARIABLE) + throw (ParserException); + + /** + * Checks whether the given name is bound to a function. + * @param name the name to check + * @throws ParserException if checks are enabled and name is not bound to a function + */ + void checkFunction(const std::string& name) throw (ParserException); + + /** + * Check that <code>kind</code> can accept <code>numArgs</codes> arguments. + * @param kind the built-in operator to check + * @param numArgs the number of actual arguments + * @throws ParserException if checks are enabled and the operator <code>kind</code> cannot be + * applied to <code>numArgs</code> arguments. + */ + void checkArity(Kind kind, unsigned int numArgs) throw (ParserException); + + /** + * Returns the type for the variable with the given name. + * @param name the symbol to lookup + * @param type the (namespace) type of the symbol + */ + Type* getType(const std::string& var_name, + SymbolType type = SYM_VARIABLE); + + /** + * Returns the true expression. + * @return the true expression + */ + Expr getTrueExpr() const; + + /** + * Returns the false expression. + * @return the false expression + */ + Expr getFalseExpr() const; + + /** + * Creates a new unary CVC4 expression using the expression manager. + * @param kind the kind of the expression + * @param child the child + * @return the new unary expression + */ + Expr mkExpr(Kind kind, const Expr& child); + + /** + * Creates a new binary CVC4 expression using the expression manager. + * @param kind the kind of the expression + * @param child1 the first child + * @param child2 the second child + * @return the new binary expression + */ + Expr mkExpr(Kind kind, const Expr& child_1, const Expr& child_2); + + /** + * Creates a new ternary CVC4 expression using the expression manager. + * @param kind the kind of the expression + * @param child_1 the first child + * @param child_2 the second child + * @param child_3 + * @return the new ternary expression + */ + Expr mkExpr(Kind kind, const Expr& child_1, const Expr& child_2, + const Expr& child_3); + + /** + * Creates a new CVC4 expression using the expression manager. + * @param kind the kind of the expression + * @param children the children of the expression + */ + Expr mkExpr(Kind kind, const std::vector<Expr>& children); + + /** Create a new CVC4 variable expression of the given type. */ + Expr mkVar(const std::string& name, Type* type); + + /** Create a set of new CVC4 variable expressions of the given + type. */ + const std::vector<Expr> + mkVars(const std::vector<std::string> names, + Type* type); + + + /** Create a new variable definition (e.g., from a let binding). */ + void defineVar(const std::string& name, const Expr& val); + /** Remove a variable definition (e.g., from a let binding). */ + void undefineVar(const std::string& name); + + /** Returns a function type over the given domain and range types. */ + Type* functionType(Type* domain, Type* range); + + /** Returns a function type over the given types. argTypes must be + non-empty. */ + Type* functionType(const std::vector<Type*>& argTypes, + Type* rangeType); + + /** + * Returns a function type over the given types. If types has only + * one element, then the type is just types[0]. + * + * @param types a non-empty list of input and output types. + */ + Type* functionType(const std::vector<Type*>& types); + + /** + * Returns a predicate type over the given sorts. If sorts is empty, + * then the type is just BOOLEAN. + + * @param sorts a list of input types + */ + Type* predicateType(const std::vector<Type*>& sorts); + + /** + * Creates a new sort with the given name. + */ + Type* newSort(const std::string& name); + + /** + * Creates a new sorts with the given names. + */ + const std::vector<Type*> + newSorts(const std::vector<std::string>& names); + + /** Is the symbol bound to a boolean variable? */ + bool isBoolean(const std::string& name); + + /** Is the symbol bound to a function? */ + bool isFunction(const std::string& name); + + /** Is the symbol bound to a predicate? */ + bool isPredicate(const std::string& name); + + /** Returns the boolean type. */ + BooleanType* booleanType(); + + /** Returns the kind type (i.e., the type of types). */ + KindType* kindType(); + + /** Returns the minimum arity of the given kind. */ + static unsigned int minArity(Kind kind); + + /** Returns the maximum arity of the given kind. */ + static unsigned int maxArity(Kind kind); + + virtual void parseError(const std::string& msg) throw (ParserException) = 0; + +protected: + virtual Command* doParseCommand() throw(ParserException) = 0; + virtual Expr doParseExpr() throw(ParserException) = 0; + + /** + * Create a new parser for the given file. + * @param exprManager the ExprManager to use + * @param filename the path of the file to parse + */ + Input(ExprManager* exprManager, const std::string& filename); + +private: + + /** Sets the done flag */ + void setDone(bool done = true); + + /** Lookup a symbol in the given namespace. */ + Expr getSymbol(const std::string& var_name, SymbolType type); + + /** Whether to de-allocate the input */ +// bool d_deleteInput; + + /** The expression manager */ + ExprManager* d_exprManager; + + /** The symbol table lookup */ + SymbolTable<Expr> d_varSymbolTable; + + /** The sort table */ + SymbolTable<Type*> d_sortTable; + + /** The name of the input file. */ + std::string d_filename; + + /** Are we done */ + bool d_done; + + /** Are semantic checks enabled during parsing? */ + bool d_checksEnabled; + +}; // end of class Parser + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ + +#endif /* __CVC4__PARSER__PARSER_H */ diff --git a/src/parser/memory_mapped_input_buffer.cpp b/src/parser/memory_mapped_input_buffer.cpp new file mode 100644 index 000000000..6618ecebc --- /dev/null +++ b/src/parser/memory_mapped_input_buffer.cpp @@ -0,0 +1,106 @@ +/* + * memory_mapped_input_buffer.cpp + * + * Created on: Mar 3, 2010 + * Author: chris + */ + +#include <fcntl.h> +#include <stdio.h> +#include <stdint.h> + +#include <sys/errno.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <antlr3input.h> + +#include "memory_mapped_input_buffer.h" +#include "util/Assert.h" +#include "util/exception.h" + +namespace CVC4 { +namespace parser { + +extern "C" { + +static ANTLR3_UINT32 +MemoryMapFile(pANTLR3_INPUT_STREAM input, const std::string& filename); + +void +UnmapFile(pANTLR3_INPUT_STREAM input); + +pANTLR3_INPUT_STREAM MemoryMappedInputBufferNew(const std::string& filename) { + // Pointer to the input stream we are going to create + // + pANTLR3_INPUT_STREAM input; + ANTLR3_UINT32 status; + + // Allocate memory for the input stream structure + // + input = (pANTLR3_INPUT_STREAM) ANTLR3_CALLOC(1, sizeof(ANTLR3_INPUT_STREAM)); + + if(input == NULL) { + return NULL; + } + + // Structure was allocated correctly, now we can read the file. + // + status = MemoryMapFile(input, filename); + + // Call the common 8 bit ASCII input stream handler + // Initializer type thingy doobry function. + // + antlr3AsciiSetupStream(input, ANTLR3_CHARSTREAM); + + // Now we can set up the file name + // + input->istream->streamName + = input->strFactory->newStr(input->strFactory, + (uint8_t*) filename.c_str()); + input->fileName = input->istream->streamName; + input->free = UnmapFile; + + if(status != ANTLR3_SUCCESS) { + input->close(input); + return NULL; + } + + return input; +} + +static ANTLR3_UINT32 MemoryMapFile(pANTLR3_INPUT_STREAM input, + const std::string& filename) { + errno = 0; + struct stat st; + if(stat(filename.c_str(), &st) == -1) { + return ANTLR3_ERR_NOFILE; + } + + input->sizeBuf = st.st_size; + + int fd = open(filename.c_str(), O_RDONLY); + if(fd == -1) { + return ANTLR3_ERR_NOFILE; + } + + input->data = mmap(0, input->sizeBuf, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, + 0); + errno = 0; + if(intptr_t(input->data) == -1) { + return ANTLR3_ERR_NOMEM; + } + + return ANTLR3_SUCCESS; +} + +/* This is a bit shady. antlr3AsciiSetupStream has free and close as aliases. + * We need to unmap the file somewhere, so we install this function as free and + * call the default version of close to de-allocate everything else. */ +void UnmapFile(pANTLR3_INPUT_STREAM input) { + munmap((void*) input->data, input->sizeBuf); + input->close(input); +} + +} +} +} diff --git a/src/parser/memory_mapped_input_buffer.h b/src/parser/memory_mapped_input_buffer.h index e1639a072..88ba2183a 100644 --- a/src/parser/memory_mapped_input_buffer.h +++ b/src/parser/memory_mapped_input_buffer.h @@ -14,69 +14,21 @@ #ifndef __CVC4__PARSER__MEMORY_MAPPED_INPUT_BUFFER_H #define __CVC4__PARSER__MEMORY_MAPPED_INPUT_BUFFER_H -#include <cstdio> -#include <cerrno> - -#include <fcntl.h> -#include <stdint.h> -#include <string.h> - -#include <sys/mman.h> -#include <sys/stat.h> - -#include <antlr/InputBuffer.hpp> - -#include "util/Assert.h" -#include "util/exception.h" +#include <antlr3input.h> +#include <string> namespace CVC4 { namespace parser { -class MemoryMappedInputBuffer : public antlr::InputBuffer { - -public: - MemoryMappedInputBuffer(const std::string& filename) { - struct stat st; - if( stat(filename.c_str(), &st) == -1 ) { - char buf[80]; - const char* errMsg = strerror_r(errno, buf, sizeof(buf)); - throw Exception("unable to stat() file `" + filename + "': " + errMsg); - } - d_size = st.st_size; - - int fd = open(filename.c_str(), O_RDONLY); - if( fd == -1 ) { - char buf[80]; - const char* errMsg = strerror_r(errno, buf, sizeof(buf)); - throw Exception("unable to fopen() file `" + filename + "': " + errMsg); - } - - d_start = static_cast< const char * >( - mmap( 0, d_size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0 ) ); - if( intptr_t( d_start ) == -1 ) { - char buf[80]; - const char* errMsg = strerror_r(errno, buf, sizeof(buf)); - throw Exception("unable to mmap() file `" + filename + "': " + errMsg); - } - d_cur = d_start; - d_end = d_start + d_size; - } +extern "C" { - ~MemoryMappedInputBuffer() { - munmap((void*) d_start, d_size); - } +pANTLR3_INPUT_STREAM +MemoryMappedInputBufferNew(const std::string& filename); - inline int getChar() { - Assert( d_cur >= d_start && d_cur <= d_end ); - return d_cur == d_end ? EOF : *d_cur++; - } +} -private: - unsigned long int d_size; - const char *d_start, *d_end, *d_cur; -}; +} +} -}/* CVC4::parser namespace */ -}/* CVC4 namespace */ #endif /* __CVC4__PARSER__MEMORY_MAPPED_INPUT_BUFFER_H */ diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp deleted file mode 100644 index a129d97ee..000000000 --- a/src/parser/parser.cpp +++ /dev/null @@ -1,147 +0,0 @@ -/********************* */ -/** parser.cpp - ** Original author: mdeters - ** Major contributors: dejan - ** Minor contributors (to current version): cconway - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Parser implementation. - **/ - -#include <iostream> -#include <fstream> -#include <antlr/CharScanner.hpp> -#include <antlr/CharBuffer.hpp> - -#include "parser/parser.h" -#include "parser/memory_mapped_input_buffer.h" -#include "expr/command.h" -#include "util/output.h" -#include "util/Assert.h" -#include "parser/parser_exception.h" -#include "parser/antlr_parser.h" -#include "parser/smt/generated/AntlrSmtParser.hpp" -#include "parser/smt/generated/AntlrSmtLexer.hpp" -#include "parser/cvc/generated/AntlrCvcParser.hpp" -#include "parser/cvc/generated/AntlrCvcLexer.hpp" - -using namespace std; -using namespace antlr; - -namespace CVC4 { -namespace parser { - -void Parser::setDone(bool done) { - d_done = done; -} - -bool Parser::done() const { - return d_done; -} - -Command* Parser::parseNextCommand() throw (ParserException, AssertionException) { - Debug("parser") << "parseNextCommand()" << std::endl; - Command* cmd = NULL; - if(!done()) { - try { - cmd = d_antlrParser->parseCommand(); - if(cmd == NULL) { - setDone(); - } - } catch(antlr::ANTLRException& e) { - setDone(); - throw ParserException(e.toString()); - } - } - Debug("parser") << "parseNextCommand() => " << cmd << std::endl; - return cmd; -} - -Expr Parser::parseNextExpression() throw (ParserException, AssertionException) { - Debug("parser") << "parseNextExpression()" << std::endl; - Expr result; - if(!done()) { - try { - result = d_antlrParser->parseExpr(); - if(result.isNull()) - setDone(); - } catch(antlr::ANTLRException& e) { - setDone(); - throw ParserException(e.toString()); - } - } - Debug("parser") << "parseNextExpression() => " << result << std::endl; - return result; -} - -Parser::~Parser() { - delete d_antlrParser; - delete d_antlrLexer; - delete d_inputBuffer; -} - -Parser::Parser(InputBuffer* inputBuffer, AntlrParser* antlrParser, - CharScanner* antlrLexer) : - d_done(false), - d_antlrParser(antlrParser), - d_antlrLexer(antlrLexer), - d_inputBuffer(inputBuffer) { -} - -Parser* Parser::getNewParser(ExprManager* em, InputLanguage lang, - InputBuffer* inputBuffer, string filename) { - - AntlrParser* antlrParser = 0; - antlr::CharScanner* antlrLexer = 0; - - switch(lang) { - case LANG_CVC4: { - antlrLexer = new AntlrCvcLexer(*inputBuffer); - antlrParser = new AntlrCvcParser(*antlrLexer); - break; - } - case LANG_SMTLIB: { -// MemoryMappedInputBuffer inputBuffer(filename); -// antlrLexer = new AntlrSmtLexer(inputBuffer); - antlrLexer = new AntlrSmtLexer(*inputBuffer); - antlrParser = new AntlrSmtParser(*antlrLexer); - break; - } - default: - Unhandled("Unknown Input language!"); - } - - antlrLexer->setFilename(filename); - antlrParser->setFilename(filename); - antlrParser->setExpressionManager(em); - - return new Parser(inputBuffer, antlrParser, antlrLexer); -} - -Parser* Parser::getMemoryMappedParser(ExprManager* em, InputLanguage lang, string filename) { - MemoryMappedInputBuffer* inputBuffer = new MemoryMappedInputBuffer(filename); - return getNewParser(em, lang, inputBuffer, filename); -} - -Parser* Parser::getNewParser(ExprManager* em, InputLanguage lang, - istream& input, string filename) { - antlr::CharBuffer* inputBuffer = new CharBuffer(input); - return getNewParser(em, lang, inputBuffer, filename); -} - -void Parser::disableChecks() { - d_antlrParser->disableChecks(); -} - -void Parser::enableChecks() { - d_antlrParser->enableChecks(); -} - - -}/* CVC4::parser namespace */ -}/* CVC4 namespace */ diff --git a/src/parser/parser.h b/src/parser/parser.h deleted file mode 100644 index d482b7907..000000000 --- a/src/parser/parser.h +++ /dev/null @@ -1,135 +0,0 @@ -/********************* */ -/** parser.h - ** Original author: mdeters - ** Major contributors: dejan - ** Minor contributors (to current version): cconway - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Parser abstraction. - **/ - -#ifndef __CVC4__PARSER__PARSER_H -#define __CVC4__PARSER__PARSER_H - -#include <string> -#include <iostream> - -#include "cvc4_config.h" -#include "parser/parser_exception.h" -#include "util/Assert.h" - -namespace antlr { - class CharScanner; - class InputBuffer; -} - -namespace CVC4 { - -// Forward declarations -class Expr; -class Command; -class ExprManager; - -namespace parser { - -class AntlrParser; - -/** - * The parser. The parser should be obtained by calling the static methods - * getNewParser, and should be deleted when done. - */ -class CVC4_PUBLIC Parser { - -public: - - /** The input language option */ - enum InputLanguage { - /** The SMTLIB input language */ - LANG_SMTLIB, - /** The CVC4 input language */ - LANG_CVC4, - /** Auto-detect the language */ - LANG_AUTO - }; - - static Parser* getMemoryMappedParser(ExprManager* em, InputLanguage lang, std::string filename); - static Parser* getNewParser(ExprManager* em, InputLanguage lang, std::istream& input, std::string filename); - - /** - * Destructor. - */ - ~Parser(); - - /** - * Parse the next command of the input. If EOF is encountered a EmptyCommand - * is returned and done flag is set. - */ - Command* parseNextCommand() throw(ParserException, AssertionException); - - /** - * Parse the next expression of the stream. If EOF is encountered a null - * expression is returned and done flag is set. - * @return the parsed expression - */ - Expr parseNextExpression() throw(ParserException, AssertionException); - - /** - * Check if we are done -- either the end of input has been reached, or some - * error has been encountered. - * @return true if parser is done - */ - bool done() const; - - /** Enable semantic checks during parsing. */ - void enableChecks(); - - /** Disable semantic checks during parsing. Disabling checks may lead to crashes on bad inputs. */ - void disableChecks(); - -private: - - /** - * Create a new parser. - * @param em the expression manager to usee - * @param lang the language to parse - * @param inputBuffer the input buffer to parse - * @param filename the filename to attach to the stream - * @param deleteInput wheather to delete the input - * @return the parser - */ - static Parser* getNewParser(ExprManager* em, InputLanguage lang, antlr::InputBuffer* inputBuffer, std::string filename); - - /** - * Create a new parser given the actual antlr parser. - * @param antlrParser the antlr parser to user - */ - Parser(antlr::InputBuffer* inputBuffer, AntlrParser* antlrParser, antlr::CharScanner* antlrLexer); - - /** Sets the done flag */ - void setDone(bool done = true); - - /** Are we done */ - bool d_done; - - /** The antlr parser */ - AntlrParser* d_antlrParser; - - /** The entlr lexer */ - antlr::CharScanner* d_antlrLexer; - - /** The input stream we are using */ - antlr::InputBuffer* d_inputBuffer; - - /** Wherther to de-allocate the input */ - bool d_deleteInput; -}; // end of class Parser - -}/* CVC4::parser namespace */ -}/* CVC4 namespace */ - -#endif /* __CVC4__PARSER__PARSER_H */ diff --git a/src/parser/parser_exception.h b/src/parser/parser_exception.h index f0ddc6a7f..ee02289ee 100644 --- a/src/parser/parser_exception.h +++ b/src/parser/parser_exception.h @@ -18,8 +18,9 @@ #include "util/exception.h" #include "cvc4_config.h" -#include <string> #include <iostream> +#include <string> +#include <sstream> namespace CVC4 { namespace parser { @@ -30,11 +31,43 @@ public: ParserException() { } ParserException(const std::string& msg): Exception(msg) { } ParserException(const char* msg): Exception(msg) { } + ParserException(const std::string& msg, const std::string& filename, + unsigned long line, unsigned long column) : + Exception(msg), + d_filename(filename), + d_line(line), + d_column(column) { + } + // Destructor virtual ~ParserException() throw() {} virtual std::string toString() const { - return "Parse Error: " + d_msg; + if( d_line > 0 ) { + std::stringstream ss; + ss << "Parser Error: " << d_filename << ":" << d_line << "." + << d_column << ": " << d_msg; + return ss.str(); + } else { + return "Parse Error: " + d_msg; + } + } + + std::string getFilename() const { + return d_filename; } + + int getLine() const { + return d_line; + } + + int getColumn() const { + return d_column; + } + +protected: + std::string d_filename; + unsigned long d_line; + unsigned long d_column; }; // end of class ParserException }/* CVC4::parser namespace */ diff --git a/src/parser/parser_options.h b/src/parser/parser_options.h new file mode 100644 index 000000000..ddba219a4 --- /dev/null +++ b/src/parser/parser_options.h @@ -0,0 +1,27 @@ +/* + * parser_options.h + * + * Created on: Mar 3, 2010 + * Author: chris + */ + +#ifndef CVC4__PARSER__PARSER_OPTIONS_H_ +#define CVC4__PARSER__PARSER_OPTIONS_H_ + +namespace CVC4 { +namespace parser { + + /** The input language option */ + enum InputLanguage { + /** The SMTLIB input language */ + LANG_SMTLIB, + /** The CVC4 input language */ + LANG_CVC4, + /** Auto-detect the language */ + LANG_AUTO + }; + +} +} + +#endif /* CVC4__PARSER__PARSER_OPTIONS_H_ */ diff --git a/src/parser/smt/.gitignore b/src/parser/smt/.gitignore new file mode 100644 index 000000000..7fd0cf319 --- /dev/null +++ b/src/parser/smt/.gitignore @@ -0,0 +1,4 @@ +/.deps +/stamp-generated +/generated +/Makefile.in diff --git a/src/parser/smt/Makefile.am b/src/parser/smt/Makefile.am index 7fe235002..3ea6dc940 100644 --- a/src/parser/smt/Makefile.am +++ b/src/parser/smt/Makefile.am @@ -1,29 +1,29 @@ AM_CPPFLAGS = \ -D__BUILDING_CVC4PARSERLIB \ - -I@srcdir@/../../include -I@srcdir@/../.. + -I@srcdir@/../../include -I@srcdir@/../.. $(ANTLR_INCLUDES) AM_CXXFLAGS = -Wall -fvisibility=hidden +# Compile generated C files using C++ compiler +CC=$(CXX) noinst_LTLIBRARIES = libparsersmt.la ANTLR_TOKEN_STUFF = \ - @srcdir@/generated/SmtVocabularyTokenTypes.hpp \ - @srcdir@/generated/SmtVocabularyTokenTypes.txt \ - @srcdir@/generated/AntlrSmtParserTokenTypes.hpp \ - @srcdir@/generated/AntlrSmtParserTokenTypes.txt + @srcdir@/generated/Smt.tokens ANTLR_LEXER_STUFF = \ - @srcdir@/generated/AntlrSmtLexer.hpp \ - @srcdir@/generated/AntlrSmtLexer.cpp \ + @srcdir@/generated/SmtLexer.h \ + @srcdir@/generated/SmtLexer.c \ $(ANTLR_TOKEN_STUFF) ANTLR_PARSER_STUFF = \ - @srcdir@/generated/AntlrSmtParser.hpp \ - @srcdir@/generated/AntlrSmtParser.cpp + @srcdir@/generated/SmtParser.h \ + @srcdir@/generated/SmtParser.c ANTLR_STUFF = \ $(ANTLR_LEXER_STUFF) \ $(ANTLR_PARSER_STUFF) libparsersmt_la_SOURCES = \ - smt_lexer.g \ - smt_parser.g \ + Smt.g \ + smt_input.h \ + smt_input.cpp \ $(ANTLR_STUFF) BUILT_SOURCES = $(ANTLR_STUFF) @@ -36,16 +36,14 @@ maintainer-clean-local: @srcdir@/stamp-generated: mkdir -p @srcdir@/generated touch @srcdir@/stamp-generated + # antlr doesn't overwrite output files, it just leaves them. So we have to delete them first. -@srcdir@/generated/AntlrSmtLexer.hpp: smt_lexer.g @srcdir@/stamp-generated - $(AM_V_at)-rm -f $(ANTLR_LEXER_STUFF) - $(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/smt_lexer.g" -@srcdir@/generated/AntlrSmtLexer.cpp $(ANTLR_TOKEN_STUFF): @srcdir@/generated/AntlrSmtLexer.hpp -# doesn't actually depend on the lexer, but if we're doing parallel +@srcdir@/generated/SmtLexer.h: Smt.g @srcdir@/stamp-generated + -rm -f $(ANTLR_STUFF) + $(ANTLR) -o "@srcdir@/generated" "@srcdir@/Smt.g" + +# These don't actually depend on SmtLexer.h, but if we're doing parallel # make and the lexer needs to be rebuilt, we have to keep the rules # from running in parallel (since the token files will be deleted & # recreated) -@srcdir@/generated/AntlrSmtParser.hpp: smt_parser.g smt_lexer.g $(ANTLR_TOKEN_STUFF) @srcdir@/stamp-generated - $(AM_V_at)-rm -f $(ANTLR_PARSER_STUFF) - $(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/smt_parser.g" -@srcdir@/generated/AntlrSmtParser.cpp: @srcdir@/generated/AntlrSmtParser.hpp +@srcdir@/generated/SmtLexer.c @srcdir@/generated/SmtParser.h @srcdir@/generated/SmtParser.c $(ANTLR_TOKEN_STUFF): @srcdir@/generated/SmtLexer.h diff --git a/src/parser/smt/Smt.g b/src/parser/smt/Smt.g new file mode 100644 index 000000000..7095c7269 --- /dev/null +++ b/src/parser/smt/Smt.g @@ -0,0 +1,552 @@ +/* ******************* */ +/* Smt.g + ** Original author: cconway + ** Major contributors: dejan, mdeters + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Parser for SMT-LIB input language. + **/ + +grammar Smt; + +options { + language = 'C'; // C output for antlr +// defaultErrorHandler = false; // Skip the default error handling, just break with exceptions + k = 2; +} + +@header { +/** + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + **/ +} + +@lexer::includes { +/* This improves performance by ~10 percent on big inputs. + * This option is only valid if we know the input is ASCII (or some 8-bit encoding). + * If we know the input is UTF-16, we can use ANTLR3_INLINE_INPUT_UTF16. + * Otherwise, we have to let the lexer detect the encoding at runtime. + */ +#define ANTLR3_INLINE_INPUT_ASCII +} + +@parser::includes { +#include "expr/command.h" +#include "parser/input.h" + +namespace CVC4 { + class Expr; +namespace parser { + class SmtInput; +} +} + +extern +void SetSmtInput(CVC4::parser::SmtInput* smt); + +} + +@parser::postinclude { +#include "expr/expr.h" +#include "expr/kind.h" +#include "expr/type.h" +#include "parser/input.h" +#include "parser/smt/smt_input.h" +#include "util/output.h" +#include <vector> + +using namespace CVC4; +using namespace CVC4::parser; +} + +@members { +static CVC4::parser::SmtInput *input; + +extern +void SetSmtInput(CVC4::parser::SmtInput* _input) { + input = _input; +} +} + +/** + * Parses an expression. + * @return the parsed expression + */ +parseExpr returns [CVC4::Expr expr] + : annotatedFormula[expr] + | EOF + ; + +/** + * Parses a command (the whole benchmark) + * @return the command of the benchmark + */ +parseCommand returns [CVC4::Command* cmd] + : b = benchmark { $cmd = b; } + ; + +/** + * Matches the whole SMT-LIB benchmark. + * @return the sequence command containing the whole problem + */ +benchmark returns [CVC4::Command* cmd] + : LPAREN_TOK BENCHMARK_TOK IDENTIFIER c = benchAttributes RPAREN_TOK + { $cmd = c; } + | EOF { $cmd = 0; } + ; + +/** + * Matches a sequence of benchmark attributes and returns a pointer to a + * command sequence. + * @return the command sequence + */ +benchAttributes returns [CVC4::CommandSequence* cmd_seq] +@init { + cmd_seq = new CommandSequence(); +} + : (cmd = benchAttribute { if (cmd) cmd_seq->addCommand(cmd); } )+ + ; + +/** + * Matches a benchmark attribute, sucha as ':logic', ':formula', and returns + * a corresponding command + * @return a command corresponding to the attribute + */ +benchAttribute returns [CVC4::Command* smt_command] +@declarations { + std::string name; + BenchmarkStatus b_status; + Expr expr; +} + : LOGIC_TOK identifier[name,CHECK_NONE,SYM_VARIABLE] + { input->setLogic(name); + smt_command = new SetBenchmarkLogicCommand(name); } + | ASSUMPTION_TOK annotatedFormula[expr] + { smt_command = new AssertCommand(expr); } + | FORMULA_TOK annotatedFormula[expr] + { smt_command = new CheckSatCommand(expr); } + | STATUS_TOK status[b_status] + { smt_command = new SetBenchmarkStatusCommand(b_status); } + | EXTRAFUNS_TOK LPAREN_TOK (functionDeclaration)+ RPAREN_TOK + | EXTRAPREDS_TOK LPAREN_TOK (predicateDeclaration)+ RPAREN_TOK + | EXTRASORTS_TOK LPAREN_TOK sortDeclaration+ RPAREN_TOK + | NOTES_TOK STRING_LITERAL + | annotation + ; + +/** + * Matches an annotated formula. + * @return the expression representing the formula + */ +annotatedFormula[CVC4::Expr& expr] +@init { + Debug("parser") << "annotated formula: " << AntlrInput::tokenText(LT(1)) << std::endl; + Kind kind; + std::string name; + std::vector<Expr> args; /* = getExprVector(); */ +} + : /* a built-in operator application */ + LPAREN_TOK builtinOp[kind] annotatedFormulas[args,expr] RPAREN_TOK + { input->checkArity(kind, args.size()); + if((kind == CVC4::kind::AND || kind == CVC4::kind::OR) && args.size() == 1) { + /* Unary AND/OR can be replaced with the argument. + It just so happens expr should already by the only argument. */ + Assert( expr == args[0] ); + } else { + expr = input->mkExpr(kind, args); + } + } + + | /* A non-built-in function application */ + + // Semantic predicate not necessary if parenthesized subexpressions + // are disallowed + // { isFunction(LT(2)->getText()) }? + + LPAREN_TOK + functionSymbol[expr] + { args.push_back(expr); } + annotatedFormulas[args,expr] RPAREN_TOK + // TODO: check arity + { expr = input->mkExpr(CVC4::kind::APPLY_UF,args); } + + | /* An ite expression */ + LPAREN_TOK (ITE_TOK | IF_THEN_ELSE_TOK) + annotatedFormula[expr] + { args.push_back(expr); } + annotatedFormula[expr] + { args.push_back(expr); } + annotatedFormula[expr] + { args.push_back(expr); } + RPAREN_TOK + { expr = input->mkExpr(CVC4::kind::ITE, args); } + + | /* a let/flet binding */ + LPAREN_TOK + (LET_TOK LPAREN_TOK var_identifier[name,CHECK_UNDECLARED] + | FLET_TOK LPAREN_TOK fun_identifier[name,CHECK_UNDECLARED] ) + annotatedFormula[expr] RPAREN_TOK + { input->defineVar(name,expr); } + annotatedFormula[expr] + RPAREN_TOK + { input->undefineVar(name); } + + | /* a variable */ + ( identifier[name,CHECK_DECLARED,SYM_VARIABLE] + | var_identifier[name,CHECK_DECLARED] + | fun_identifier[name,CHECK_DECLARED] ) + { expr = input->getVariable(name); } + + /* constants */ + | TRUE_TOK { expr = input->getTrueExpr(); } + | FALSE_TOK { expr = input->getFalseExpr(); } + /* TODO: let, flet, quantifiers, arithmetic constants */ + ; + +/** + * Matches a sequence of annotated formulas and puts them into the formulas + * vector. + * @param formulas the vector to fill with formulas + * @param expr an Expr reference for the elements of the sequence + */ +/* NOTE: We pass an Expr in here just to avoid allocating a fresh Expr every + * time through this rule. */ +annotatedFormulas[std::vector<CVC4::Expr>& formulas, CVC4::Expr& expr] + : ( annotatedFormula[expr] { formulas.push_back(expr); } )+ + ; + +/** +* Matches on of the unary Boolean connectives. +* @return the kind of the Bollean connective +*/ +builtinOp[CVC4::Kind& kind] +@init { + Debug("parser") << "builtin: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : NOT_TOK { $kind = CVC4::kind::NOT; } + | IMPLIES_TOK { $kind = CVC4::kind::IMPLIES; } + | AND_TOK { $kind = CVC4::kind::AND; } + | OR_TOK { $kind = CVC4::kind::OR; } + | XOR_TOK { $kind = CVC4::kind::XOR; } + | IFF_TOK { $kind = CVC4::kind::IFF; } + | EQUAL_TOK { $kind = CVC4::kind::EQUAL; } + | DISTINCT_TOK { $kind = CVC4::kind::DISTINCT; } + /* TODO: lt, gt, plus, minus, etc. */ + ; + +/** + * Matches a (possibly undeclared) predicate identifier (returning the string). + * @param check what kind of check to do with the symbol + */ +predicateName[std::string& name, CVC4::parser::DeclarationCheck check] + : functionName[name,check] + ; + +/** + * Matches a (possibly undeclared) function identifier (returning the string) + * @param check what kind of check to do with the symbol + */ +functionName[std::string& name, CVC4::parser::DeclarationCheck check] + : identifier[name,check,SYM_FUNCTION] + ; + +/** + * Matches an previously declared function symbol (returning an Expr) + */ +functionSymbol[CVC4::Expr& fun] +@declarations { + std::string name; +} + : functionName[name,CHECK_DECLARED] + { input->checkFunction(name); + fun = input->getFunction(name); } + ; + +/** + * Matches an attribute name from the input (:attribute_name). + */ +attribute + : ATTR_IDENTIFIER + ; + + + +functionDeclaration +@declarations { + std::string name; + std::vector<Type*> sorts; +} + : LPAREN_TOK functionName[name,CHECK_UNDECLARED] + t = sortSymbol // require at least one sort + { sorts.push_back(t); } + sortList[sorts] RPAREN_TOK + { t = input->functionType(sorts); + input->mkVar(name, t); } + ; + +/** + * Matches the declaration of a predicate and declares it + */ +predicateDeclaration +@declarations { + std::string name; + std::vector<Type*> p_sorts; +} + : LPAREN_TOK predicateName[name,CHECK_UNDECLARED] sortList[p_sorts] RPAREN_TOK + { Type *t = input->predicateType(p_sorts); + input->mkVar(name, t); } + ; + +sortDeclaration +@declarations { + std::string name; +} + : sortName[name,CHECK_UNDECLARED] + { Debug("parser") << "sort decl: '" << name << "'" << std::endl; + input->newSort(name); } + ; + +/** + * Matches a sequence of sort symbols and fills them into the given vector. + */ +sortList[std::vector<CVC4::Type*>& sorts] + : ( t = sortSymbol { sorts.push_back(t); })* + ; + +/** + * Matches the sort symbol, which can be an arbitrary identifier. + * @param check the check to perform on the name + */ +sortName[std::string& name, CVC4::parser::DeclarationCheck check] + : identifier[name,check,SYM_SORT] + ; + +sortSymbol returns [CVC4::Type* t] +@declarations { + std::string name; +} + : sortName[name,CHECK_NONE] + { $t = input->getSort(name); } + ; + +/** + * Matches the status of the benchmark, one of 'sat', 'unsat' or 'unknown'. + */ +status[ CVC4::BenchmarkStatus& status ] + : SAT_TOK { $status = SMT_SATISFIABLE; } + | UNSAT_TOK { $status = SMT_UNSATISFIABLE; } + | UNKNOWN_TOK { $status = SMT_UNKNOWN; } + ; + +/** + * Matches an annotation, which is an attribute name, with an optional user + */ +annotation + : attribute (USER_VALUE)? + ; + +/** + * Matches an identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + * @param type the intended namespace for the identifier + */ +identifier[std::string& id, + CVC4::parser::DeclarationCheck check, + CVC4::parser::SymbolType type] + : IDENTIFIER + { id = AntlrInput::tokenText($IDENTIFIER); + Debug("parser") << "identifier: " << id + << " check? " << toString(check) + << " type? " << toString(type) << std::endl; + input->checkDeclaration(id, check, type); } + ; + +/** + * Matches an variable identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + */ +var_identifier[std::string& id, + CVC4::parser::DeclarationCheck check] + : VAR_IDENTIFIER + { id = AntlrInput::tokenText($VAR_IDENTIFIER); + Debug("parser") << "var_identifier: " << id + << " check? " << toString(check) << std::endl; + input->checkDeclaration(id, check, SYM_VARIABLE); } + ; + +/** + * Matches an function identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + */ +fun_identifier[std::string& id, + CVC4::parser::DeclarationCheck check] + : FUN_IDENTIFIER + { id = AntlrInput::tokenText($FUN_IDENTIFIER); + Debug("parser") << "fun_identifier: " << id + << " check? " << toString(check) << std::endl; + input->checkDeclaration(id, check, SYM_FUNCTION); } + ; + + +// Base SMT-LIB tokens +DISTINCT_TOK : 'distinct'; +ITE_TOK : 'ite'; +IF_THEN_ELSE_TOK : 'if_then_else'; +TRUE_TOK : 'true'; +FALSE_TOK : 'false'; +NOT_TOK : 'not'; +IMPLIES_TOK : 'implies'; +AND_TOK : 'and'; +OR_TOK : 'or'; +XOR_TOK : 'xor'; +IFF_TOK : 'iff'; +EXISTS_TOK : 'exists'; +FORALL_TOK : 'forall'; +LET_TOK : 'let'; +FLET_TOK : 'flet'; +THEORY_TOK : 'theory'; +SAT_TOK : 'sat'; +UNSAT_TOK : 'unsat'; +UNKNOWN_TOK : 'unknown'; +BENCHMARK_TOK : 'benchmark'; + +// The SMT attribute tokens +LOGIC_TOK : ':logic'; +ASSUMPTION_TOK : ':assumption'; +FORMULA_TOK : ':formula'; +STATUS_TOK : ':status'; +EXTRASORTS_TOK : ':extrasorts'; +EXTRAFUNS_TOK : ':extrafuns'; +EXTRAPREDS_TOK : ':extrapreds'; +NOTES_TOK : ':notes'; + +// arithmetic symbols +EQUAL_TOK : '='; +LESS_THAN_TOK : '<'; +GREATER_THAN_TOK : '>'; +AMPERSAND_TOK : '&'; +AT_TOK : '@'; +POUND_TOK : '#'; +PLUS_TOK : '+'; +MINUS_TOK : '-'; +STAR_TOK : '*'; +DIV_TOK : '/'; +PERCENT_TOK : '%'; +PIPE_TOK : '|'; +TILDE_TOK : '~'; + +// Language meta-symbols +//QUESTION_TOK : '?'; +//DOLLAR_TOK : '$'; +LPAREN_TOK : '('; +RPAREN_TOK : ')'; + +/** + * Matches an identifier from the input. An identifier is a sequence of letters, + * digits and "_", "'", "." symbols, starting with a letter. + */ +IDENTIFIER /*options { paraphrase = 'an identifier'; testLiterals = true; }*/ + : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* + ; + +/** + * Matches an identifier starting with a colon. + */ +ATTR_IDENTIFIER /*options { paraphrase = 'an identifier starting with a colon'; testLiterals = true; }*/ + : ':' IDENTIFIER + ; + +/** + * Matches an identifier starting with a question mark. + */ +VAR_IDENTIFIER + : '?' IDENTIFIER + ; + +/** + * Matches an identifier starting with a dollar sign. + */ +FUN_IDENTIFIER + : '$' IDENTIFIER + ; + +/** + * Matches the value of user-defined annotations or attributes. The only constraint imposed on a user-defined value is that it start with + * an open brace and end with closed brace. + */ +USER_VALUE + : '{' + ( ~('{' | '}') )* + '}' + ; + + +/** + * Matches and skips whitespace in the input. + */ +WHITESPACE /*options { paraphrase = 'whitespace'; }*/ + : (' ' | '\t' | '\f' | '\r' | '\n')+ { $channel = HIDDEN;; } + ; + +/** + * Matches a numeral from the input (non-empty sequence of digits). + */ +NUMERAL_TOK /*options { paraphrase = 'a numeral'; }*/ + : (DIGIT)+ + ; + +/** + * Matches a double quoted string literal. Escaping is supported, and escape + * character '\' has to be escaped. + */ +STRING_LITERAL /*options { paraphrase = 'a string literal'; }*/ + : '"' (ESCAPE | ~('"'|'\\'))* '"' + ; + +/** + * Matches the comments and ignores them + */ +COMMENT /*options { paraphrase = 'comment'; }*/ + : ';' (~('\n' | '\r'))* { $channel = HIDDEN;; } + ; + + +/** + * Matches any letter ('a'-'z' and 'A'-'Z'). + */ +fragment +ALPHA /*options { paraphrase = 'a letter'; }*/ + : 'a'..'z' + | 'A'..'Z' + ; + +/** + * Matches the digits (0-9) + */ +fragment +DIGIT /*options { paraphrase = 'a digit'; }*/ + : '0'..'9' + ; + + +/** + * Matches an allowed escaped character. + */ +fragment ESCAPE + : '\\' ('"' | '\\' | 'n' | 't' | 'r') + ; + diff --git a/src/parser/smt/smt_input.cpp b/src/parser/smt/smt_input.cpp new file mode 100644 index 000000000..7a28c30f1 --- /dev/null +++ b/src/parser/smt/smt_input.cpp @@ -0,0 +1,73 @@ +/* + * smt_parser.cpp + * + * Created on: Mar 5, 2010 + * Author: chris + */ + +#include <antlr3.h> + +#include "expr/expr_manager.h" +#include "parser/parser_exception.h" +#include "parser/smt/smt_input.h" +#include "parser/smt/generated/SmtLexer.h" +#include "parser/smt/generated/SmtParser.h" + +namespace CVC4 { +namespace parser { + +/* Use lookahead=2 */ +SmtInput::SmtInput(ExprManager* exprManager, const std::string& filename, bool useMmap) : + AntlrInput(exprManager,filename,2,useMmap) { + init(); +} + +SmtInput::SmtInput(ExprManager* exprManager, const std::string& input, const std::string& name) : + AntlrInput(exprManager,input,name,2) { + init(); +} + +void SmtInput::init() { + pANTLR3_INPUT_STREAM input = getInputStream(); + AlwaysAssert( input != NULL ); + + d_pSmtLexer = SmtLexerNew(input); + if( d_pSmtLexer == NULL ) { + throw ParserException("Failed to create SMT lexer."); + } + + setLexer( d_pSmtLexer->pLexer ); + + pANTLR3_COMMON_TOKEN_STREAM tokenStream = getTokenStream(); + AlwaysAssert( tokenStream != NULL ); + + d_pSmtParser = SmtParserNew(tokenStream); + if( d_pSmtParser == NULL ) { + throw ParserException("Failed to create SMT parser."); + } + + setParser(d_pSmtParser->pParser); + SetSmtInput(this); +} + + +SmtInput::~SmtInput() { + d_pSmtLexer->free(d_pSmtLexer); + d_pSmtParser->free(d_pSmtParser); +} + +Command* SmtInput::doParseCommand() throw (ParserException) { + return d_pSmtParser->parseCommand(d_pSmtParser); +} + +Expr SmtInput::doParseExpr() throw (ParserException) { + return d_pSmtParser->parseExpr(d_pSmtParser); +} + +pANTLR3_LEXER SmtInput::getLexer() { + return d_pSmtLexer->pLexer; +} + +} // namespace parser + +} // namespace CVC4 diff --git a/src/parser/smt/smt_input.h b/src/parser/smt/smt_input.h new file mode 100644 index 000000000..b3613d67b --- /dev/null +++ b/src/parser/smt/smt_input.h @@ -0,0 +1,47 @@ +/* + * smt_parser.h + * + * Created on: Mar 5, 2010 + * Author: chris + */ + +#ifndef SMT_PARSER_H_ +#define SMT_PARSER_H_ + +#include "parser/antlr_input.h" +#include "parser/smt/generated/SmtLexer.h" +#include "parser/smt/generated/SmtParser.h" + +// extern void SmtParserSetAntlrParser(CVC4::parser::AntlrParser* newAntlrParser); + +namespace CVC4 { + +class Command; +class Expr; +class ExprManager; + +namespace parser { + +class SmtInput : public AntlrInput { +public: + SmtInput(ExprManager* exprManager, const std::string& filename, bool useMmap); + SmtInput(ExprManager* exprManager, const std::string& input, const std::string& name); + ~SmtInput(); + +protected: + Command* doParseCommand() throw(ParserException); + Expr doParseExpr() throw(ParserException); + pANTLR3_LEXER getLexer(); + pANTLR3_LEXER createLexer(pANTLR3_INPUT_STREAM input); + pANTLR3_PARSER createParser(pANTLR3_COMMON_TOKEN_STREAM tokenStream); + +private: + void init(); + pSmtLexer d_pSmtLexer; + pSmtParser d_pSmtParser; +}; // class SmtInput +} // namespace parser + +} // namespace CVC4 + +#endif /* SMT_PARSER_H_ */ diff --git a/src/parser/smt/smt_lexer.g b/src/parser/smt/smt_lexer.g deleted file mode 100644 index d694cd93f..000000000 --- a/src/parser/smt/smt_lexer.g +++ /dev/null @@ -1,220 +0,0 @@ -/* ******************* */ -/* smt_lexer.g - ** Original author: dejan - ** Major contributors: cconway, mdeters - ** Minor contributors (to current version): none - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Lexer for SMT-LIB input language. - **/ - -options { - language = "Cpp"; // C++ output for antlr - namespaceStd = "std"; // Cosmetic option to get rid of long defines in generated code - namespaceAntlr = "antlr"; // Cosmetic option to get rid of long defines in generated code - namespace = "CVC4::parser"; // Wrap everything in the smtparser namespace -} - -/** - * AntlrSmtLexer class is a stream tokenizer (lexer) for the SMT-LIB benchmark - * language. - */ -class AntlrSmtLexer extends Lexer; - -options { - exportVocab = SmtVocabulary; // Name of the shared token vocabulary - testLiterals = false; // Do not check for literals by default - defaultErrorHandler = false; // Skip the default error handling, just break with exceptions - k = 2; -} - -tokens { - // Base SMT-LIB tokens - DISTINCT = "distinct"; - ITE = "ite"; - IF_THEN_ELSE = "if_then_else"; - TRUE = "true"; - FALSE = "false"; - NOT = "not"; - IMPLIES = "implies"; - AND = "and"; - OR = "or"; - XOR = "xor"; - IFF = "iff"; - EXISTS = "exists"; - FORALL = "forall"; - LET = "let"; - FLET = "flet"; - THEORY = "theory"; - LOGIC = "logic"; - SAT = "sat"; - UNSAT = "unsat"; - UNKNOWN = "unknown"; - BENCHMARK = "benchmark"; - // The SMT attribute tokens - LOGIC_ATTR = ":logic"; - ASSUMPTION_ATTR = ":assumption"; - FORMULA_ATTR = ":formula"; - STATUS_ATTR = ":status"; - EXTRASORTS_ATTR = ":extrasorts"; - EXTRAFUNS_ATTR = ":extrafuns"; - EXTRAPREDS_ATTR = ":extrapreds"; - C_NOTES = ":notes"; - // arithmetic symbols - EQUAL = "="; - LESS_THAN = "<"; - GREATER_THAN = ">"; - AMPERSAND = "&"; - AT = "@"; - POUND = "#"; - PLUS = "+"; - MINUS = "-"; - STAR = "*"; - DIV = "/"; - PERCENT = "%"; - PIPE = "|"; - TILDE = "~"; -} - -/** - * Matches any letter ('a'-'z' and 'A'-'Z'). - */ -protected -ALPHA options { paraphrase = "a letter"; } - : 'a'..'z' - | 'A'..'Z' - ; - -/** - * Matches the digits (0-9) - */ -protected -DIGIT options { paraphrase = "a digit"; } - : '0'..'9' - ; - -/** - * Matches an identifier from the input. An identifier is a sequence of letters, - * digits and "_", "'", "." symbols, starting with a letter. - */ -IDENTIFIER options { paraphrase = "an identifier"; testLiterals = true; } - : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* - ; - -/** - * Matches an identifier starting with a colon. An identifier is a sequence of letters, - * digits and "_", "'", "." symbols, starting with a colon. - */ -C_IDENTIFIER options { paraphrase = "an attribute (e.g., ':x')"; testLiterals = true; } - : ':' ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* - ; - -VAR_IDENTIFIER options { paraphrase = "a variable (e.g., '?x')"; testLiterals = false; } - : '?' IDENTIFIER - ; - -FUN_IDENTIFIER options { paraphrase = "a function variable (e.g, '$x')"; testLiterals = false; } - : '$' IDENTIFIER - ; - - -/** - * Matches the value of user-defined annotations or attributes. The only constraint imposed on a user-defined value is that it start with - * an open brace and end with closed brace. - */ -USER_VALUE - : '{' - ( '\n' { newline(); } - | ~('{' | '}' | '\n') - )* - '}' - ; - -/** - * Matches the question mark symbol ('?'). - */ -QUESTION_MARK options { paraphrase = "a question mark '?'"; } - : '?' - ; - -/** - * Matches the dollar sign ('$'). - */ -DOLLAR_SIGN options { paraphrase = "a dollar sign '$'"; } - : '$' - ; - -/** - * Matches the left bracket ('('). - */ -LPAREN options { paraphrase = "a left parenthesis '('"; } - : '('; - -/** - * Matches the right bracket ('('). - */ -RPAREN options { paraphrase = "a right parenthesis ')'"; } - : ')'; - -/** - * Matches and skips whitespace in the input. - */ -WHITESPACE options { paraphrase = "whitespace"; } - : (' ' | '\t' | '\f') { $setType(antlr::Token::SKIP); } - ; - -/** - * Matches and skips the newline symbols in the input. - */ -NEWLINE options { paraphrase = "a newline"; } - : ('\r' '\n' | '\r' | '\n') { $setType(antlr::Token::SKIP); newline(); } - ; - -/** - * Matches a numeral from the input (non-empty sequence of digits). - */ -NUMERAL options { paraphrase = "a numeral"; } - : (DIGIT)+ - ; - -/** - * Matches an allowed escaped character. - */ -protected ESCAPE - : '\\' ('"' | '\\' | 'n' | 't' | 'r') - ; - -/** - * Matches a double quoted string literal. Escaping is supported, and escape - * character '\' has to be escaped. - */ -STRING_LITERAL options { paraphrase = "a string literal"; } - : '"' (ESCAPE | ~('"'|'\\'))* '"' - ; - -/** - * Matches the comments and ignores them - */ -COMMENT options { paraphrase = "comment"; } - : ';' (~('\n' | '\r'))* { $setType(antlr::Token::SKIP); } - ; - -/* Arithmetic symbol tokens */ -EQUAL : "="; -LESS_THAN : "<"; -GREATER_THAN : ">"; -AMPERSAND : "&"; -AT : "@"; -POUND : "#"; -PLUS : "+"; -MINUS : "-"; -STAR : "*"; -DIV : "/"; -PERCENT : "%"; -PIPE : "|"; -TILDE : "~"; diff --git a/src/parser/smt/smt_parser.g b/src/parser/smt/smt_parser.g deleted file mode 100644 index b876e1649..000000000 --- a/src/parser/smt/smt_parser.g +++ /dev/null @@ -1,351 +0,0 @@ -/* ******************* */ -/* smt_parser.g - ** Original author: dejan - ** Major contributors: mdeters, cconway - ** Minor contributors (to current version): none - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Parser for SMT-LIB input language. - **/ - -header "post_include_hpp" { -#include "parser/antlr_parser.h" -#include "expr/command.h" -} - -header "post_include_cpp" { -#include "expr/type.h" -#include "util/output.h" -#include <vector> - -using namespace std; -using namespace CVC4; -using namespace CVC4::parser; -} - -options { - language = "Cpp"; // C++ output for antlr - namespaceStd = "std"; // Cosmetic option to get rid of long defines in generated code - namespaceAntlr = "antlr"; // Cosmetic option to get rid of long defines in generated code - namespace = "CVC4::parser"; // Wrap everything in the smtparser namespace -} - -/** - * AntlrSmtParser class is the parser for the SMT-LIB files. - */ -class AntlrSmtParser extends Parser("AntlrParser"); -options { - genHashLines = true; // Include line number information - importVocab = SmtVocabulary; // Import the common vocabulary - defaultErrorHandler = false; // Skip the default error handling, just break with exceptions - k = 2; -} - -/** - * Parses an expression. - * @return the parsed expression - */ -parseExpr returns [CVC4::Expr expr] - : expr = annotatedFormula - | EOF - ; - -/** - * Parses a command (the whole benchmark) - * @return the command of the benchmark - */ -parseCommand returns [CVC4::Command* cmd] - : cmd = benchmark - ; - -/** - * Matches the whole SMT-LIB benchmark. - * @return the sequence command containing the whole problem - */ -benchmark returns [Command* cmd] - : LPAREN BENCHMARK IDENTIFIER cmd = benchAttributes RPAREN - | EOF { cmd = 0; } - ; - -/** - * Matches a sequence of benchmark attributes and returns a pointer to a - * command sequence. - * @return the command sequence - */ -benchAttributes returns [CVC4::CommandSequence* cmd_seq = new CommandSequence()] -{ - Command* cmd; -} - : (cmd = benchAttribute { if (cmd) cmd_seq->addCommand(cmd); } )+ - ; - -/** - * Matches a benchmark attribute, sucha as ':logic', ':formula', and returns - * a corresponding command - * @retrurn a command corresponding to the attribute - */ -benchAttribute returns [Command* smt_command = 0] -{ - Expr formula; - string logic; - SetBenchmarkStatusCommand::BenchmarkStatus b_status = SetBenchmarkStatusCommand::SMT_UNKNOWN; -} - : LOGIC_ATTR logic = identifier - { setLogic(logic); - smt_command = new SetBenchmarkLogicCommand(logic); } - | ASSUMPTION_ATTR formula = annotatedFormula - { smt_command = new AssertCommand(formula); } - | FORMULA_ATTR formula = annotatedFormula - { smt_command = new CheckSatCommand(formula); } - | STATUS_ATTR b_status = status - { smt_command = new SetBenchmarkStatusCommand(b_status); } - | EXTRAFUNS_ATTR LPAREN (functionDeclaration)+ RPAREN - | EXTRAPREDS_ATTR LPAREN (predicateDeclaration)+ RPAREN - | EXTRASORTS_ATTR LPAREN (sortDeclaration)+ RPAREN - | NOTES_ATTR STRING_LITERAL - | annotation - ; - -/** - * Matches an annotated formula. - * @return the expression representing the formula - */ -annotatedFormula returns [CVC4::Expr formula] -{ - Debug("parser") << "annotated formula: " << LT(1)->getText() << endl; - Kind kind = CVC4::kind::UNDEFINED_KIND; - vector<Expr> args; - std::string name; - Expr expr1, expr2, expr3; -} - : /* a built-in operator application */ - LPAREN kind = builtinOp annotatedFormulas[args] RPAREN - { checkArity(kind, args.size()); - if((kind == kind::AND || kind == kind::OR) && args.size() == 1) { - formula = args[0]; - } else { - formula = mkExpr(kind, args); - } - } - - | /* a "distinct" expr */ - /* TODO: Should there be a DISTINCT kind in the Expr package? */ - LPAREN DISTINCT annotatedFormulas[args] RPAREN - { formula = mkExpr(CVC4::kind::DISTINCT, args); } - - | /* An ite expression */ - LPAREN (ITE | IF_THEN_ELSE) - { Expr test, trueExpr, falseExpr; } - test = annotatedFormula - trueExpr = annotatedFormula - falseExpr = annotatedFormula - RPAREN - { formula = mkExpr(CVC4::kind::ITE, test, trueExpr, falseExpr); } - - | /* A let/flet binding */ - LPAREN (LET LPAREN name=variable[CHECK_UNDECLARED] - | FLET LPAREN name=function_var[CHECK_UNDECLARED] ) - expr1=annotatedFormula RPAREN - { defineVar(name,expr1); } - formula=annotatedFormula - { undefineVar(name); } - RPAREN - - | /* A non-built-in function application */ - { Expr f; } - LPAREN f = functionSymbol - { args.push_back(f); } - annotatedFormulas[args] RPAREN - // TODO: check arity - { formula = mkExpr(CVC4::kind::APPLY_UF, args); } - - | /* a variable */ - { std::string name; } - ( name = identifier[CHECK_DECLARED] - | name = variable[CHECK_DECLARED] - | name = function_var[CHECK_DECLARED] ) - { formula = getVariable(name); } - - /* constants */ - | TRUE { formula = getTrueExpr(); } - | FALSE { formula = getFalseExpr(); } - /* TODO: quantifiers, arithmetic constants */ - ; - -/** - * Matches a sequence of annotaed formulas and puts them into the formulas - * vector. - * @param formulas the vector to fill with formulas - */ -annotatedFormulas[std::vector<Expr>& formulas] -{ - Expr f; -} - : ( f = annotatedFormula { formulas.push_back(f); } )+ - ; - -/** -* Matches on of the unary Boolean connectives. -* @return the kind of the Bollean connective -*/ -builtinOp returns [CVC4::Kind kind] -{ - Debug("parser") << "builtin: " << LT(1)->getText() << endl; -} - : NOT { kind = CVC4::kind::NOT; } - | IMPLIES { kind = CVC4::kind::IMPLIES; } - | AND { kind = CVC4::kind::AND; } - | OR { kind = CVC4::kind::OR; } - | XOR { kind = CVC4::kind::XOR; } - | IFF { kind = CVC4::kind::IFF; } - | EQUAL { kind = CVC4::kind::EQUAL; } - /* TODO: lt, gt, plus, minus, etc. */ - ; - -/** - * Matches a (possibly undeclared) predicate identifier (returning the string). - * @param check what kind of check to do with the symbol - */ -predicateName[DeclarationCheck check = CHECK_NONE] returns [std::string p] - : p = identifier[check] - ; - -/** - * Matches a (possibly undeclared) function identifier (returning the string) - * @param check what kind of check to do with the symbol - */ -functionName[DeclarationCheck check = CHECK_NONE] returns [std::string name] - : name = identifier[check,SYM_FUNCTION] - ; - -/** - * Matches an previously declared function symbol (returning an Expr) - */ -functionSymbol returns [CVC4::Expr fun] -{ - string name; -} - : name = functionName[CHECK_DECLARED] - { checkFunction(name); - fun = getFunction(name); } - ; - -/** - * Matches an attribute name from the input (:attribute_name). - */ -attribute - : C_IDENTIFIER - ; - -variable[DeclarationCheck check = CHECK_NONE] returns [std::string name] - : x:VAR_IDENTIFIER - { name = x->getText(); - checkDeclaration(name, check, SYM_VARIABLE); } - ; - -function_var[DeclarationCheck check = CHECK_NONE] returns [std::string name] - : x:FUN_IDENTIFIER - { name = x->getText(); - checkDeclaration(name, check, SYM_FUNCTION); } - ; - -/** - * Matches the sort symbol, which can be an arbitrary identifier. - * @param check the check to perform on the name - */ -sortName[DeclarationCheck check = CHECK_NONE] returns [std::string name] - : name = identifier[check,SYM_SORT] - ; - -sortSymbol returns [CVC4::Type* t] -{ - std::string name; -} - : name = sortName { t = getSort(name); } - ; - -functionDeclaration -{ - string name; - Type* t; - std::vector<Type*> sorts; -} - : LPAREN name = functionName[CHECK_UNDECLARED] - t = sortSymbol // require at least one sort - { sorts.push_back(t); } - sortList[sorts] RPAREN - { t = functionType(sorts); - mkVar(name, t); } - ; - -/** - * Matches the declaration of a predicate and declares it - */ -predicateDeclaration -{ - string p_name; - std::vector<Type*> p_sorts; - Type *t; -} - : LPAREN p_name = predicateName[CHECK_UNDECLARED] sortList[p_sorts] RPAREN - { t = predicateType(p_sorts); - mkVar(p_name, t); } - ; - -sortDeclaration -{ - string name; -} - : name = sortName[CHECK_UNDECLARED] - { newSort(name); } - ; - -/** - * Matches a sequence of sort symbols and fills them into the given vector. - */ -sortList[std::vector<Type*>& sorts] -{ - Type* t; -} - : ( t = sortSymbol { sorts.push_back(t); })* - ; - -/** - * Matches the status of the benchmark, one of 'sat', 'unsat' or 'unknown'. - */ -status returns [ SetBenchmarkStatusCommand::BenchmarkStatus status ] - : SAT { status = SetBenchmarkStatusCommand::SMT_SATISFIABLE; } - | UNSAT { status = SetBenchmarkStatusCommand::SMT_UNSATISFIABLE; } - | UNKNOWN { status = SetBenchmarkStatusCommand::SMT_UNKNOWN; } - ; - -/** - * Matches an annotation, which is an attribute name, with an optional user - */ -annotation - : attribute (USER_VALUE)? - ; - -/** - * Matches an identifier and returns a string. - * @param check what kinds of check to do on the symbol - * @return the id string - */ -identifier[DeclarationCheck check = CHECK_NONE, - SymbolType type = SYM_VARIABLE] -returns [std::string id] -{ - Debug("parser") << "identifier: " << LT(1)->getText() - << " check? " << toString(check) - << " type? " << toString(type) << endl; -} - : x:IDENTIFIER - { id = x->getText(); - checkDeclaration(id, check, type); } - ; - diff --git a/src/parser/symbol_table.h b/src/parser/symbol_table.h index 3bcc080bd..d6467f4e9 100644 --- a/src/parser/symbol_table.h +++ b/src/parser/symbol_table.h @@ -23,6 +23,8 @@ #include <ext/hash_map> +#include "util/Assert.h" + namespace CVC4 { namespace parser { |