diff options
author | Christopher L. Conway <christopherleeconway@gmail.com> | 2010-05-01 20:45:47 +0000 |
---|---|---|
committer | Christopher L. Conway <christopherleeconway@gmail.com> | 2010-05-01 20:45:47 +0000 |
commit | 592738efcfa12734e2ac0bb7e7dfa33299f1abb3 (patch) | |
tree | 446aa62f6bb2832828d6a2983b27007a9bf90cc9 /src | |
parent | d0affb22ebcd4cc7cc7dd6ec7a51233d8632d630 (diff) |
Adding missed antlr_input files
Diffstat (limited to 'src')
-rw-r--r-- | src/parser/antlr_input.cpp | 225 | ||||
-rw-r--r-- | src/parser/antlr_input.h | 196 |
2 files changed, 421 insertions, 0 deletions
diff --git a/src/parser/antlr_input.cpp b/src/parser/antlr_input.cpp new file mode 100644 index 000000000..675a04510 --- /dev/null +++ b/src/parser/antlr_input.cpp @@ -0,0 +1,225 @@ +/********************* */ +/** input.cpp + ** Original author: cconway + ** Major contributors: none + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** A super-class for ANTLR-generated input language parsers + **/ + +#include <limits.h> +#include <antlr3.h> + +#include "antlr_input.h" +#include "input.h" +#include "bounded_token_buffer.h" +#include "bounded_token_factory.h" +#include "memory_mapped_input_buffer.h" +#include "parser_exception.h" +#include "parser.h" + +#include "expr/command.h" +#include "expr/type.h" +#include "parser/cvc/cvc_input.h" +#include "parser/smt/smt_input.h" +#include "parser/smt2/smt2_input.h" +#include "util/output.h" +#include "util/Assert.h" + +using namespace std; +using namespace CVC4; +using namespace CVC4::parser; +using namespace CVC4::kind; + +namespace CVC4 { +namespace parser { + +AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input) : + InputStream(name), + d_input(input) { +} + +AntlrInputStream::~AntlrInputStream() { + d_input->free(d_input); +} + +pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const { + return d_input; +} + +AntlrInputStream* AntlrInputStream::newFileInputStream(const std::string& name, bool useMmap) { + if( useMmap ) { + return new AntlrInputStream( name, MemoryMappedInputBufferNew(name) ); + } else { + return new AntlrInputStream( name, antlr3AsciiFileStreamNew((pANTLR3_UINT8) name.c_str()) ); + } +/* + if( d_inputStream == NULL ) { + throw ParserException("Couldn't open file: " + filename); + } +*/ +} + +AntlrInputStream* AntlrInputStream::newStringInputStream(const std::string& input, const std::string& name) /*throw (InputStreamException) */{ + char* inputStr = strdup(input.c_str()); + char* nameStr = strdup(name.c_str()); +/* + if( inputStr==NULL || nameStr==NULL ) { + throw InputStreamException("Couldn't initialize string input: '" + input + "'"); + } +*/ + return new AntlrInputStream( name, + antlr3NewAsciiStringInPlaceStream( + (pANTLR3_UINT8)inputStr,input.size(), + (pANTLR3_UINT8)nameStr) ); +} + +AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream *inputStream) { + AntlrInput* input; + + switch(lang) { + case LANG_CVC4: { + input = new CvcInput(inputStream); + break; + } + case LANG_SMTLIB: + input = new SmtInput(inputStream); + break; + + case LANG_SMTLIB_V2: + input = new Smt2Input(inputStream); + break; + + default: + Unhandled(lang); + } + return input; +} + +AntlrInput::AntlrInput(AntlrInputStream *inputStream, unsigned int lookahead) : + Input(inputStream), + d_lookahead(lookahead), + d_lexer(NULL), + d_parser(NULL), + d_antlr3InputStream( inputStream->getAntlr3InputStream() ), + d_tokenStream(NULL) { +} + +/* +AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead) + Parser(exprManager,name), + d_lookahead(lookahead) { + +} +*/ + +/* +AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) : + Input(exprManager,name), + d_lookahead(lookahead), + d_lexer(NULL), + d_parser(NULL), + d_tokenStream(NULL) { + + char* inputStr = strdup(input.c_str()); + char* nameStr = strdup(name.c_str()); + if( inputStr==NULL || nameStr==NULL ) { + throw ParserException("Couldn't initialize string input: '" + input + "'"); + } + d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr); + if( d_inputStream == NULL ) { + throw ParserException("Couldn't create input stream for string: '" + input + "'"); + } + +} +*/ + +AntlrInput::~AntlrInput() { + d_tokenStream->free(d_tokenStream); +} + +pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() { + return d_tokenStream; +} + +void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) { + pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super); + AlwaysAssert(lexer!=NULL); + Parser *parser = (Parser*)(lexer->super); + AlwaysAssert(parser!=NULL); + AntlrInput *input = (AntlrInput*) parser->getInput(); + AlwaysAssert(input!=NULL); + + // Call the error display routine + input->parseError("Error finding next token."); +} + +void AntlrInput::parseError(const std::string& message) + throw (ParserException) { + Debug("parser") << "Throwing exception: " + << getInputStream()->getName() << ":" + << d_lexer->getLine(d_lexer) << "." + << d_lexer->getCharPositionInLine(d_lexer) << ": " + << message << endl; + throw ParserException(message, getInputStream()->getName(), + d_lexer->getLine(d_lexer), + d_lexer->getCharPositionInLine(d_lexer)); +} + + +void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) { + d_lexer = pLexer; + + pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory; + if( pTokenFactory != NULL ) { + pTokenFactory->close(pTokenFactory); + } + + /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */ + pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead); + if( pTokenFactory == NULL ) { + throw ParserException("Couldn't create token factory."); + } + d_lexer->rec->state->tokFactory = pTokenFactory; + + pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource); + if( buffer == NULL ) { + throw ParserException("Couldn't create token buffer."); + } + + d_tokenStream = buffer->commonTstream; + + // Override default lexer error reporting + d_lexer->rec->reportError = &lexerError; +} + +void AntlrInput::setParser(Parser *parser) { + // ANTLR isn't using super in the lexer or the parser, AFAICT. + // We could also use @lexer/parser::context to add a field to the generated + // objects, but then it would have to be declared separately in every + // language's grammar and we'd have to in the address of the field anyway. + d_lexer->super = parser; + d_parser->super = parser; + +} + +void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) { + d_parser = pParser; +// d_parser->rec->match = &match; + d_parser->rec->reportError = &reportError; + /* Don't try to recover from a parse error. */ + // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it. + d_parser->rec->recoverFromMismatchedToken = + (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*)) + d_parser->rec->mismatch; +} + + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ diff --git a/src/parser/antlr_input.h b/src/parser/antlr_input.h new file mode 100644 index 000000000..1d6f5f790 --- /dev/null +++ b/src/parser/antlr_input.h @@ -0,0 +1,196 @@ +/********************* */ +/** input.h + ** Original author: cconway + ** Major contributors: none + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Base for ANTLR parser classes. + **/ + +#include "cvc4parser_private.h" + +#ifndef __CVC4__PARSER__ANTLR_INPUT_H +#define __CVC4__PARSER__ANTLR_INPUT_H + +#include <antlr3.h> +#include <iostream> +#include <string> +#include <vector> + +#include "input.h" +#include "parser_options.h" +#include "parser_exception.h" +#include "expr/expr.h" +#include "expr/expr_manager.h" +#include "util/Assert.h" + +namespace CVC4 { + +class Command; +class Type; +class FunctionType; + +namespace parser { + +/** Wrapper around an ANTLR3 input stream. */ +class AntlrInputStream : public InputStream { + pANTLR3_INPUT_STREAM d_input; + + AntlrInputStream(std::string name,pANTLR3_INPUT_STREAM input); + + /* This is private and unimplemented, because you should never use it. */ + AntlrInputStream(const AntlrInputStream& inputStream); + + /* This is private and unimplemented, because you should never use it. */ + AntlrInputStream& operator=(const AntlrInputStream& inputStream); + +public: + + virtual ~AntlrInputStream(); + + pANTLR3_INPUT_STREAM getAntlr3InputStream() const; + + /** Create a file input. + * + * @param filename the path of the file to read + * @param useMmap <code>true</code> if the input should use memory-mapped I/O; otherwise, the + * input will use the standard ANTLR3 I/O implementation. + */ + static AntlrInputStream* newFileInputStream(const std::string& name, bool useMmap = false); + + /** Create an input from an istream. */ + // AntlrInputStream newInputStream(std::istream& input, const std::string& name); + + /** Create a string input. + * + * @param input the string to read + * @param name the "filename" to use when reporting errors + */ + static AntlrInputStream* newStringInputStream(const std::string& input, const std::string& name); +}; + +class Parser; + +/** + * An input to be parsed. The static factory methods in this class (e.g., + * <code>newFileInput</code>, <code>newStringInput</code>) create a parser + * for the given input language and attach it to an input source of the + * appropriate type. + */ +class AntlrInput : public Input{ + /** The token lookahead used to lex and parse the input. This should usually be equal to + * <code>K</code> for an LL(k) grammar. */ + unsigned int d_lookahead; + + /** The ANTLR3 lexer associated with this input. This will be <code>NULL</code> initially. It + * must be set by a call to <code>setLexer</code>, preferably in the subclass constructor. */ + pANTLR3_LEXER d_lexer; + + /** The ANTLR3 parser associated with this input. This will be <code>NULL</code> initially. It + * must be set by a call to <code>setParser</code>, preferably in the subclass constructor. + * The <code>super</code> field of <code>d_parser</code> will be set to <code>this</code> and + * <code>reportError</code> will be set to <code>Input::reportError</code>. */ + pANTLR3_PARSER d_parser; + + /** The ANTLR3 input stream associated with this input. */ + pANTLR3_INPUT_STREAM d_antlr3InputStream; + + /** The ANTLR3 token stream associated with this input. We only need this so we can free it on exit. + * This is set by <code>setLexer</code>. + * NOTE: We assume that we <em>can</em> free it on exit. No sharing! */ + pANTLR3_COMMON_TOKEN_STREAM d_tokenStream; + + /** Turns an ANTLR3 exception into a message for the user and calls <code>parseError</code>. */ + static void reportError(pANTLR3_BASE_RECOGNIZER recognizer); + + /** Builds a message for a lexer error and calls <code>parseError</code>. */ + static void lexerError(pANTLR3_BASE_RECOGNIZER recognizer); + + /* Since we own d_tokenStream and it needs to be freed, we need to prevent + * copy construction and assignment. + */ + AntlrInput(const AntlrInput& input); + AntlrInput& operator=(const AntlrInput& input); + +public: + + /** Destructor. Frees the token stream and closes the input. */ + virtual ~AntlrInput(); + + /** Create an input for the given AntlrInputStream. NOTE: the new Input + * will take ownership of the input stream and delete it at destruction time. + * + * @param lang the input language + * @param inputStream the input stream + * + * */ + static AntlrInput* newInput(InputLanguage lang, AntlrInputStream *inputStream); + + /** Retrieve the text associated with a token. */ + inline static std::string tokenText(pANTLR3_COMMON_TOKEN token); + /** Retrieve an Integer from the text of a token */ + inline static Integer tokenToInteger( pANTLR3_COMMON_TOKEN token ); + /** Retrieve a Rational from the text of a token */ + inline static Rational tokenToRational(pANTLR3_COMMON_TOKEN token); + +protected: + /** Create an input. This input takes ownership of the given input stream, + * and will delete it at destruction time. + * + * @param inputStream the input stream to use + * @param lookahead the lookahead needed to parse the input (i.e., k for + * an LL(k) grammar) + */ + AntlrInput(AntlrInputStream *inputStream, unsigned int lookahead); + + /** Retrieve the token stream for this parser. Must not be called before + * <code>setLexer()</code>. */ + pANTLR3_COMMON_TOKEN_STREAM getTokenStream(); + + /** + * Throws a <code>ParserException</code> with the given message. + */ + void parseError(const std::string& msg) throw (ParserException); + + /** Set the ANTLR3 lexer for this input. */ + void setAntlr3Lexer(pANTLR3_LEXER pLexer); + + /** Set the ANTLR3 parser implementation for this input. */ + void setAntlr3Parser(pANTLR3_PARSER pParser); + + /** Set the Parser object for this input. */ + void setParser(Parser *parser); +}; + +std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) { + ANTLR3_MARKER start = token->getStartIndex(token); + ANTLR3_MARKER end = token->getStopIndex(token); + /* start and end are boundary pointers. The text is a string + * of (end-start+1) bytes beginning at start. */ + std::string txt( (const char *)start, end-start+1 ); + Debug("parser-extra") << "tokenText: start=" << start << std::endl + << "end=" << end << std::endl + << "txt='" << txt << "'" << std::endl; + return txt; +} + +Integer AntlrInput::tokenToInteger(pANTLR3_COMMON_TOKEN token) { + Integer i( tokenText(token) ); + return i; +} + +Rational AntlrInput::tokenToRational(pANTLR3_COMMON_TOKEN token) { + Rational r( tokenText(token) ); + return r; +} + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ + +#endif /* __CVC4__PARSER__ANTLR_INPUT_H */ |