summaryrefslogtreecommitdiff
path: root/src/parser/input.h
diff options
context:
space:
mode:
authorChristopher L. Conway <christopherleeconway@gmail.com>2010-04-28 18:34:11 +0000
committerChristopher L. Conway <christopherleeconway@gmail.com>2010-04-28 18:34:11 +0000
commita72c7a26fda2b9c268912e618fd7d71164e4800a (patch)
treee1694867f049b5328720abc9496cfe926989aae7 /src/parser/input.h
parent7a8454030fdbb1e6c2a6db7ce18eafe0764eaf4a (diff)
Refactoring Input/Parser code to support external manipulation of the parser state.
Diffstat (limited to 'src/parser/input.h')
-rw-r--r--src/parser/input.h233
1 files changed, 152 insertions, 81 deletions
diff --git a/src/parser/input.h b/src/parser/input.h
index a32416305..21c5c4869 100644
--- a/src/parser/input.h
+++ b/src/parser/input.h
@@ -2,7 +2,7 @@
/** input.h
** Original author: cconway
** Major contributors: none
- ** Minor contributors (to current version): mdeters
+ ** Minor contributors (to current version): none
** This file is part of the CVC4 prototype.
** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys)
** Courant Institute of Mathematical Sciences
@@ -10,157 +10,228 @@
** See the file COPYING in the top-level source directory for licensing
** information.
**
- ** Parser abstraction.
+ ** Base for ANTLR parser classes.
**/
-#include "cvc4parser_public.h"
+#include "cvc4parser_private.h"
-#ifndef __CVC4__PARSER__INPUT_H
-#define __CVC4__PARSER__INPUT_H
+#ifndef __CVC4__PARSER__ANTLR_INPUT_H
+#define __CVC4__PARSER__ANTLR_INPUT_H
+#include <antlr3.h>
+#include <iostream>
#include <string>
+#include <vector>
#include "expr/expr.h"
+#include "expr/expr_manager.h"
#include "parser/parser_exception.h"
#include "parser/parser_options.h"
+#include "util/Assert.h"
namespace CVC4 {
-// Forward declarations
-class ExprManager;
class Command;
class Type;
+class FunctionType;
namespace parser {
-class ParserState;
+/** Wrapper around an ANTLR3 input stream. */
+class AntlrInputStream {
+ std::string d_name;
+ pANTLR3_INPUT_STREAM d_input;
+
+ AntlrInputStream(std::string name,pANTLR3_INPUT_STREAM input);
+ /* This is private and throws an exception, because you should never use it. */
+ AntlrInputStream(const AntlrInputStream& inputStream) {
+ Unimplemented("copy constructor for AntlrInputStream");
+ }
+ /* This is private and throws an exception, because you should never use it. */
+ AntlrInputStream& operator=(const AntlrInputStream& inputStream) {
+ Unimplemented("operator= for AntlrInputStream");
+ }
+
+public:
+
+ virtual ~AntlrInputStream();
+
+ pANTLR3_INPUT_STREAM getAntlr3InputStream() const;
+ const std::string getName() const;
+
+ /** Create a file input.
+ *
+ * @param filename the path of the file to read
+ * @param useMmap <code>true</code> if the input should use memory-mapped I/O; otherwise, the
+ * input will use the standard ANTLR3 I/O implementation.
+ */
+ static AntlrInputStream* newFileInputStream(const std::string& name, bool useMmap = false);
+
+ /** Create an input from an istream. */
+ // AntlrInputStream newInputStream(std::istream& input, const std::string& name);
+
+ /** Create a string input.
+ *
+ * @param input the string to read
+ * @param name the "filename" to use when reporting errors
+ */
+ static AntlrInputStream* newStringInputStream(const std::string& input, const std::string& name);
+};
+
+class Parser;
/**
- * An input to be parsed. This class serves two purposes: to the client, it provides
- * the methods <code>parseNextCommand</code> and <code>parseNextExpression</code> to
- * extract a stream of <code>Command</code>'s and <code>Expr</code>'s from the input;
- * to the parser, it provides a repository for state data, like the variable symbol
- * table, and a variety of convenience functions for updating and checking the state.
- *
- * An Input should be created using the static factory methods,
- * e.g., <code>newFileParser</code> and <code>newStringInput</code>, and
- * should be deleted when done.
+ * An input to be parsed. The static factory methods in this class (e.g.,
+ * <code>newFileInput</code>, <code>newStringInput</code>) create a parser
+ * for the given input language and attach it to an input source of the
+ * appropriate type.
*/
class CVC4_PUBLIC Input {
- friend class ParserState;
+ friend class Parser; // for parseError, parseCommand, parseExpr
- /** Whether to de-allocate the input */
- // bool d_deleteInput;
+ /** The display name of the input (e.g., the filename). */
+ std::string d_name;
- ParserState *d_parserState;
+ /** The token lookahead used to lex and parse the input. This should usually be equal to
+ * <code>K</code> for an LL(k) grammar. */
+ unsigned int d_lookahead;
-public:
+ /** The ANTLR3 lexer associated with this input. This will be <code>NULL</code> initially. It
+ * must be set by a call to <code>setLexer</code>, preferably in the subclass constructor. */
+ pANTLR3_LEXER d_lexer;
- /**
- * Create a new parser for the given file.
- * @param exprManager the ExprManager to use
- * @param filename the path of the file to parse
- */
- Input(ExprManager* exprManager, const std::string& filename);
+ /** The ANTLR3 parser associated with this input. This will be <code>NULL</code> initially. It
+ * must be set by a call to <code>setParser</code>, preferably in the subclass constructor.
+ * The <code>super</code> field of <code>d_parser</code> will be set to <code>this</code> and
+ * <code>reportError</code> will be set to <code>Input::reportError</code>. */
+ pANTLR3_PARSER d_parser;
- /**
- * Destructor.
+ /** The ANTLR3 token stream associated with this input. We only need this so we can free it on exit.
+ * This is set by <code>setLexer</code>.
+ * NOTE: We assume that we <em>can</em> free it on exit. No sharing! */
+ pANTLR3_COMMON_TOKEN_STREAM d_tokenStream;
+
+ /** The ANTLR3 input stream associated with this input. We only need this so we can free it on exit.
+ * NOTE: We assume that we <em>can</em> free it on exit. No sharing! */
+ AntlrInputStream *d_inputStream;
+
+ /** Turns an ANTLR3 exception into a message for the user and calls <code>parseError</code>. */
+ static void reportError(pANTLR3_BASE_RECOGNIZER recognizer);
+
+ /** Builds a message for a lexer error and calls <code>parseError</code>. */
+ static void lexerError(pANTLR3_BASE_RECOGNIZER recognizer);
+
+ /* Since we own d_tokenStream and it needs to be freed, we need to prevent
+ * copy construction and assignment.
*/
+ Input(const Input& input) { Unimplemented("Copy constructor for Input."); }
+ Input& operator=(const Input& input) { Unimplemented("operator= for Input."); }
+
+public:
+
+ /** Destructor. Frees the token stream and closes the input. */
virtual ~Input();
/** Create an input for the given file.
*
- * @param exprManager the ExprManager for creating expressions from the input
* @param lang the input language
* @param filename the input filename
* @param useMmap true if the parser should use memory-mapped I/O (default: false)
*/
- static Input* newFileInput(ExprManager* exprManager, InputLanguage lang, const std::string& filename, bool useMmap=false);
+ static Input* newFileInput(InputLanguage lang, const std::string& filename, bool useMmap=false);
+
+ /** Create an input for the given AntlrInputStream. NOTE: the new Input
+ * will take ownership of the input stream and delete it at destruction time.
+ *
+ * @param lang the input language
+ * @param inputStream the input stream
+ *
+ * */
+ static Input* newInput(InputLanguage lang, AntlrInputStream *inputStream);
/** Create an input for the given stream.
*
- * @param exprManager the ExprManager for creating expressions from the input
* @param lang the input language
* @param input the input stream
* @param name the name of the stream, for use in error messages
*/
- //static Parser* getNewParser(ExprManager* exprManager, InputLanguage lang, std::istream& input, const std::string& name);
+ //static Parser* newStreamInput(InputLanguage lang, std::istream& input, const std::string& name);
/** Create an input for the given string
*
- * @param exprManager the ExprManager for creating expressions from the input
* @param lang the input language
* @param input the input string
* @param name the name of the stream, for use in error messages
*/
- static Input* newStringInput(ExprManager* exprManager, InputLanguage lang, const std::string& input, const std::string& name);
+ static Input* newStringInput(InputLanguage lang, const std::string& input, const std::string& name);
- /**
- * Check if we are done -- either the end of input has been reached, or some
- * error has been encountered.
- * @return true if parser is done
- */
- bool done() const;
+ /** Retrieve the text associated with a token. */
+ inline static std::string tokenText(pANTLR3_COMMON_TOKEN token);
- /** Enable semantic checks during parsing. */
- void enableChecks();
- /**
- * Disable semantic checks during parsing. Disabling checks may lead
- * to crashes on bad inputs.
- */
- void disableChecks();
-
- /**
- * Parse the next command of the input. If EOF is encountered a EmptyCommand
- * is returned and done flag is set.
+protected:
+ /** Create an input. This input takes ownership of the given input stream,
+ * and will delete it at destruction time.
*
- * @throws ParserException if an error is encountered during parsing.
+ * @param inputStream the input stream to use
+ * @param lookahead the lookahead needed to parse the input (i.e., k for
+ * an LL(k) grammar)
*/
- Command* parseNextCommand() throw(ParserException);
+ Input(AntlrInputStream *inputStream, unsigned int lookahead);
- /**
- * Parse the next expression of the stream. If EOF is encountered a null
- * expression is returned and done flag is set.
- * @return the parsed expression
- * @throws ParserException if an error is encountered during parsing.
- */
- Expr parseNextExpression() throw(ParserException);
+ /** Retrieve the input stream for this parser. */
+ AntlrInputStream *getInputStream();
-protected:
+ /** Retrieve the token stream for this parser. Must not be called before
+ * <code>setLexer()</code>. */
+ pANTLR3_COMMON_TOKEN_STREAM getTokenStream();
- /** Called by <code>parseNextCommand</code> to actually parse a command from
+ /** Parse a command from
* the input by invoking the implementation-specific parsing method. Returns
* <code>NULL</code> if there is no command there to parse.
*
* @throws ParserException if an error is encountered during parsing.
*/
- virtual Command* doParseCommand() throw(ParserException) = 0;
+ virtual Command* parseCommand() throw(ParserException) = 0;
+
+ /**
+ * Throws a <code>ParserException</code> with the given message.
+ */
+ void parseError(const std::string& msg) throw (ParserException);
- /** Called by <code>parseNextExpression</code> to actually parse an
+ /** Parse an
* expression from the input by invoking the implementation-specific
* parsing method. Returns a null <code>Expr</code> if there is no
* expression there to parse.
*
* @throws ParserException if an error is encountered during parsing.
*/
- virtual Expr doParseExpr() throw(ParserException) = 0;
-
- inline ParserState* getParserState() const {
- return d_parserState;
- }
-
-private:
-
- /** Throws a <code>ParserException</code> with the given error message.
- * Implementations should fill in the <code>ParserException</code> with
- * line number information, etc. */
- virtual void parseError(const std::string& msg) throw (ParserException) = 0;
-
-}; // end of class Input
+ virtual Expr parseExpr() throw(ParserException) = 0;
+
+ /** Set the ANTLR3 lexer for this input. */
+ void setAntlr3Lexer(pANTLR3_LEXER pLexer);
+
+ /** Set the ANTLR3 parser implementation for this input. */
+ void setAntlr3Parser(pANTLR3_PARSER pParser);
+
+ /** Set the Parser object for this input. */
+ void setParser(Parser *parser);
+};
+
+std::string Input::tokenText(pANTLR3_COMMON_TOKEN token) {
+ ANTLR3_MARKER start = token->getStartIndex(token);
+ ANTLR3_MARKER end = token->getStopIndex(token);
+ /* start and end are boundary pointers. The text is a string
+ * of (end-start+1) bytes beginning at start. */
+ std::string txt( (const char *)start, end-start+1 );
+ Debug("parser-extra") << "tokenText: start=" << start << std::endl
+ << "end=" << end << std::endl
+ << "txt='" << txt << "'" << std::endl;
+ return txt;
+}
}/* CVC4::parser namespace */
}/* CVC4 namespace */
-#endif /* __CVC4__PARSER__INPUT_H */
+#endif /* __CVC4__PARSER__ANTLR_INPUT_H */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback