Merging from branches/antlr3 (r246:354)

author: Christopher L. Conway <christopherleeconway@gmail.com> 2010-03-30 20:22:33 +0000
committer: Christopher L. Conway <christopherleeconway@gmail.com> 2010-03-30 20:22:33 +0000
commit: 8730e9320a833a9eb0e65074f9988950b7424c0c (patch)
tree: 1cb09404256743e208fece079ba473595e05edcd /src/parser/antlr_input.cpp
parent: 8c87c05ac56a5f29b2ae1e658f2d7d3b7b588163 (diff)
1 files changed, 317 insertions, 0 deletions
diff --git a/src/parser/antlr_input.cpp b/src/parser/antlr_input.cpp
new file mode 100644
index 000000000..02e07bc8f
--- /dev/null
+++ b/src/parser/antlr_input.cpp
@@ -0,0 +1,317 @@
+/*********************                                                        */
+/** antlr_input.cpp
+ ** Original author: dejan
+ ** Major contributors: cconway
+ ** Minor contributors (to current version): mdeters
+ ** This file is part of the CVC4 prototype.
+ ** Copyright (c) 2009, 2010  The Analysis of Computer Systems Group (ACSys)
+ ** Courant Institute of Mathematical Sciences
+ ** New York University
+ ** See the file COPYING in the top-level source directory for licensing
+ ** information.
+ **
+ ** A super-class for ANTLR-generated input language parsers
+ **/
+
+/*
+ * antlr_input.cpp
+ *
+ *  Created on: Nov 30, 2009
+ *      Author: dejan
+ */
+
+#include <limits.h>
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+#include <antlr3.h>
+
+#include "util/output.h"
+#include "util/Assert.h"
+#include "expr/command.h"
+#include "expr/type.h"
+#include "parser/antlr_input.h"
+#include "parser/bounded_token_buffer.h"
+#include "parser/bounded_token_factory.h"
+#include "parser/memory_mapped_input_buffer.h"
+#include "parser/parser_exception.h"
+
+using namespace std;
+using namespace CVC4;
+using namespace CVC4::parser;
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace parser {
+
+/**
+ * Construct an input that reads from the file named by `filename`.
+ *
+ * The stream is created with MemoryMappedInputBufferNew() when useMmap is
+ * true and with antlr3AsciiFileStreamNew() otherwise. The lexer, parser and
+ * token stream all start out NULL.
+ *
+ * @param exprManager forwarded to the Input base class
+ * @param filename the file to open; also forwarded to the Input base class
+ * @param lookahead stored in d_lookahead
+ * @param useMmap selects which of the two stream constructors is used
+ * @throws ParserException if the chosen stream constructor returns NULL
+ */
+AntlrInput::AntlrInput(ExprManager* exprManager, const std::string& filename, unsigned int lookahead, bool useMmap) :
+    Input(exprManager, filename),
+    d_lookahead(lookahead),
+    d_lexer(NULL),
+    d_parser(NULL),
+    d_tokenStream(NULL) {
+
+  if( !useMmap ) {
+    // antlr3AsciiFileStreamNew is handed a pANTLR3_UINT8, hence the casts.
+    pANTLR3_UINT8 path = reinterpret_cast<pANTLR3_UINT8>(const_cast<char*>(filename.c_str()));
+    d_input = antlr3AsciiFileStreamNew(path);
+  } else {
+    d_input = MemoryMappedInputBufferNew(filename);
+  }
+
+  if( d_input != NULL ) {
+    return;
+  }
+  throw ParserException("Couldn't open file: " + filename);
+}
+
+/*
+AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
+  Parser(exprManager,name),
+  d_lookahead(lookahead) {
+
+}
+*/
+
+/**
+ * Construct an input that reads from the string `input`.
+ *
+ * Private heap copies of `input` and `name` are handed to
+ * antlr3NewAsciiStringInPlaceStream(). If construction fails the copies are
+ * released again before the exception is thrown.
+ * NOTE(review): on success nothing visible in this file frees the two
+ * copies; confirm whether the stream's close() is expected to do so.
+ *
+ * @param exprManager forwarded to the Input base class
+ * @param input the text to parse
+ * @param name a name for the input; forwarded to Input and to the stream
+ * @param lookahead stored in d_lookahead
+ * @throws ParserException if the copies or the stream cannot be created
+ */
+AntlrInput::AntlrInput(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
+  Input(exprManager,name),
+  d_lookahead(lookahead),
+  d_lexer(NULL),
+  d_parser(NULL),
+  d_tokenStream(NULL) {
+  // Copy all input.size() bytes plus the terminator. strdup() would stop at
+  // the first embedded '\0' while the stream below is still told the full
+  // size, which would let it read past the end of the copy.
+  char* inputStr = static_cast<char*>(malloc(input.size() + 1));
+  if( inputStr != NULL ) {
+    memcpy(inputStr, input.c_str(), input.size() + 1);
+  }
+  char* nameStr = strdup(name.c_str());
+  if( inputStr==NULL || nameStr==NULL ) {
+    // One of the two copies may have succeeded; free(NULL) is a no-op.
+    free(inputStr);
+    free(nameStr);
+    throw ParserException("Couldn't initialize string input: '" + input + "'");
+  }
+  d_input = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
+  if( d_input == NULL ) {
+    // No stream was created, so nothing else refers to the copies.
+    free(inputStr);
+    free(nameStr);
+    throw ParserException("Couldn't create input stream for string: '" + input + "'");
+  }
+}
+
+/**
+ * Release the ANTLR token stream and input stream held by this object.
+ *
+ * d_tokenStream is only assigned in setLexer(), so it is still NULL when
+ * the object is destroyed before a lexer was attached; it is therefore
+ * checked before use. d_input needs no check: both constructors throw
+ * when it would be NULL, so a constructed object always has one.
+ */
+AntlrInput::~AntlrInput() {
+  if( d_tokenStream != NULL ) {
+    d_tokenStream->free(d_tokenStream);
+  }
+  d_input->close(d_input);
+}
+
+/**
+ * @return the ANTLR input stream created by the constructor
+ */
+pANTLR3_INPUT_STREAM AntlrInput::getInputStream() {
+  return this->d_input;
+}
+
+/**
+ * @return the token stream set up by setLexer(), or NULL if setLexer() has
+ *         not been called yet
+ */
+pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
+  return this->d_tokenStream;
+}
+
+/**
+ * Throw a ParserException for `message`, tagged with the file name and the
+ * lexer's current line and column. The same information is first written
+ * to the "parser" debug channel.
+ *
+ * @param message the error description
+ * @throws ParserException always
+ */
+void AntlrInput::parseError(const std::string& message)
+    throw (ParserException) {
+  // Read the position once and use it for both the log and the exception.
+  const ANTLR3_UINT32 line = d_lexer->getLine(d_lexer);
+  const ANTLR3_INT32 column = d_lexer->getCharPositionInLine(d_lexer);
+
+  Debug("parser") << "Throwing exception: " << getFilename() << ":"
+      << line << "." << column << ": " << message << endl;
+
+  throw ParserException(message, getFilename(), line, column);
+}
+
+/**
+ * Error callback installed on the parser's recognizer (see setParser()).
+ *
+ * Marks the recognizer as being in error recovery, increments its error
+ * count, turns the pending ANTLR exception into a human-readable message
+ * and passes it to parseError() on the owning AntlrInput, which throws.
+ * The owner is recovered from the `super` field of the parser found in
+ * `recognizer->super`.
+ *
+ * @param recognizer the recognizer that hit the error; its `super` is
+ *        treated as a pANTLR3_PARSER whose own `super` is the AntlrInput
+ */
+void AntlrInput::reportError(pANTLR3_BASE_RECOGNIZER recognizer) {
+  pANTLR3_EXCEPTION ex = recognizer->state->exception;
+  pANTLR3_UINT8 * tokenNames = recognizer->state->tokenNames;
+  stringstream ss;
+
+  // Signal we are in error recovery now
+  recognizer->state->errorRecovery = ANTLR3_TRUE;
+
+  // Indicate this recognizer had an error while processing.
+  recognizer->state->errorCount++;
+
+  // Call the builtin error formatter
+  // recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
+
+  /* This switch statement is adapted from antlr3baserecognizer.c:displayRecognitionError in libantlr3c.
+   * TODO: Make error messages more useful, maybe by including more expected tokens and information
+   * about the current token. */
+  switch(ex->type) {
+  case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
+
+    // Indicates that the recognizer was fed a token which seems to be
+    // spurious input. We can detect this when the token that follows
+    // this unwanted token would normally be part of the syntactically
+    // correct stream. Then we can see that the token we are looking at
+    // is just something that should not be there and throw this exception.
+    //
+    if(tokenNames == NULL) {
+      ss << "Unexpected token." ;
+    } else {
+      if(ex->expecting == ANTLR3_TOKEN_EOF) {
+        ss << "Expected end of file.";
+      } else {
+        ss << "Expected " << tokenNames[ex->expecting] << ".";
+      }
+    }
+    break;
+
+  case ANTLR3_MISSING_TOKEN_EXCEPTION:
+
+    // Indicates that the recognizer detected that the token we just
+    // hit would be valid syntactically if preceded by a particular
+    // token. Perhaps a missing ';' at line end or a missing ',' in an
+    // expression list, and such like.
+    //
+    if(tokenNames == NULL) {
+      ss << "Missing token (" << ex->expecting << ").";
+    } else {
+      if(ex->expecting == ANTLR3_TOKEN_EOF) {
+        ss << "Missing end of file marker.";
+      } else {
+        ss << "Missing " << tokenNames[ex->expecting] << ".";
+      }
+    }
+    break;
+
+  case ANTLR3_RECOGNITION_EXCEPTION:
+
+    // Indicates that the recognizer received a token
+    // in the input that was not predicted. This is the basic exception type
+    // from which all others are derived. So we assume it was a syntax error.
+    // You may get this if there are no more tokens and more are needed
+    // to complete a parse for instance.
+    //
+    ss <<"Syntax error.";
+    break;
+
+  case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
+
+    // We were expecting to see one thing and got another. This is the
+    // most common error if we could not detect a missing or unwanted token.
+    // Here you can spend your efforts to
+    // derive more useful error messages based on the expected
+    // token set and the last token and so on. The error following
+    // bitmaps do a good job of reducing the set that we were looking
+    // for down to something small. Knowing what you are parsing may be
+    // able to allow you to be even more specific about an error.
+    //
+    if(tokenNames == NULL) {
+      ss << "Syntax error.";
+    } else {
+      if(ex->expecting == ANTLR3_TOKEN_EOF) {
+        ss << "Expected end of file.";
+      } else {
+        ss << "Expected " << tokenNames[ex->expecting] << ".";
+      }
+    }
+    break;
+
+  case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
+
+    // We could not pick any alt decision from the input given
+    // so god knows what happened - however when you examine your grammar,
+    // you should. It means that at the point where the current token occurred
+    // that the DFA indicates nowhere to go from here.
+    //
+    ss << "Cannot match to any predicted input.";
+
+    break;
+
+  case ANTLR3_MISMATCHED_SET_EXCEPTION:
+
+  {
+    // This means we were able to deal with one of a set of
+    // possible tokens at this point, but we did not see any
+    // member of that set.
+    //
+    ss << "Unexpected input. Expected one of : ";
+
+    // What tokens could we have accepted at this point in the
+    // parse? antlr3BitsetLoad builds a fresh bitset from the list stored in
+    // the exception, so it is freed again below once we are done with it.
+    //
+    pANTLR3_BITSET errBits = antlr3BitsetLoad(ex->expectingSet);
+    AlwaysAssert(errBits != NULL);
+    const ANTLR3_UINT32 numbits = errBits->numBits(errBits);
+    const ANTLR3_UINT32 size = errBits->size(errBits);
+
+    if(size > 0) {
+      // However many tokens we could have dealt with here, it is usually
+      // not useful to print ALL of the set here. I arbitrarily chose 8
+      // here, but you should do whatever makes sense for you of course.
+      // No token number 0, so look for bit 1 and on.
+      //
+      ANTLR3_UINT32 count = 0;
+      for(ANTLR3_UINT32 bit = 1; bit < numbits && count < 8 && count < size; bit++) {
+        // Only token types that are actually in the expected set are listed.
+        if(!errBits->isMember(errBits, bit)) {
+          continue;
+        }
+        if( count++ > 0 ) {
+          ss << ", ";
+        }
+        // Fall back to the numeric token type when no name is available,
+        // as the missing-token case above does.
+        if(tokenNames != NULL && tokenNames[bit] != NULL) {
+          ss << tokenNames[bit];
+        } else {
+          ss << bit;
+        }
+      }
+      errBits->free(errBits);
+    } else {
+      errBits->free(errBits);
+      Unreachable("Parse error with empty set of expected tokens.");
+    }
+  }
+    break;
+
+  case ANTLR3_EARLY_EXIT_EXCEPTION:
+
+    // We entered a loop requiring a number of token sequences
+    // but found a token that ended that sequence earlier than
+    // we should have done.
+    //
+    ss << "Missing elements.";
+    break;
+
+  default:
+
+    // We don't handle any other exception types here; anything else is
+    // reported as an internal error.
+    //
+    Unhandled("Unexpected exception in parser.");
+    break;
+  }
+
+  // Now get ready to throw an exception
+  pANTLR3_PARSER parser = (pANTLR3_PARSER)(recognizer->super);
+  AlwaysAssert(parser!=NULL);
+  AntlrInput *input = (AntlrInput*)(parser->super);
+  AlwaysAssert(input!=NULL);
+
+  // Call the error display routine
+  input->parseError(ss.str());
+}
+
+/**
+ * Attach the lexer and build the token stream the parser will read from.
+ *
+ * The lexer's current token factory, if any, is closed and replaced by the
+ * result of BoundedTokenFactoryNew(d_input, 2*d_lookahead). A bounded token
+ * buffer is then created over the lexer's token source with d_lookahead,
+ * and its common token stream becomes d_tokenStream.
+ *
+ * @param pLexer the ANTLR lexer to use; stored in d_lexer
+ * @throws ParserException if the token factory or the token buffer cannot
+ *         be created
+ */
+void AntlrInput::setLexer(pANTLR3_LEXER pLexer) {
+  d_lexer = pLexer;
+
+  pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
+  if( pTokenFactory != NULL ) {
+    pTokenFactory->close(pTokenFactory);
+    // Do not leave the lexer pointing at the factory we just closed: if
+    // creating the replacement fails below we throw, and the stale pointer
+    // would otherwise remain in the lexer state.
+    d_lexer->rec->state->tokFactory = NULL;
+  }
+
+  /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
+  pTokenFactory = BoundedTokenFactoryNew(d_input, 2*d_lookahead);
+  if( pTokenFactory == NULL ) {
+    throw ParserException("Couldn't create token factory.");
+  }
+  d_lexer->rec->state->tokFactory = pTokenFactory;
+
+  pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
+  if( buffer == NULL ) {
+    throw ParserException("Couldn't create token buffer.");
+  }
+
+  d_tokenStream = buffer->commonTstream;
+}
+
+/**
+ * Attach the parser and route its error reports through reportError().
+ *
+ * The parser's `super` field is pointed back at this object; reportError()
+ * reads it to find the AntlrInput it belongs to.
+ *
+ * @param pParser the ANTLR parser to use; stored in d_parser
+ */
+void AntlrInput::setParser(pANTLR3_PARSER pParser) {
+  // ANTLR isn't using super, AFAICT, so we use it as a back-pointer.
+  pParser->super = this;
+  pParser->rec->reportError = &reportError;
+  d_parser = pParser;
+}
+
+
+}/* CVC4::parser namespace */
+}/* CVC4 namespace */
author	Christopher L. Conway <christopherleeconway@gmail.com>	2010-03-30 20:22:33 +0000
committer	Christopher L. Conway <christopherleeconway@gmail.com>	2010-03-30 20:22:33 +0000
commit	8730e9320a833a9eb0e65074f9988950b7424c0c (patch)
tree	1cb09404256743e208fece079ba473595e05edcd /src/parser/antlr_input.cpp
parent	8c87c05ac56a5f29b2ae1e658f2d7d3b7b588163 (diff)