From ec78128c2cc4958d462b6b6a35538eeef8f28bff Mon Sep 17 00:00:00 2001 From: "Christopher L. Conway" Date: Mon, 5 Apr 2010 19:10:33 +0000 Subject: Moving code imported from libantlr3c to separate file and adding copyright exception --- src/parser/antlr_input.cpp | 290 --------------------------------------------- 1 file changed, 290 deletions(-) (limited to 'src/parser/antlr_input.cpp') diff --git a/src/parser/antlr_input.cpp b/src/parser/antlr_input.cpp index ab937ac1a..604502e50 100644 --- a/src/parser/antlr_input.cpp +++ b/src/parser/antlr_input.cpp @@ -13,7 +13,6 @@ ** A super-class for ANTLR-generated input language parsers **/ -#include #include #include @@ -92,84 +91,6 @@ pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() { return d_tokenStream; } - -/// Match current input symbol against ttype. Upon error, do one token -/// insertion or deletion if possible. -/// To turn off single token insertion or deletion error -/// recovery, override mismatchRecover() and have it call -/// plain mismatch(), which does not recover. Then any error -/// in a rule will cause an exception and immediate exit from -/// rule. Rule would recover by resynchronizing to the set of -/// symbols that can follow rule ref. -/// -// [chris 4/5/2010] Copy and paste from antlr3baserecognizer.c -void * -AntlrInput::match(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, - pANTLR3_BITSET_LIST follow) { - pANTLR3_PARSER parser; - pANTLR3_TREE_PARSER tparser; - pANTLR3_INT_STREAM is; - void * matchedSymbol; - - switch(recognizer->type) { - case ANTLR3_TYPE_PARSER: - - parser = (pANTLR3_PARSER)(recognizer->super); - tparser = NULL; - is = parser->tstream->istream; - - break; - - case ANTLR3_TYPE_TREE_PARSER: - - tparser = (pANTLR3_TREE_PARSER)(recognizer->super); - parser = NULL; - is = tparser->ctnstream->tnstream->istream; - - break; - - default: - - ANTLR3_FPRINTF( - stderr, - "Base recognizer function 'match' called by unknown parser type - provide override for this function\n"); - return ANTLR3_FALSE; - - break; - } - - // Pick up the current input token/node for assignment to labels - // - matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is); - - if(is->_LA(is, 1) == ttype) { - // The token was the one we were told to expect - // - is->consume(is); // Consume that token from the stream - recognizer->state->errorRecovery = ANTLR3_FALSE; // Not in error recovery now (if we were) - recognizer->state->failed = ANTLR3_FALSE; // The match was a success - return matchedSymbol; // We are done - } - - // We did not find the expected token type, if we are backtracking then - // we just set the failed flag and return. - // - if(recognizer->state->backtracking > 0) { - // Backtracking is going on - // - recognizer->state->failed = ANTLR3_TRUE; - return matchedSymbol; - } - - // We did not find the expected token and there is no backtracking - // going on, so we mismatch, which creates an exception in the recognizer exception - // stack. - // - matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, - follow); - return matchedSymbol; -} - void AntlrInput::parseError(const std::string& message) throw (ParserException) { Debug("parser") << "Throwing exception: " @@ -182,217 +103,6 @@ void AntlrInput::parseError(const std::string& message) d_lexer->getCharPositionInLine(d_lexer)); } -void * -AntlrInput::recoverFromMismatchedToken(pANTLR3_BASE_RECOGNIZER recognizer, - ANTLR3_UINT32 ttype, - pANTLR3_BITSET_LIST follow) { - - pANTLR3_PARSER parser = (pANTLR3_PARSER) (recognizer->super); - pANTLR3_INT_STREAM is = parser->tstream->istream; - void *matchedSymbol; - - - // Create an exception if we need one - // - if(recognizer->state->exception == NULL) { - antlr3RecognitionExceptionNew(recognizer); - } - - if(recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE) { - recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION; - recognizer->state->exception->message - = (void*)ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME; - } - - if(recognizer->mismatchIsMissingToken(recognizer, is, follow)) { - matchedSymbol = recognizer->getMissingSymbol(recognizer, is, - recognizer->state->exception, - ttype, follow); - recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION; - recognizer->state->exception->message = (void*)ANTLR3_MISSING_TOKEN_EXCEPTION_NAME; - recognizer->state->exception->token = matchedSymbol; - recognizer->state->exception->expecting = ttype; - } - - reportError(recognizer); - Unreachable("reportError should have thrown exception in AntlrInput::recoverFromMismatchedToken"); -} - -void AntlrInput::reportError(pANTLR3_BASE_RECOGNIZER recognizer) { - pANTLR3_EXCEPTION ex = recognizer->state->exception; - pANTLR3_UINT8 * tokenNames = recognizer->state->tokenNames; - stringstream ss; -// std::string msg; - - // Signal we are in error recovery now - recognizer->state->errorRecovery = ANTLR3_TRUE; - - // Indicate this recognizer had an error while processing. - recognizer->state->errorCount++; - - // Call the builtin error formatter - // recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames); - - /* This switch statement is adapted from antlr3baserecognizer.c:displayRecognitionError in libantlr3c. - * TODO: Make error messages more useful, maybe by including more expected tokens and information - * about the current token. */ - switch(ex->type) { - case ANTLR3_UNWANTED_TOKEN_EXCEPTION: - - // Indicates that the recognizer was fed a token which seems to be - // spurious input. We can detect this when the token that follows - // this unwanted token would normally be part of the syntactically - // correct stream. Then we can see that the token we are looking at - // is just something that should not be there and throw this exception. - // - if(tokenNames == NULL) { - ss << "Unexpected token." ; - } else { - if(ex->expecting == ANTLR3_TOKEN_EOF) { - ss << "Expected end of file."; - } else { - ss << "Expected " << tokenNames[ex->expecting] - << ", found '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'."; - } - } - break; - - case ANTLR3_MISSING_TOKEN_EXCEPTION: - - // Indicates that the recognizer detected that the token we just - // hit would be valid syntactically if preceded by a particular - // token. Perhaps a missing ';' at line end or a missing ',' in an - // expression list, and such like. - // - if(tokenNames == NULL) { - ss << "Missing token (" << ex->expecting << ")."; - } else { - if(ex->expecting == ANTLR3_TOKEN_EOF) { - ss << "Missing end of file marker."; - } else { - ss << "Missing " << tokenNames[ex->expecting] << "."; - } - } - break; - - case ANTLR3_RECOGNITION_EXCEPTION: - - // Indicates that the recognizer received a token - // in the input that was not predicted. This is the basic exception type - // from which all others are derived. So we assume it was a syntax error. - // You may get this if there are not more tokens and more are needed - // to complete a parse for instance. - // - ss <<"Syntax error."; - break; - - case ANTLR3_MISMATCHED_TOKEN_EXCEPTION: - - // We were expecting to see one thing and got another. This is the - // most common error if we could not detect a missing or unwanted token. - // Here you can spend your efforts to - // derive more useful error messages based on the expected - // token set and the last token and so on. The error following - // bitmaps do a good job of reducing the set that we were looking - // for down to something small. Knowing what you are parsing may be - // able to allow you to be even more specific about an error. - // - if(tokenNames == NULL) { - ss << "Syntax error."; - } else { - if(ex->expecting == ANTLR3_TOKEN_EOF) { - ss << "Expected end of file."; - } else { - ss << "Expected " << tokenNames[ex->expecting] << "."; - } - } - break; - - case ANTLR3_NO_VIABLE_ALT_EXCEPTION: - // We could not pick any alt decision from the input given - // so god knows what happened - however when you examine your grammar, - // you should. It means that at the point where the current token occurred - // that the DFA indicates nowhere to go from here. - // - ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'."; - break; - - case ANTLR3_MISMATCHED_SET_EXCEPTION: - - { - ANTLR3_UINT32 count; - ANTLR3_UINT32 bit; - ANTLR3_UINT32 size; - ANTLR3_UINT32 numbits; - pANTLR3_BITSET errBits; - - // This means we were able to deal with one of a set of - // possible tokens at this point, but we did not see any - // member of that set. - // - ss << "Unexpected input: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) - << "'. Expected one of: "; - - // What tokens could we have accepted at this point in the - // parse? - // - count = 0; - errBits = antlr3BitsetLoad(ex->expectingSet); - numbits = errBits->numBits(errBits); - size = errBits->size(errBits); - - if(size > 0) { - // However many tokens we could have dealt with here, it is usually - // not useful to print ALL of the set here. I arbitrarily chose 8 - // here, but you should do whatever makes sense for you of course. - // No token number 0, so look for bit 1 and on. - // - for(bit = 1; bit < numbits && count < 8 && count < size; bit++) { - // TODO: This doesn;t look right - should be asking if the bit is set!! - // - if(tokenNames[bit]) { - if( count++ > 0 ) { - ss << ", "; - } - ss << tokenNames[bit]; - } - } - } else { - Unreachable("Parse error with empty set of expected tokens."); - } - } - break; - - case ANTLR3_EARLY_EXIT_EXCEPTION: - - // We entered a loop requiring a number of token sequences - // but found a token that ended that sequence earlier than - // we should have done. - // - ss << "Sequence terminated early by token: '" - << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'."; - break; - - default: - - // We don't handle any other exceptions here, but you can - // if you wish. If we get an exception that hits this point - // then we are just going to report what we know about the - // token. - // - Unhandled("Unexpected exception in parser."); - break; - } - - // Now get ready to throw an exception - pANTLR3_PARSER parser = (pANTLR3_PARSER)(recognizer->super); - AlwaysAssert(parser!=NULL); - ParserState *parserState = (ParserState*)(parser->super); - AlwaysAssert(parserState!=NULL); - - // Call the error display routine - parserState->parseError(ss.str()); -} void AntlrInput::setLexer(pANTLR3_LEXER pLexer) { d_lexer = pLexer; -- cgit v1.2.3