/********************* */ /*! \file antlr_input.cpp ** \verbatim ** Top contributors (to current version): ** Christopher L. Conway, Kshitij Bansal, Tim King ** This file is part of the CVC4 project. ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** ** \brief A super-class for ANTLR-generated input language parsers. ** ** A super-class for ANTLR-generated input language parsers **/ #include "parser/antlr_input.h" // We rely on the inclusion of #include in "parser/antlr_input.h". // This is avoid having to undefine the symbols in . // See the documentation in "parser/antlr_undefines.h" for more // details. #include #include #include "base/output.h" #include "expr/type.h" #include "parser/antlr_line_buffered_input.h" #include "parser/bounded_token_buffer.h" #include "parser/bounded_token_factory.h" #include "parser/cvc/cvc_input.h" #include "parser/input.h" #include "parser/memory_mapped_input_buffer.h" #include "parser/parser.h" #include "parser/parser_exception.h" #include "parser/smt1/smt1_input.h" #include "parser/smt2/smt2_input.h" #include "parser/smt2/sygus_input.h" #include "parser/tptp/tptp_input.h" #include "smt/command.h" using namespace std; using namespace CVC4; using namespace CVC4::parser; using namespace CVC4::kind; namespace CVC4 { namespace parser { // These functions exactly wrap the antlr3 source inconsistencies. // These are the only location CVC4_ANTLR3_OLD_INPUT_STREAM ifdefs appear. // No other sanity checking happens; pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input, const std::string& name, LineBuffer* line_buffer); pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name); pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep, uint32_t size, const std::string& name); pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input, const std::string& name, LineBuffer* line_buffer) { pANTLR3_INPUT_STREAM inputStream = NULL; pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str()); #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM inputStream = antlr3LineBufferedStreamNew(input, 0, name_duplicate, line_buffer); #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */ inputStream = antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT, name_duplicate, line_buffer); #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */ free(name_duplicate); return inputStream; } pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name){ pANTLR3_INPUT_STREAM input = NULL; pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str()); // libantlr3c v3.2 isn't source-compatible with v3.4 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM input = antlr3AsciiFileStreamNew(name_duplicate); #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */ input = antlr3FileStreamNew(name_duplicate, ANTLR3_ENC_8BIT); #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */ free(name_duplicate); return input; } pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep, uint32_t size, const std::string& name){ pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str()); pANTLR3_INPUT_STREAM inputStream = NULL; /* Create an ANTLR input backed by the buffer. */ #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM inputStream = antlr3NewAsciiStringInPlaceStream(basep, size, name_duplicate); #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */ inputStream = antlr3StringStreamNew(basep, ANTLR3_ENC_8BIT, size, name_duplicate); #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */ free(name_duplicate); return inputStream; } AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input, bool fileIsTemporary, pANTLR3_UINT8 inputString, LineBuffer* line_buffer) : InputStream(name, fileIsTemporary), d_input(input), d_inputString(inputString), d_line_buffer(line_buffer) { assert( input != NULL ); input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str()); } AntlrInputStream::~AntlrInputStream() { d_input->free(d_input); if(d_inputString != NULL){ free(d_inputString); } if (d_line_buffer != NULL) { delete d_line_buffer; } } pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const { return d_input; } AntlrInputStream* AntlrInputStream::newFileInputStream(const std::string& name, bool useMmap) { #ifdef _WIN32 if(useMmap) { useMmap = false; } #endif pANTLR3_INPUT_STREAM input = NULL; if(useMmap) { input = MemoryMappedInputBufferNew(name); } else { input = newAntlr3FileStream(name); } if(input == NULL) { throw InputStreamException("Couldn't open file: " + name); } return new AntlrInputStream(name, input, false, NULL, NULL); } AntlrInputStream* AntlrInputStream::newStreamInputStream(std::istream& input, const std::string& name, bool lineBuffered) { pANTLR3_INPUT_STREAM inputStream = NULL; pANTLR3_UINT8 inputStringCopy = NULL; LineBuffer* line_buffer = NULL; if(lineBuffered) { line_buffer = new LineBuffer(&input); inputStream = newAntlr3BufferedStream(input, name, line_buffer); } else { // Since these are all NULL on entry, realloc will be called char *basep = NULL, *boundp = NULL, *cp = NULL; /* 64KB seems like a reasonable default size. */ size_t bufSize = 0x10000; /* Keep going until we can't go no more. */ while( !input.eof() && !input.fail() ) { if( cp == boundp ) { /* We ran out of room in the buffer. Realloc at double the size. */ ptrdiff_t offset = cp - basep; basep = (char *) realloc(basep, bufSize); if( basep == NULL ) { throw InputStreamException("Failed buffering input stream: " + name); } cp = basep + offset; boundp = basep + bufSize; bufSize *= 2; } /* Read as much as we have room for. */ input.read( cp, boundp - cp ); cp += input.gcount(); } /* Make sure the fail bit didn't get set. */ if( !input.eof() ) { throw InputStreamException("Stream input failed: " + name); } ptrdiff_t offset = cp - basep; assert(offset >= 0); assert(offset <= std::numeric_limits::max()); inputStringCopy = (pANTLR3_UINT8)basep; inputStream = newAntrl3InPlaceStream(inputStringCopy, (uint32_t) offset, name); } if( inputStream == NULL ) { throw InputStreamException("Couldn't initialize input: " + name); } return new AntlrInputStream(name, inputStream, false, inputStringCopy, line_buffer); } AntlrInputStream* AntlrInputStream::newStringInputStream(const std::string& input, const std::string& name) { size_t input_size = input.size(); assert(input_size <= std::numeric_limits::max()); // Ownership of input_duplicate is transferred to the AntlrInputStream. pANTLR3_UINT8 input_duplicate = (pANTLR3_UINT8) strdup(input.c_str()); if( input_duplicate == NULL ) { throw InputStreamException("Couldn't initialize string input: '" + input + "'"); } pANTLR3_INPUT_STREAM inputStream = newAntrl3InPlaceStream(input_duplicate, (uint32_t)input_size, name); if( inputStream==NULL ) { throw InputStreamException("Couldn't initialize string input: '" + input + "'"); } return new AntlrInputStream(name, inputStream, false, input_duplicate, NULL); } AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStream) { using namespace language::input; AntlrInput* input; switch(lang) { case LANG_CVC4: { input = new CvcInput(inputStream); break; } case LANG_SMTLIB_V1: input = new Smt1Input(inputStream); break; case LANG_SYGUS: input = new SygusInput(inputStream); break; case LANG_TPTP: input = new TptpInput(inputStream); break; default: if (language::isInputLang_smt2(lang)) { input = new Smt2Input(inputStream); } else { std::stringstream ss; ss << "internal error: unhandled language " << lang << " in AntlrInput::newInput"; throw InputStreamException(ss.str()); } } return input; } AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) : Input(inputStream), d_lookahead(lookahead), d_lexer(NULL), d_parser(NULL), d_antlr3InputStream( inputStream.getAntlr3InputStream() ), d_tokenBuffer(NULL) { } /* AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead) Parser(exprManager,name), d_lookahead(lookahead) { } */ /* AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) : Input(exprManager,name), d_lookahead(lookahead), d_lexer(NULL), d_parser(NULL), d_tokenStream(NULL) { char* inputStr = strdup(input.c_str()); char* nameStr = strdup(name.c_str()); if( inputStr==NULL || nameStr==NULL ) { throw ParserException("Couldn't initialize string input: '" + input + "'"); } d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr); if( d_inputStream == NULL ) { throw ParserException("Couldn't create input stream for string: '" + input + "'"); } } */ AntlrInput::~AntlrInput() { BoundedTokenBufferFree(d_tokenBuffer); } pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() { return d_tokenBuffer->commonTstream; } void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) { pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super); assert(lexer!=NULL); Parser *parser = (Parser*)(lexer->super); assert(parser!=NULL); AntlrInput *input = (AntlrInput*) parser->getInput(); assert(input!=NULL); /* Call the error display routine *if* there's not already a * parse error pending. If a parser error is pending, this * error is probably less important, so we just drop it. */ if( input->d_parser->rec->state->error == ANTLR3_FALSE ) { input->parseError("Error finding next token."); } } void AntlrInput::warning(const std::string& message) { Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl; } /** * characters considered part of a simple symbol in SMTLIB. * * TODO: Ideally this code shouldn't be smtlib specific (should work * with CVC language too), but this per-language specialization has * been left for a later point. */ inline bool isSimpleChar(char ch) { return isalnum(ch) || (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL); } size_t wholeWordMatch(string input, string pattern, bool (*isWordChar)(char)) { size_t st = 0; size_t N = input.size(); while(st < N) { while( st < N && (*isWordChar)(input[st]) == false ) st++; size_t en = st; while(en + 1 < N && (*isWordChar)(input[en + 1]) == true) en++; if(en - st + 1 == pattern.size()) { bool match = true; for(size_t i = 0; match && i < pattern.size(); ++i) { match &= (pattern[i] == input[st+i]); } if(match == true) { return st; } } st = en + 1; } return string::npos; } /** * Gets part of original input and tries to visually hint where the * error might be. * * Something like: * * ...nd (= alpha beta) (= beta delta)) * ^ * * Implementation (as on 2014/04/24): * * > if suggested pointer by lexer is under a "simple char", move to * start of the word and print pointer there. * * > in the other case, it tries to find the nearest word in the error * message passed along. if it can't find it, we don't add this * visual hint, as experimentally position suggested by lexer was * found to be totally unhelpful. (TODO: fix this upstream to * improve) */ std::string parseErrorHelper(const char* lineStart, int charPositionInLine, const std::string& message) { // Is it a multi-line message bool multilineMessage = (message.find('\n') != string::npos); // Useful only if it is a multi-line message int firstLineEnd = message.find('\n'); std::ostringstream ss, slicess; // Keep first line of message if(multilineMessage) { ss << message.substr(0, firstLineEnd) << endl << endl; } else { ss << message << endl << endl; } int posSliceStart = (charPositionInLine - 50 <= 0) ? 0 : charPositionInLine - 50 + 5; int posSliceEnd = posSliceStart + 70; int caretPos = 0; int caretPosExtra = 0; // for inital intendation, epilipses etc. ss << " "; caretPosExtra += 2; if(posSliceStart > 0) { ss << "..."; caretPosExtra += 3; } for(int i = posSliceStart; lineStart[i] != '\n'; ++i) { if(i == posSliceEnd) { ss << "..."; break; } if(i < charPositionInLine) { caretPos++; } if(!isprint(lineStart[i])) { // non-printable character, something wrong, bail out return message; } ss << (lineStart[i]); slicess << (lineStart[i]); } // adjust position of caret, based on slice and message { int caretPosOrig = caretPos; string slice = slicess.str(); if(isSimpleChar(slice[caretPos])) { // if alphanumeric, try to go to beginning of word/number while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; } if(caretPos == 0 && posSliceStart > 0) { // reached start and this is not really the start? bail out return message; } else { // likely it is also in original message? if so, very likely // we found the right place string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1)); size_t matchLoc = wholeWordMatch(message, word, isSimpleChar); Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl; if( matchLoc != string::npos ) { Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl; } } } else { bool foundCaretPos = false; for(int tries = 0; tries < 2 && caretPos > 0 && !foundCaretPos; ++tries) { // go to nearest alphanumeric string (before current position), // see if that word can be found in original message. If so, // point to that, else keep pointer where it was. int nearestWordEn = caretPos - 1; while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) { --nearestWordEn; } if(isSimpleChar(slice[nearestWordEn])) { int nearestWordSt = nearestWordEn; while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) { --nearestWordSt; } string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1)); size_t matchLoc = wholeWordMatch(message, word, isSimpleChar); Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl; Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl; if( matchLoc != string::npos ) { Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at " << nearestWordSt << std::endl; foundCaretPos = true; } caretPos = nearestWordSt; } } if( !foundCaretPos) { // this doesn't look good. caret generally getting printed // at unhelpful positions. improve upstream? return message; } } caretPos += caretPosExtra; }// end of caret position computation/heuristics ss << std::endl; while( caretPos-- > 0 ) { ss << ' '; } ss << '^' << endl; if(multilineMessage) { ss << message.substr(firstLineEnd, message.size() - firstLineEnd);; } return ss.str(); } void AntlrInput::parseError(const std::string& message, bool eofException) { string updatedMessage = parseErrorHelper((const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream), d_lexer->getCharPositionInLine(d_lexer), message); Debug("parser") << "Throwing exception: " << (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":" << d_lexer->getLine(d_lexer) << "." << d_lexer->getCharPositionInLine(d_lexer) << ": " << updatedMessage << endl; if(eofException) { throw ParserEndOfFileException(message, (const char*)d_lexer->rec->state->tokSource->fileName->chars, d_lexer->getLine(d_lexer), d_lexer->getCharPositionInLine(d_lexer)); } else { throw ParserException(updatedMessage, (const char*)d_lexer->rec->state->tokSource->fileName->chars, d_lexer->getLine(d_lexer), d_lexer->getCharPositionInLine(d_lexer)); } } void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) { d_lexer = pLexer; pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory; if( pTokenFactory != NULL ) { pTokenFactory->close(pTokenFactory); } /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */ pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead); if( pTokenFactory == NULL ) { throw InputStreamException("Couldn't create token factory."); } d_lexer->rec->state->tokFactory = pTokenFactory; pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource); if( buffer == NULL ) { throw InputStreamException("Couldn't create token buffer."); } d_tokenBuffer = buffer; // Override default lexer error reporting d_lexer->rec->reportError = &lexerError; // Override default nextToken function, just to prevent exceptions escaping. d_lexer->rec->state->tokSource->nextToken = &nextToken; } void AntlrInput::setParser(Parser& parser) { // ANTLR isn't using super in the lexer or the parser, AFAICT. // We could also use @lexer/parser::context to add a field to the generated // objects, but then it would have to be declared separately in every // language's grammar and we'd have to in the address of the field anyway. d_lexer->super = &parser; d_parser->super = &parser; } void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) { d_parser = pParser; // d_parser->rec->match = &match; d_parser->rec->reportError = &reportError; /* Don't try to recover from a parse error. */ // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it. d_parser->rec->recoverFromMismatchedToken = (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*)) d_parser->rec->mismatch; } }/* CVC4::parser namespace */ }/* CVC4 namespace */