From 24093bf30356975b5017613d94bf2927521b666f Mon Sep 17 00:00:00 2001 From: Kshitij Bansal Date: Thu, 24 Apr 2014 21:35:03 -0400 Subject: attempt to improve CVC4's "parse error" message --- src/parser/antlr_input.cpp | 155 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 151 insertions(+), 4 deletions(-) (limited to 'src/parser') diff --git a/src/parser/antlr_input.cpp b/src/parser/antlr_input.cpp index d498d3c54..88b43eb0e 100644 --- a/src/parser/antlr_input.cpp +++ b/src/parser/antlr_input.cpp @@ -3,9 +3,9 @@ ** \verbatim ** Original author: Christopher L. Conway ** Major contributors: Morgan Deters - ** Minor contributors (to current version): Francois Bobot + ** Minor contributors (to current version): Francois Bobot, Kshitij Bansal ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2013 New York University and The University of Iowa + ** Copyright (c) 2009-2014 New York University and The University of Iowa ** See the file COPYING in the top-level source directory for licensing ** information.\endverbatim ** @@ -284,20 +284,167 @@ void AntlrInput::warning(const std::string& message) { Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl; } + +/** + * characters considered part of a simple symbol in SMTLIB. + * + * TODO: Ideally this code shouldn't be smtlib specific (should work + * with CVC language too), but this per-language specialization has + * been left for a later point. + */ +inline bool isSimpleChar(char ch) { + return isalnum(ch) || (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL); +} + +/** + * Gets part of original input and tries to visually hint where the + * error might be. + * + * Something like: + * + * ...nd (= alpha beta) (= beta delta)) + * ^ + * + * Implementation (as on 2014/04/24): + * + * > if suggested pointer by lexer is under a "simple char", move to + * start of the word and print pointer there. + * + * > in the other case, it tries to find the nearest word in the error + * message passed along. if it can't find it, we don't add this + * visual hint, as experimentally position suggested by lexer was + * found to be totally unhelpful. (TODO: fix this upstream to + * improve) + */ +std::string parseErrorHelper(const char* lineStart, int charPositionInLine, const std::string& message) +{ + // Is it a multi-line message + bool multilineMessage = (message.find('\n') != string::npos); + // Useful only if it is a multi-line message + int firstLineEnd = message.find('\n'); + + std::ostringstream ss, slicess; + + // Keep first line of message + if(multilineMessage) { + ss << message.substr(0, firstLineEnd) << endl << endl; + } else { + ss << message << endl << endl; + } + + int posSliceStart = (charPositionInLine - 50 <= 0) ? 0 : charPositionInLine - 50 + 5; + int posSliceEnd = posSliceStart + 70; + int caretPos = 0; + int caretPosExtra = 0; // for inital intendation, epilipses etc. + + ss << " "; caretPosExtra += 2; + if(posSliceStart > 0) { + ss << "..."; caretPosExtra += 3; + } + + for(int i = posSliceStart; lineStart[i] != '\n'; ++i) { + if(i == posSliceEnd) { + ss << "..."; + break; + } + if(i < charPositionInLine) { caretPos++; } + + if(!isprint(lineStart[i])) { + // non-printable character, something wrong, bail out + return message; + } + + ss << (lineStart[i]); + slicess << (lineStart[i]); + } + + // adjust position of caret, based on slice and message + { + int caretPosOrig = caretPos; + string slice = slicess.str(); + if(isSimpleChar(slice[caretPos])) { + // if alphanumeric, try to go to beginning of word/number + while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; } + if(caretPos == 0 && posSliceStart > 0) { + // reached start and this is not really the start? bail out + return message; + } else { + // likely it is also in original message? if so, very likely + // we found the right place + string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1)); + int messagePosSt = message.find(word); + int messagePosEn = messagePosSt + (caretPosOrig - caretPos); + if( messagePosSt < string::npos && + (messagePosSt == 0 || !isSimpleChar(message[messagePosSt-1]) ) && + (messagePosEn+1 == message.size() || !isSimpleChar(message[messagePosEn+1]) ) ) { + // ^the complicated if statement is just 'whole-word' match + Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl; + } + } + } else { + // go to nearest alphanumeric string (before current position), + // see if that word can be found in original message. If so, + // point to that, else keep pointer where it was. + int nearestWordEn = caretPos - 1; + while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) { + --nearestWordEn; + } + if(isSimpleChar(slice[nearestWordEn])) { + int nearestWordSt = nearestWordEn; + while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) { + --nearestWordSt; + } + string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1)); + Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl; + int messagePosSt = message.find(word); + int messagePosEn = messagePosSt + (nearestWordEn - nearestWordSt + 1); + if( messagePosSt < string::npos && + (messagePosSt == 0 || !isSimpleChar(message[messagePosSt-1]) ) && + (messagePosEn+1 == message.size() || !isSimpleChar(message[messagePosEn+1]) ) ) { + // ^the complicated if statement is just 'whole-word' match + Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at " + << nearestWordSt << std::endl; + caretPos = nearestWordSt; + } else { + // this doesn't look good. caret generally getting printed + // at unhelpful positions. improve upstream? + return message; + } + } + } + caretPos += caretPosExtra; + }// end of caret position computation/heuristics + + ss << std::endl; + while( caretPos-- > 0 ) { + ss << ' '; + } + ss << '^' << endl; + if(multilineMessage) { + ss << message.substr(firstLineEnd, message.size() - firstLineEnd);; + } + return ss.str(); +} + void AntlrInput::parseError(const std::string& message, bool eofException) throw (ParserException) { + + string updatedMessage = parseErrorHelper((const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream), + d_lexer->getCharPositionInLine(d_lexer), + message); + Debug("parser") << "Throwing exception: " << (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":" << d_lexer->getLine(d_lexer) << "." << d_lexer->getCharPositionInLine(d_lexer) << ": " - << message << endl; + << updatedMessage << endl; if(eofException) { throw ParserEndOfFileException(message, (const char*)d_lexer->rec->state->tokSource->fileName->chars, d_lexer->getLine(d_lexer), d_lexer->getCharPositionInLine(d_lexer)); } else { - throw ParserException(message, + throw ParserException(updatedMessage, (const char*)d_lexer->rec->state->tokSource->fileName->chars, d_lexer->getLine(d_lexer), d_lexer->getCharPositionInLine(d_lexer)); -- cgit v1.2.3