diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/parser/Makefile.am | 2 | ||||
-rw-r--r-- | src/parser/antlr_input.cpp | 43 | ||||
-rw-r--r-- | src/parser/antlr_input.h | 22 | ||||
-rw-r--r-- | src/parser/antlr_line_buffered_input.cpp | 319 | ||||
-rw-r--r-- | src/parser/antlr_line_buffered_input.h | 22 | ||||
-rw-r--r-- | src/parser/input.h | 3 | ||||
-rw-r--r-- | src/parser/line_buffer.cpp | 88 | ||||
-rw-r--r-- | src/parser/line_buffer.h | 76 |
8 files changed, 395 insertions, 180 deletions
diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index ca10de684..a316019fd 100644 --- a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -46,6 +46,8 @@ libcvc4parser_la_SOURCES = \ bounded_token_factory.h \ input.cpp \ input.h \ + line_buffer.cpp \ + line_buffer.h \ memory_mapped_input_buffer.cpp \ memory_mapped_input_buffer.h \ parser.cpp \ diff --git a/src/parser/antlr_input.cpp b/src/parser/antlr_input.cpp index b2f2cdd53..cbdaaa2bf 100644 --- a/src/parser/antlr_input.cpp +++ b/src/parser/antlr_input.cpp @@ -50,23 +50,26 @@ namespace parser { // These functions exactly wrap the antlr3 source inconsistencies. // These are the only location CVC4_ANTLR3_OLD_INPUT_STREAM ifdefs appear. // No other sanity checking happens; -pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input, const std::string& name); +pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input, + const std::string& name, + LineBuffer* line_buffer); pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name); pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep, uint32_t size, const std::string& name); -pANTLR3_INPUT_STREAM -newAntlr3BufferedStream(std::istream& input, const std::string& name){ +pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input, + const std::string& name, + LineBuffer* line_buffer) { pANTLR3_INPUT_STREAM inputStream = NULL; pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str()); #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM inputStream = - antlr3LineBufferedStreamNew(input, 0, name_duplicate); + antlr3LineBufferedStreamNew(input, 0, name_duplicate, line_buffer); #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */ - inputStream = - antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT, name_duplicate); + inputStream = antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT, + name_duplicate, line_buffer); #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */ free(name_duplicate); @@ -107,14 +110,14 @@ pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep, return inputStream; } -AntlrInputStream::AntlrInputStream(std::string name, - pANTLR3_INPUT_STREAM input, +AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input, bool fileIsTemporary, - pANTLR3_UINT8 inputString) : - InputStream(name, fileIsTemporary), - d_input(input), - d_inputString(inputString) -{ + pANTLR3_UINT8 inputString, + LineBuffer* line_buffer) + : InputStream(name, fileIsTemporary), + d_input(input), + d_inputString(inputString), + d_line_buffer(line_buffer) { assert( input != NULL ); input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str()); } @@ -124,6 +127,9 @@ AntlrInputStream::~AntlrInputStream() { if(d_inputString != NULL){ free(d_inputString); } + if (d_line_buffer != NULL) { + delete d_line_buffer; + } } pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const { @@ -150,7 +156,7 @@ AntlrInputStream::newFileInputStream(const std::string& name, if(input == NULL) { throw InputStreamException("Couldn't open file: " + name); } - return new AntlrInputStream( name, input, false, NULL ); + return new AntlrInputStream(name, input, false, NULL, NULL); } @@ -162,9 +168,11 @@ AntlrInputStream::newStreamInputStream(std::istream& input, pANTLR3_INPUT_STREAM inputStream = NULL; pANTLR3_UINT8 inputStringCopy = NULL; + LineBuffer* line_buffer = NULL; if(lineBuffered) { - inputStream = newAntlr3BufferedStream(input, name); + line_buffer = new LineBuffer(&input); + inputStream = newAntlr3BufferedStream(input, name, line_buffer); } else { // Since these are all NULL on entry, realloc will be called @@ -207,7 +215,8 @@ AntlrInputStream::newStreamInputStream(std::istream& input, throw InputStreamException("Couldn't initialize input: " + name); } - return new AntlrInputStream( name, inputStream, false, inputStringCopy ); + return new AntlrInputStream(name, inputStream, false, inputStringCopy, + line_buffer); } @@ -230,7 +239,7 @@ AntlrInputStream::newStringInputStream(const std::string& input, if( inputStream==NULL ) { throw InputStreamException("Couldn't initialize string input: '" + input + "'"); } - return new AntlrInputStream( name, inputStream, false, input_duplicate ); + return new AntlrInputStream(name, inputStream, false, input_duplicate, NULL); } AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStream) { diff --git a/src/parser/antlr_input.h b/src/parser/antlr_input.h index 8e5e82811..293be0087 100644 --- a/src/parser/antlr_input.h +++ b/src/parser/antlr_input.h @@ -35,12 +35,12 @@ #include "base/output.h" #include "parser/bounded_token_buffer.h" #include "parser/input.h" +#include "parser/line_buffer.h" #include "parser/parser_exception.h" #include "util/bitvector.h" #include "util/integer.h" #include "util/rational.h" - namespace CVC4 { class Command; @@ -62,10 +62,11 @@ private: */ pANTLR3_UINT8 d_inputString; - AntlrInputStream(std::string name, - pANTLR3_INPUT_STREAM input, - bool fileIsTemporary, - pANTLR3_UINT8 inputString); + LineBuffer* d_line_buffer; + + AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input, + bool fileIsTemporary, pANTLR3_UINT8 inputString, + LineBuffer* line_buffer); /* This is private and unimplemented, because you should never use it. */ AntlrInputStream(const AntlrInputStream& inputStream) CVC4_UNDEFINED; @@ -201,9 +202,6 @@ public: /** Get a bitvector constant from the text of the number and the size token */ static BitVector tokenToBitvector(pANTLR3_COMMON_TOKEN number, pANTLR3_COMMON_TOKEN size); - /** Retrieve the remaining text in this input. */ - std::string getUnparsedText(); - /** Get the ANTLR3 lexer for this input. */ pANTLR3_LEXER getAntlr3Lexer() { return d_lexer; } @@ -243,14 +241,6 @@ protected: virtual void setParser(Parser& parser); };/* class AntlrInput */ -inline std::string AntlrInput::getUnparsedText() { - const char *base = (const char *)d_antlr3InputStream->data; - const char *cur = (const char *)d_antlr3InputStream->nextChar; - - return std::string(cur, d_antlr3InputStream->sizeBuf - (cur - base)); -} - - inline std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) { if( token->type == ANTLR3_TOKEN_EOF ) { return "<<EOF>>"; diff --git a/src/parser/antlr_line_buffered_input.cpp b/src/parser/antlr_line_buffered_input.cpp index 22bbaf1db..e65125ad9 100644 --- a/src/parser/antlr_line_buffered_input.cpp +++ b/src/parser/antlr_line_buffered_input.cpp @@ -2,17 +2,30 @@ /*! \file antlr_line_buffered_input.cpp ** \verbatim ** Top contributors (to current version): - ** Morgan Deters, Tim King + ** Morgan Deters, Tim King, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2016 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** - ** \brief [[ Add one-line brief description here ]] + ** \brief A custom ANTLR input stream that reads from the input stream lazily ** - ** [[ Add lengthier description here ]] - ** \todo document this file + ** WARNING: edits to this and related files should be done carefully due to the + *interaction with ANTLR internals. + ** + ** This overwrites the _LA and the consume functions of the ANTLR input stream + ** to use a LineBuffer instead of accessing a buffer. The lines are kept in + ** memory to make sure that existing tokens remain valid (tokens store pointers + ** to the corresponding input). We do not overwrite mark(), etc. + *because + ** we can use the line number and the position within that line to index into + *the + ** line buffer and the default markers already store and restore that + ** information. The line buffer guarantees that lines are consecutive in + ** memory, so ANTLR3_INPUT_STREAM::getLineBuf() should work as intended and + ** tokens themselves are consecutive in memory (we are assuming that tokens + ** are not split across multiple lines). **/ // We rely on the inclusion of #include <antlr3.h> in @@ -32,7 +45,8 @@ namespace CVC4 { namespace parser { -static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream(std::istream& in); +static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream( + std::istream& in, LineBuffer* line_buffer); static void setupInputStream(pANTLR3_INPUT_STREAM input) @@ -206,111 +220,133 @@ setupInputStream(pANTLR3_INPUT_STREAM input) #endif /* 0 */ } -static ANTLR3_UCHAR -myLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) { - pANTLR3_INPUT_STREAM input; +static ANTLR3_UCHAR bufferedInputLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) { + pANTLR3_INPUT_STREAM input = ((pANTLR3_INPUT_STREAM)(is->super)); + CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM line_buffered_input = + (CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input; + uint8_t* result = line_buffered_input->line_buffer->getPtrWithOffset( + input->line, input->charPositionInLine, la - 1); + return (result != NULL) ? *result : ANTLR3_CHARSTREAM_EOF; +} - input = ((pANTLR3_INPUT_STREAM) (is->super)); +static void bufferedInputRewind(pANTLR3_INT_STREAM is, ANTLR3_MARKER mark) { + // This function is essentially the same as the original + // antlr38BitRewind() but does not do any seek. The seek in the + // original function does not do anything and also calls + // antlr38BitSeek() instead of the overloaded seek() function, which + // leads to subtle bugs. + pANTLR3_LEX_STATE state; + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM)is->super); + + // Perform any clean up of the marks + input->istream->release(input->istream, mark); + + // Find the supplied mark state + state = (pANTLR3_LEX_STATE)input->markers->get(input->markers, + (ANTLR3_UINT32)(mark - 1)); + if (state == NULL) { + return; + } + + // Reset the information in the mark + input->charPositionInLine = state->charPositionInLine; + input->currentLine = state->currentLine; + input->line = state->line; + input->nextChar = state->nextChar; +} - Debug("pipe") << "LA" << std::endl; - if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) - { - std::istream& in = *((CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->in; - //MGD - // in.clear(); - if(!in) { - Debug("pipe") << "EOF" << std::endl; - return ANTLR3_CHARSTREAM_EOF; - } - Debug("pipe") << "READ" << std::endl; - if(input->data == NULL) { - Debug("pipe") << "ALLOC" << std::endl; - input->data = malloc(1024); - input->nextChar = input->data; - } else { - Debug("pipe") << "REALLOC" << std::endl; - size_t pos = (char*)input->nextChar - (char*)input->data; - input->data = realloc(input->data, input->sizeBuf + 1024); - input->nextChar = (char*)input->data + pos; - } - in.getline((((char*)input->data) + input->sizeBuf), 1024); - while(in.fail() && !in.eof()) { - Debug("pipe") << "input string too long, reallocating" << std::endl; - input->sizeBuf += strlen(((char*)input->data) + input->sizeBuf); - size_t pos = (char*)input->nextChar - (char*)input->data; - input->data = realloc(input->data, input->sizeBuf + 1024); - input->nextChar = (char*)input->data + pos; - in.clear(); - in.getline((((char*)input->data) + input->sizeBuf), 1024); - } - input->sizeBuf += strlen(((char*)input->data) + input->sizeBuf); - assert(*(((char*)input->data) + input->sizeBuf) == '\0'); - Debug("pipe") << "SIZEBUF now " << input->sizeBuf << std::endl; - *(((char*)input->data) + input->sizeBuf) = '\n'; - ++input->sizeBuf; +static void bufferedInputConsume(pANTLR3_INT_STREAM is) { + pANTLR3_INPUT_STREAM input = ((pANTLR3_INPUT_STREAM)(is->super)); + CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM line_buffered_input = + (CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input; + + uint8_t* current = line_buffered_input->line_buffer->getPtr( + input->line, input->charPositionInLine); + if (current != NULL) { + input->charPositionInLine++; + + if (*current == LineBuffer::NewLineChar) { + // Reset for start of a new line of input + input->line++; + input->charPositionInLine = 0; + input->currentLine = line_buffered_input->line_buffer->getPtr( + input->line, input->charPositionInLine); + Debug("pipe") << "-- newline!" << std::endl; } - Debug("pipe") << "READ POINTER[" << la << "] AT: >>" << std::string(((char*)input->nextChar), input->sizeBuf - (((char*)input->nextChar) - (char*)input->data)) << "<< returning '" << (char)(*((pANTLR3_UINT8)input->nextChar + la - 1)) << "' (" << (unsigned)(*((pANTLR3_UINT8)input->nextChar + la - 1)) << ")" << std::endl; - return (ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar + la - 1)); + input->nextChar = line_buffered_input->line_buffer->getPtr( + input->line, input->charPositionInLine); + } } - -static void -myConsume(pANTLR3_INT_STREAM is) -{ - pANTLR3_INPUT_STREAM input; - - input = ((pANTLR3_INPUT_STREAM) (is->super)); - - Debug("pipe") << "consume! '" << *(char*)input->nextChar << "' (" << (unsigned)*(char*)input->nextChar << ")" << std::endl; - if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) - { - /* Indicate one more character in this line - */ - input->charPositionInLine++; - - if ((ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar)) == input->newlineChar) - { - /* Reset for start of a new line of input - */ - input->line++; - input->charPositionInLine = 0; - input->currentLine = (void *)(((pANTLR3_UINT8)input->nextChar) + 1); - Debug("pipe") << "-- newline!" << std::endl; - } - - /* Increment to next character position - */ - input->nextChar = (void *)(((pANTLR3_UINT8)input->nextChar) + 1); - Debug("pipe") << "-- advance nextChar! looking at '" << *(char*)input->nextChar << "' (" << (unsigned)*(char*)input->nextChar << ")" << std::endl; - } else Debug("pipe") << "-- nothing!" << std::endl; +static void bufferedInputSeek(pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint) { + // In contrast to the original antlr38BitSeek() function, we only + // support seeking forward (seeking backwards is only supported for + // rewinding in the original code, which we do not do when rewinding, + // so this should be fine). + pANTLR3_INPUT_STREAM input = ((pANTLR3_INPUT_STREAM)(is->super)); + pANTLR3_LINE_BUFFERED_INPUT_STREAM line_buffered_input = + (CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input; + + // Check that we are not seeking backwards. + assert(!line_buffered_input->line_buffer->isPtrBefore( + (uint8_t*)seekPoint, input->line, input->charPositionInLine)); + + ssize_t count = (ssize_t)(seekPoint - (ANTLR3_MARKER)(input->nextChar)); + while (count > 0) { + is->consume(is); + count--; + } } -pANTLR3_INPUT_STREAM -antlr3LineBufferedStreamNew(std::istream& in, ANTLR3_UINT32 encoding, pANTLR3_UINT8 name) -{ - pANTLR3_INPUT_STREAM input; - - if(!in) { - return NULL; - } +static ANTLR3_UINT32 bufferedInputSize(pANTLR3_INPUT_STREAM input) { + // Not supported for this type of stream + assert(false); + return 0; +} - // First order of business is to set up the stream and install the data pointer. - // Then we will work out the encoding and byte order and adjust the API functions that are installed for the - // default 8Bit stream accordingly. - // - input = antlr3CreateLineBufferedStream(in); - if (input == NULL) - { - return NULL; - } +static void bufferedInputSetNewLineChar(pANTLR3_INPUT_STREAM input, + ANTLR3_UINT32 newlineChar) { + // Not supported for this type of stream + assert(false); +} - // Size (in bytes) of the given 'string' - // - input->sizeBuf = 0; +static void bufferedInputSetUcaseLA(pANTLR3_INPUT_STREAM input, + ANTLR3_BOOLEAN flag) { + // Not supported for this type of stream + assert(false); +} - input->istream->_LA = myLA; - input->istream->consume = myConsume; +pANTLR3_INPUT_STREAM antlr3LineBufferedStreamNew(std::istream& in, + ANTLR3_UINT32 encoding, + pANTLR3_UINT8 name, + LineBuffer* line_buffer) { + pANTLR3_INPUT_STREAM input; + + if (!in) { + return NULL; + } + + // First order of business is to set up the stream and install the data + // pointer. + // Then we will work out the encoding and byte order and adjust the API + // functions that are installed for the + // default 8Bit stream accordingly. + // + input = antlr3CreateLineBufferedStream(in, line_buffer); + if (input == NULL) { + return NULL; + } + + input->istream->_LA = bufferedInputLA; + input->istream->consume = bufferedInputConsume; + input->istream->seek = bufferedInputSeek; + input->istream->rewind = bufferedInputRewind; + input->size = bufferedInputSize; + input->SetNewLineChar = bufferedInputSetNewLineChar; + input->setUcaseLA = bufferedInputSetUcaseLA; #ifndef CVC4_ANTLR3_OLD_INPUT_STREAM // We have the data in memory now so we can deal with it according to @@ -326,53 +362,60 @@ antlr3LineBufferedStreamNew(std::istream& in, ANTLR3_UINT32 encoding, pANTLR3_UI // Now we can set up the file name // - input->istream->streamName = input->strFactory->newStr8(input->strFactory, name); - input->fileName = input->istream->streamName; + input->istream->streamName = + input->strFactory->newStr8(input->strFactory, name); + input->fileName = input->istream->streamName; return input; } -static pANTLR3_INPUT_STREAM -antlr3CreateLineBufferedStream(std::istream& in) -{ - // Pointer to the input stream we are going to create - // - pANTLR3_INPUT_STREAM input; - - if (!in) - { - return NULL; - } - - // Allocate memory for the input stream structure - // - input = (pANTLR3_INPUT_STREAM) - ANTLR3_CALLOC(1, sizeof(ANTLR3_LINE_BUFFERED_INPUT_STREAM)); - - if (input == NULL) - { - return NULL; - } - - // Structure was allocated correctly, now we can install the pointer - // - input->data = malloc(1024); - input->isAllocated = ANTLR3_FALSE; - - ((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->in = ∈ - // Call the common 8 bit input stream handler - // initialization. - // +static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream( + std::istream& in, LineBuffer* line_buffer) { + // Pointer to the input stream we are going to create + // + pANTLR3_INPUT_STREAM input; + + if (!in) { + return NULL; + } + + // Allocate memory for the input stream structure + // + input = (pANTLR3_INPUT_STREAM)ANTLR3_CALLOC( + 1, sizeof(ANTLR3_LINE_BUFFERED_INPUT_STREAM)); + + if (input == NULL) { + return NULL; + } + + // Structure was allocated correctly, now we can install the pointer + // + input->data = NULL; + input->isAllocated = ANTLR3_FALSE; + + ((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->in = ∈ + ((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->line_buffer = line_buffer; +// Call the common 8 bit input stream handler +// initialization. +// #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM - antlr3AsciiSetupStream(input, ANTLR3_CHARSTREAM); + antlr3AsciiSetupStream(input, ANTLR3_CHARSTREAM); #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */ - antlr38BitSetupStream(input); - // In some libantlr3c 3.4-beta versions, this call is not included in the above. - // This is probably an erroneously-deleted line in the libantlr3c source since 3.2. - antlr3GenericSetupStream(input); + antlr38BitSetupStream(input); + // In some libantlr3c 3.4-beta versions, this call is not included in the + // above. + // This is probably an erroneously-deleted line in the libantlr3c source since + // 3.2. + antlr3GenericSetupStream(input); #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */ - return input; + input->sizeBuf = 0; + input->newlineChar = LineBuffer::NewLineChar; + input->charPositionInLine = 0; + input->line = 0; + input->nextChar = line_buffer->getPtr(0, 0); + input->currentLine = line_buffer->getPtr(0, 0); + return input; } }/* CVC4::parser namespace */ diff --git a/src/parser/antlr_line_buffered_input.h b/src/parser/antlr_line_buffered_input.h index 2c01a9bde..4535ffe6b 100644 --- a/src/parser/antlr_line_buffered_input.h +++ b/src/parser/antlr_line_buffered_input.h @@ -2,17 +2,22 @@ /*! \file antlr_line_buffered_input.h ** \verbatim ** Top contributors (to current version): - ** Morgan Deters, Tim King + ** Morgan Deters, Tim King, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2016 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** - ** \brief [[ Add one-line brief description here ]] + ** \brief A custom ANTLR input stream that reads from the input stream lazily ** - ** [[ Add lengthier description here ]] - ** \todo document this file + ** By default, ANTLR expects the whole input to be in a single, consecutive + ** buffer. When doing incremental solving and the input is coming from the + ** standard input, this is problematic because CVC4 might receive new input + ** based on the result of solving the existing input. + ** + ** This file overwrites the _LA and the consume functions of the input streamto + ** achieve that and stores the lines received so far in a LineBuffer. **/ // These headers should be the first two included. @@ -27,16 +32,21 @@ #include <istream> +#include "parser/line_buffer.h" + namespace CVC4 { namespace parser { typedef struct ANTLR3_LINE_BUFFERED_INPUT_STREAM { ANTLR3_INPUT_STREAM antlr; std::istream* in; + LineBuffer* line_buffer; } *pANTLR3_LINE_BUFFERED_INPUT_STREAM; -pANTLR3_INPUT_STREAM -antlr3LineBufferedStreamNew(std::istream& in, ANTLR3_UINT32 encoding, pANTLR3_UINT8 name); +pANTLR3_INPUT_STREAM antlr3LineBufferedStreamNew(std::istream& in, + ANTLR3_UINT32 encoding, + pANTLR3_UINT8 name, + LineBuffer* line_buffer); }/* CVC4::parser namespace */ }/* CVC4 namespace */ diff --git a/src/parser/input.h b/src/parser/input.h index 7dce369c5..6aec8a2b1 100644 --- a/src/parser/input.h +++ b/src/parser/input.h @@ -140,9 +140,6 @@ public: /** Destructor. Frees the input stream and closes the input. */ virtual ~Input(); - /** Retrieve the remaining text in this input. */ - virtual std::string getUnparsedText() = 0; - /** Get the language that this Input is reading. */ virtual InputLanguage getLanguage() const throw() = 0; diff --git a/src/parser/line_buffer.cpp b/src/parser/line_buffer.cpp new file mode 100644 index 000000000..cdb06193e --- /dev/null +++ b/src/parser/line_buffer.cpp @@ -0,0 +1,88 @@ +/********************* */ +/*! \file line_buffer.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2016 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief The LineBuffer class stores lines from an input stream + ** + ** For each line, the class allocates a separate buffer. + **/ + +#include "parser/line_buffer.h" + +#include <cassert> +#include <cstring> +#include <iostream> +#include <string> + +namespace CVC4 { +namespace parser { + +LineBuffer::LineBuffer(std::istream* stream) : d_stream(stream) {} + +LineBuffer::~LineBuffer() { + for (size_t i = 0; i < d_lines.size(); i++) { + delete[] d_lines[i]; + } +} + +uint8_t* LineBuffer::getPtr(size_t line, size_t pos_in_line) { + if (!readToLine(line)) { + return NULL; + } + assert(pos_in_line < d_sizes[line]); + return d_lines[line] + pos_in_line; +} + +uint8_t* LineBuffer::getPtrWithOffset(size_t line, size_t pos_in_line, + size_t offset) { + if (!readToLine(line)) { + return NULL; + } + if (pos_in_line + offset >= d_sizes[line]) { + return getPtrWithOffset(line + 1, 0, + offset - (d_sizes[line] - pos_in_line - 1)); + } + assert(pos_in_line + offset < d_sizes[line]); + return d_lines[line] + pos_in_line + offset; +} + +bool LineBuffer::isPtrBefore(uint8_t* ptr, size_t line, size_t pos_in_line) { + for (ssize_t i = line; i >= 0; i--) { + // NOTE: std::less is guaranteed to give consistent results when comparing + // pointers of different arrays (in contrast to built-in comparison + // operators). + uint8_t* end = d_lines[i] + ((i == line) ? pos_in_line : d_sizes[i]); + if (std::less<uint8_t*>()(d_lines[i] - 1, ptr) && + std::less<uint8_t*>()(ptr, end)) { + return true; + } + } + return false; +} + +bool LineBuffer::readToLine(size_t line) { + while (line >= d_lines.size()) { + if (!(*d_stream)) { + return false; + } + + std::string line; + std::getline(*d_stream, line); + uint8_t* segment = new uint8_t[line.size() + 1]; + std::memcpy(segment, line.c_str(), line.size()); + segment[line.size()] = LineBuffer::NewLineChar; + d_lines.push_back(segment); + d_sizes.push_back(line.size() + 1); + } + return true; +} + +} // namespace parser +} // namespace CVC4 diff --git a/src/parser/line_buffer.h b/src/parser/line_buffer.h new file mode 100644 index 000000000..d7ccb5a10 --- /dev/null +++ b/src/parser/line_buffer.h @@ -0,0 +1,76 @@ +/********************* */ +/*! \file line_buffer.h + ** \verbatim + ** Top contributors (to current version): + ** Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2016 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief The LineBuffer class stores lines from an input stream + ** + ** Each line is guaranteed to be consecutive in memory. The content in + ** the line buffer can be addressed using line number and the position + ** within the line. + **/ + +#include "cvc4parser_private.h" + +#ifndef __CVC4__PARSER__LINE_BUFFER_H +#define __CVC4__PARSER__LINE_BUFFER_H + +#include <cstdlib> +#include <istream> +#include <vector> + +namespace CVC4 { +namespace parser { + +class LineBuffer { + public: + static const uint8_t NewLineChar = '\n'; + + LineBuffer(std::istream* stream); + ~LineBuffer(); + + /** + * Gets a pointer to a char at a specific line and position within that + * line. + */ + uint8_t* getPtr(size_t line, size_t pos_in_line); + + /** + * Gets a pointer to a char at an offset relative to a specific line and + * position within that line. + */ + uint8_t* getPtrWithOffset(size_t line, size_t pos_in_line, size_t offset); + + /** + * Tests whether a given pointer points to a location before a given + * line and position within that line. + */ + bool isPtrBefore(uint8_t* ptr, size_t line, size_t pos_in_line); + + private: + /** + * Reads lines up to a line number from the input if needed (it does + * nothing for the lines that were already read). Returns false if the end + * of the input stream has been reached and not all lines could be read. + */ + bool readToLine(size_t line); + + std::istream* d_stream; + // Each element in this vector corresponds to a line from the input stream. + // WARNING: not null-terminated. + std::vector<uint8_t*> d_lines; + // Each element in this vector corresponds to the length of a line from the + // input stream. + std::vector<size_t> d_sizes; +}; + +} // namespace parser +} // namespace CVC4 + +#endif /* __CVC4__PARSER__LINE_BUFFER_H */ |