diff options
Diffstat (limited to 'src/parser/antlr_line_buffered_input.cpp')
-rw-r--r-- | src/parser/antlr_line_buffered_input.cpp | 319 |
1 files changed, 181 insertions, 138 deletions
diff --git a/src/parser/antlr_line_buffered_input.cpp b/src/parser/antlr_line_buffered_input.cpp index 22bbaf1db..e65125ad9 100644 --- a/src/parser/antlr_line_buffered_input.cpp +++ b/src/parser/antlr_line_buffered_input.cpp @@ -2,17 +2,30 @@ /*! \file antlr_line_buffered_input.cpp ** \verbatim ** Top contributors (to current version): - ** Morgan Deters, Tim King + ** Morgan Deters, Tim King, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2016 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** - ** \brief [[ Add one-line brief description here ]] + ** \brief A custom ANTLR input stream that reads from the input stream lazily ** - ** [[ Add lengthier description here ]] - ** \todo document this file + ** WARNING: edits to this and related files should be done carefully due to the + *interaction with ANTLR internals. + ** + ** This overwrites the _LA and the consume functions of the ANTLR input stream + ** to use a LineBuffer instead of accessing a buffer. The lines are kept in + ** memory to make sure that existing tokens remain valid (tokens store pointers + ** to the corresponding input). We do not overwrite mark(), etc. + *because + ** we can use the line number and the position within that line to index into + *the + ** line buffer and the default markers already store and restore that + ** information. The line buffer guarantees that lines are consecutive in + ** memory, so ANTLR3_INPUT_STREAM::getLineBuf() should work as intended and + ** tokens themselves are consecutive in memory (we are assuming that tokens + ** are not split across multiple lines). **/ // We rely on the inclusion of #include <antlr3.h> in @@ -32,7 +45,8 @@ namespace CVC4 { namespace parser { -static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream(std::istream& in); +static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream( + std::istream& in, LineBuffer* line_buffer); static void setupInputStream(pANTLR3_INPUT_STREAM input) @@ -206,111 +220,133 @@ setupInputStream(pANTLR3_INPUT_STREAM input) #endif /* 0 */ } -static ANTLR3_UCHAR -myLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) { - pANTLR3_INPUT_STREAM input; +static ANTLR3_UCHAR bufferedInputLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) { + pANTLR3_INPUT_STREAM input = ((pANTLR3_INPUT_STREAM)(is->super)); + CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM line_buffered_input = + (CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input; + uint8_t* result = line_buffered_input->line_buffer->getPtrWithOffset( + input->line, input->charPositionInLine, la - 1); + return (result != NULL) ? *result : ANTLR3_CHARSTREAM_EOF; +} - input = ((pANTLR3_INPUT_STREAM) (is->super)); +static void bufferedInputRewind(pANTLR3_INT_STREAM is, ANTLR3_MARKER mark) { + // This function is essentially the same as the original + // antlr38BitRewind() but does not do any seek. The seek in the + // original function does not do anything and also calls + // antlr38BitSeek() instead of the overloaded seek() function, which + // leads to subtle bugs. + pANTLR3_LEX_STATE state; + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM)is->super); + + // Perform any clean up of the marks + input->istream->release(input->istream, mark); + + // Find the supplied mark state + state = (pANTLR3_LEX_STATE)input->markers->get(input->markers, + (ANTLR3_UINT32)(mark - 1)); + if (state == NULL) { + return; + } + + // Reset the information in the mark + input->charPositionInLine = state->charPositionInLine; + input->currentLine = state->currentLine; + input->line = state->line; + input->nextChar = state->nextChar; +} - Debug("pipe") << "LA" << std::endl; - if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) - { - std::istream& in = *((CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->in; - //MGD - // in.clear(); - if(!in) { - Debug("pipe") << "EOF" << std::endl; - return ANTLR3_CHARSTREAM_EOF; - } - Debug("pipe") << "READ" << std::endl; - if(input->data == NULL) { - Debug("pipe") << "ALLOC" << std::endl; - input->data = malloc(1024); - input->nextChar = input->data; - } else { - Debug("pipe") << "REALLOC" << std::endl; - size_t pos = (char*)input->nextChar - (char*)input->data; - input->data = realloc(input->data, input->sizeBuf + 1024); - input->nextChar = (char*)input->data + pos; - } - in.getline((((char*)input->data) + input->sizeBuf), 1024); - while(in.fail() && !in.eof()) { - Debug("pipe") << "input string too long, reallocating" << std::endl; - input->sizeBuf += strlen(((char*)input->data) + input->sizeBuf); - size_t pos = (char*)input->nextChar - (char*)input->data; - input->data = realloc(input->data, input->sizeBuf + 1024); - input->nextChar = (char*)input->data + pos; - in.clear(); - in.getline((((char*)input->data) + input->sizeBuf), 1024); - } - input->sizeBuf += strlen(((char*)input->data) + input->sizeBuf); - assert(*(((char*)input->data) + input->sizeBuf) == '\0'); - Debug("pipe") << "SIZEBUF now " << input->sizeBuf << std::endl; - *(((char*)input->data) + input->sizeBuf) = '\n'; - ++input->sizeBuf; +static void bufferedInputConsume(pANTLR3_INT_STREAM is) { + pANTLR3_INPUT_STREAM input = ((pANTLR3_INPUT_STREAM)(is->super)); + CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM line_buffered_input = + (CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input; + + uint8_t* current = line_buffered_input->line_buffer->getPtr( + input->line, input->charPositionInLine); + if (current != NULL) { + input->charPositionInLine++; + + if (*current == LineBuffer::NewLineChar) { + // Reset for start of a new line of input + input->line++; + input->charPositionInLine = 0; + input->currentLine = line_buffered_input->line_buffer->getPtr( + input->line, input->charPositionInLine); + Debug("pipe") << "-- newline!" << std::endl; } - Debug("pipe") << "READ POINTER[" << la << "] AT: >>" << std::string(((char*)input->nextChar), input->sizeBuf - (((char*)input->nextChar) - (char*)input->data)) << "<< returning '" << (char)(*((pANTLR3_UINT8)input->nextChar + la - 1)) << "' (" << (unsigned)(*((pANTLR3_UINT8)input->nextChar + la - 1)) << ")" << std::endl; - return (ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar + la - 1)); + input->nextChar = line_buffered_input->line_buffer->getPtr( + input->line, input->charPositionInLine); + } } - -static void -myConsume(pANTLR3_INT_STREAM is) -{ - pANTLR3_INPUT_STREAM input; - - input = ((pANTLR3_INPUT_STREAM) (is->super)); - - Debug("pipe") << "consume! '" << *(char*)input->nextChar << "' (" << (unsigned)*(char*)input->nextChar << ")" << std::endl; - if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) - { - /* Indicate one more character in this line - */ - input->charPositionInLine++; - - if ((ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar)) == input->newlineChar) - { - /* Reset for start of a new line of input - */ - input->line++; - input->charPositionInLine = 0; - input->currentLine = (void *)(((pANTLR3_UINT8)input->nextChar) + 1); - Debug("pipe") << "-- newline!" << std::endl; - } - - /* Increment to next character position - */ - input->nextChar = (void *)(((pANTLR3_UINT8)input->nextChar) + 1); - Debug("pipe") << "-- advance nextChar! looking at '" << *(char*)input->nextChar << "' (" << (unsigned)*(char*)input->nextChar << ")" << std::endl; - } else Debug("pipe") << "-- nothing!" << std::endl; +static void bufferedInputSeek(pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint) { + // In contrast to the original antlr38BitSeek() function, we only + // support seeking forward (seeking backwards is only supported for + // rewinding in the original code, which we do not do when rewinding, + // so this should be fine). + pANTLR3_INPUT_STREAM input = ((pANTLR3_INPUT_STREAM)(is->super)); + pANTLR3_LINE_BUFFERED_INPUT_STREAM line_buffered_input = + (CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input; + + // Check that we are not seeking backwards. + assert(!line_buffered_input->line_buffer->isPtrBefore( + (uint8_t*)seekPoint, input->line, input->charPositionInLine)); + + ssize_t count = (ssize_t)(seekPoint - (ANTLR3_MARKER)(input->nextChar)); + while (count > 0) { + is->consume(is); + count--; + } } -pANTLR3_INPUT_STREAM -antlr3LineBufferedStreamNew(std::istream& in, ANTLR3_UINT32 encoding, pANTLR3_UINT8 name) -{ - pANTLR3_INPUT_STREAM input; - - if(!in) { - return NULL; - } +static ANTLR3_UINT32 bufferedInputSize(pANTLR3_INPUT_STREAM input) { + // Not supported for this type of stream + assert(false); + return 0; +} - // First order of business is to set up the stream and install the data pointer. - // Then we will work out the encoding and byte order and adjust the API functions that are installed for the - // default 8Bit stream accordingly. - // - input = antlr3CreateLineBufferedStream(in); - if (input == NULL) - { - return NULL; - } +static void bufferedInputSetNewLineChar(pANTLR3_INPUT_STREAM input, + ANTLR3_UINT32 newlineChar) { + // Not supported for this type of stream + assert(false); +} - // Size (in bytes) of the given 'string' - // - input->sizeBuf = 0; +static void bufferedInputSetUcaseLA(pANTLR3_INPUT_STREAM input, + ANTLR3_BOOLEAN flag) { + // Not supported for this type of stream + assert(false); +} - input->istream->_LA = myLA; - input->istream->consume = myConsume; +pANTLR3_INPUT_STREAM antlr3LineBufferedStreamNew(std::istream& in, + ANTLR3_UINT32 encoding, + pANTLR3_UINT8 name, + LineBuffer* line_buffer) { + pANTLR3_INPUT_STREAM input; + + if (!in) { + return NULL; + } + + // First order of business is to set up the stream and install the data + // pointer. + // Then we will work out the encoding and byte order and adjust the API + // functions that are installed for the + // default 8Bit stream accordingly. + // + input = antlr3CreateLineBufferedStream(in, line_buffer); + if (input == NULL) { + return NULL; + } + + input->istream->_LA = bufferedInputLA; + input->istream->consume = bufferedInputConsume; + input->istream->seek = bufferedInputSeek; + input->istream->rewind = bufferedInputRewind; + input->size = bufferedInputSize; + input->SetNewLineChar = bufferedInputSetNewLineChar; + input->setUcaseLA = bufferedInputSetUcaseLA; #ifndef CVC4_ANTLR3_OLD_INPUT_STREAM // We have the data in memory now so we can deal with it according to @@ -326,53 +362,60 @@ antlr3LineBufferedStreamNew(std::istream& in, ANTLR3_UINT32 encoding, pANTLR3_UI // Now we can set up the file name // - input->istream->streamName = input->strFactory->newStr8(input->strFactory, name); - input->fileName = input->istream->streamName; + input->istream->streamName = + input->strFactory->newStr8(input->strFactory, name); + input->fileName = input->istream->streamName; return input; } -static pANTLR3_INPUT_STREAM -antlr3CreateLineBufferedStream(std::istream& in) -{ - // Pointer to the input stream we are going to create - // - pANTLR3_INPUT_STREAM input; - - if (!in) - { - return NULL; - } - - // Allocate memory for the input stream structure - // - input = (pANTLR3_INPUT_STREAM) - ANTLR3_CALLOC(1, sizeof(ANTLR3_LINE_BUFFERED_INPUT_STREAM)); - - if (input == NULL) - { - return NULL; - } - - // Structure was allocated correctly, now we can install the pointer - // - input->data = malloc(1024); - input->isAllocated = ANTLR3_FALSE; - - ((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->in = ∈ - // Call the common 8 bit input stream handler - // initialization. - // +static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream( + std::istream& in, LineBuffer* line_buffer) { + // Pointer to the input stream we are going to create + // + pANTLR3_INPUT_STREAM input; + + if (!in) { + return NULL; + } + + // Allocate memory for the input stream structure + // + input = (pANTLR3_INPUT_STREAM)ANTLR3_CALLOC( + 1, sizeof(ANTLR3_LINE_BUFFERED_INPUT_STREAM)); + + if (input == NULL) { + return NULL; + } + + // Structure was allocated correctly, now we can install the pointer + // + input->data = NULL; + input->isAllocated = ANTLR3_FALSE; + + ((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->in = ∈ + ((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->line_buffer = line_buffer; +// Call the common 8 bit input stream handler +// initialization. +// #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM - antlr3AsciiSetupStream(input, ANTLR3_CHARSTREAM); + antlr3AsciiSetupStream(input, ANTLR3_CHARSTREAM); #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */ - antlr38BitSetupStream(input); - // In some libantlr3c 3.4-beta versions, this call is not included in the above. - // This is probably an erroneously-deleted line in the libantlr3c source since 3.2. - antlr3GenericSetupStream(input); + antlr38BitSetupStream(input); + // In some libantlr3c 3.4-beta versions, this call is not included in the + // above. + // This is probably an erroneously-deleted line in the libantlr3c source since + // 3.2. + antlr3GenericSetupStream(input); #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */ - return input; + input->sizeBuf = 0; + input->newlineChar = LineBuffer::NewLineChar; + input->charPositionInLine = 0; + input->line = 0; + input->nextChar = line_buffer->getPtr(0, 0); + input->currentLine = line_buffer->getPtr(0, 0); + return input; } }/* CVC4::parser namespace */ |