diff options
author | Morgan Deters <mdeters@gmail.com> | 2012-03-01 14:48:04 +0000 |
---|---|---|
committer | Morgan Deters <mdeters@gmail.com> | 2012-03-01 14:48:04 +0000 |
commit | 45a138c326da72890bf889a3670aad503ef4aa1e (patch) | |
tree | fa0c9a8497d0b33f78a9f19212152a61392825cc /src/parser/antlr_line_buffered_input.cpp | |
parent | 8c0b2d6db32103268f84d89c0d0545c7eb504069 (diff) |
Partial merge from kind-backend branch, including Minisat and CNF work to
support incrementality.
Some clean-up work will likely follow, but the CNF/Minisat stuff should be
left pretty much untouched.
Expected performance change negligible; slightly better on memory:
http://church.cims.nyu.edu/regress-results/compare_jobs.php?job_id=3705&reference_id=3697&mode=&category=&p=5
Note that there are crashes, but that these are exhibited in the nightly
regression run too!
Diffstat (limited to 'src/parser/antlr_line_buffered_input.cpp')
-rw-r--r-- | src/parser/antlr_line_buffered_input.cpp | 379 |
1 files changed, 379 insertions, 0 deletions
diff --git a/src/parser/antlr_line_buffered_input.cpp b/src/parser/antlr_line_buffered_input.cpp new file mode 100644 index 000000000..b42562f72 --- /dev/null +++ b/src/parser/antlr_line_buffered_input.cpp @@ -0,0 +1,379 @@ +/********************* */ +/*! \file antlr_line_buffered_input.cpp + ** \verbatim + ** Original author: mdeters + ** Major contributors: none + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information.\endverbatim + ** + ** \brief [[ Add one-line brief description here ]] + ** + ** [[ Add lengthier description here ]] + ** \todo document this file + **/ + +#include <antlr3.h> +#include <iostream> +#include <string> + +#include "util/output.h" +#include "util/Assert.h" + +namespace CVC4 { +namespace parser { + +typedef struct ANTLR3_LINE_BUFFERED_INPUT_STREAM { + ANTLR3_INPUT_STREAM antlr; + std::istream* in; +} *pANTLR3_LINE_BUFFERED_INPUT_STREAM; + +static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream(std::istream& in); + +static void +setupInputStream(pANTLR3_INPUT_STREAM input) +{ +#if 0 + ANTLR3_BOOLEAN isBigEndian; + + // Used to determine the endianness of the machine we are currently + // running on. + // + ANTLR3_UINT16 bomTest = 0xFEFF; + + // What endianess is the machine we are running on? If the incoming + // encoding endianess is the same as this machine's natural byte order + // then we can use more efficient API calls. + // + if (*((pANTLR3_UINT8)(&bomTest)) == 0xFE) + { + isBigEndian = ANTLR3_TRUE; + } + else + { + isBigEndian = ANTLR3_FALSE; + } + + // What encoding did the user tell us {s}he thought it was? I am going + // to get sick of the questions on antlr-interest, I know I am. + // + switch (input->encoding) + { + case ANTLR3_ENC_UTF8: + + // See if there is a BOM at the start of this UTF-8 sequence + // and just eat it if there is. Windows .TXT files have this for instance + // as it identifies UTF-8 even though it is of no consequence for byte order + // as UTF-8 does not have a byte order. + // + if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xEF + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xBB + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2)) == 0xBF + ) + { + // The UTF8 BOM is present so skip it + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 3); + } + + // Install the UTF8 input routines + // + antlr3UTF8SetupStream(input); + break; + + case ANTLR3_ENC_UTF16: + + // See if there is a BOM at the start of the input. If not then + // we assume that the byte order is the natural order of this + // machine (or it is really UCS2). If there is a BOM we determine if the encoding + // is the same as the natural order of this machine. + // + if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFE + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFF + ) + { + // BOM Present, indicates Big Endian + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2); + + antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE); + } + else if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFF + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFE + ) + { + // BOM present, indicates Little Endian + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2); + + antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE); + } + else + { + // No BOM present, assume local computer byte order + // + antlr3UTF16SetupStream(input, isBigEndian, isBigEndian); + } + break; + + case ANTLR3_ENC_UTF32: + + // See if there is a BOM at the start of the input. If not then + // we assume that the byte order is the natural order of this + // machine. If there is we determine if the encoding + // is the same as the natural order of this machine. + // + if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0x00 + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00 + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2)) == 0xFE + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+3)) == 0xFF + ) + { + // BOM Present, indicates Big Endian + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4); + + antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE); + } + else if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFF + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFE + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00 + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00 + ) + { + // BOM present, indicates Little Endian + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4); + + antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE); + } + else + { + // No BOM present, assume local computer byte order + // + antlr3UTF32SetupStream(input, isBigEndian, isBigEndian); + } + break; + + case ANTLR3_ENC_UTF16BE: + + // Encoding is definately Big Endian with no BOM + // + antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE); + break; + + case ANTLR3_ENC_UTF16LE: + + // Encoding is definately Little Endian with no BOM + // + antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE); + break; + + case ANTLR3_ENC_UTF32BE: + + // Encoding is definately Big Endian with no BOM + // + antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE); + break; + + case ANTLR3_ENC_UTF32LE: + + // Encoding is definately Little Endian with no BOM + // + antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE); + break; + + case ANTLR3_ENC_EBCDIC: + + // EBCDIC is basically the same as ASCII but with an on the + // fly translation to ASCII + // + antlr3EBCDICSetupStream(input); + break; + + case ANTLR3_ENC_8BIT: + default: + + // Standard 8bit/ASCII + // + antlr38BitSetupStream(input); + break; + } +#endif /* 0 */ +} + +static ANTLR3_UCHAR +myLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) { + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + Debug("pipe") << "LA" << std::endl; + if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + std::istream& in = *((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->in; + if(!in) { + Debug("pipe") << "EOF" << std::endl; + return ANTLR3_CHARSTREAM_EOF; + } + Debug("pipe") << "READ" << std::endl; + if(input->data == NULL) { + Debug("pipe") << "ALLOC" << std::endl; + input->data = malloc(1024); + input->nextChar = input->data; + } else { + Debug("pipe") << "REALLOC" << std::endl; + size_t pos = (char*)input->nextChar - (char*)input->data; + input->data = realloc(input->data, input->sizeBuf + 1024); + input->nextChar = (char*)input->data + pos; + } + in.getline((((char*)input->data) + input->sizeBuf), 1024); + while(in.fail() && !in.eof()) { + Debug("pipe") << "input string too long, reallocating" << std::endl; + input->sizeBuf += strlen(((char*)input->data) + input->sizeBuf); + size_t pos = (char*)input->nextChar - (char*)input->data; + input->data = realloc(input->data, input->sizeBuf + 1024); + input->nextChar = (char*)input->data + pos; + in.clear(); + in.getline((((char*)input->data) + input->sizeBuf), 1024); + } + input->sizeBuf += strlen(((char*)input->data) + input->sizeBuf); + Assert(*(((char*)input->data) + input->sizeBuf) == '\0'); + Debug("pipe") << "SIZEBUF now " << input->sizeBuf << std::endl; + *(((char*)input->data) + input->sizeBuf) = '\n'; + ++input->sizeBuf; + } + + Debug("pipe") << "READ POINTER[" << la << "] AT: >>" << std::string(((char*)input->nextChar), input->sizeBuf - (((char*)input->nextChar) - (char*)input->data) + 1) << "<< returning '" << (char)(*((pANTLR3_UINT8)input->nextChar + la - 1)) << "' (" << (unsigned)(*((pANTLR3_UINT8)input->nextChar + la - 1)) << ")" << std::endl; + return (ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar + la - 1)); +} + + +static void +myConsume(pANTLR3_INT_STREAM is) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + Debug("pipe") << "consume! '" << *(char*)input->nextChar << "' (" << (unsigned)*(char*)input->nextChar << ")" << std::endl; + if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + /* Indicate one more character in this line + */ + input->charPositionInLine++; + + if ((ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar)) == input->newlineChar) + { + /* Reset for start of a new line of input + */ + input->line++; + input->charPositionInLine = 0; + input->currentLine = (void *)(((pANTLR3_UINT8)input->nextChar) + 1); + Debug("pipe") << "-- newline!" << std::endl; + } + + /* Increment to next character position + */ + input->nextChar = (void *)(((pANTLR3_UINT8)input->nextChar) + 1); + Debug("pipe") << "-- advance nextChar! looking at '" << *(char*)input->nextChar << "' (" << (unsigned)*(char*)input->nextChar << ")" << std::endl; + } else Debug("pipe") << "-- nothing!" << std::endl; +} + +pANTLR3_INPUT_STREAM +antlr3LineBufferedStreamNew(std::istream& in, ANTLR3_UINT32 encoding, pANTLR3_UINT8 name) +{ + pANTLR3_INPUT_STREAM input; + + if(!in) { + return NULL; + } + + // First order of business is to set up the stream and install the data pointer. + // Then we will work out the encoding and byte order and adjust the API functions that are installed for the + // default 8Bit stream accordingly. + // + input = antlr3CreateLineBufferedStream(in); + if (input == NULL) + { + return NULL; + } + + // Size (in bytes) of the given 'string' + // + input->sizeBuf = 0; + + input->istream->_LA = myLA; + input->istream->consume = myConsume; + +#ifndef CVC4_ANTLR3_OLD_INPUT_STREAM + // We have the data in memory now so we can deal with it according to + // the encoding scheme we were given by the user. + // + input->encoding = encoding; +#endif /* ! CVC4_ANTLR3_OLD_INPUT_STREAM */ + + // Now we need to work out the endian type and install any + // API functions that differ from 8Bit + // + setupInputStream(input); + + // Now we can set up the file name + // + input->istream->streamName = input->strFactory->newStr8(input->strFactory, name); + input->fileName = input->istream->streamName; + + return input; +} + +static pANTLR3_INPUT_STREAM +antlr3CreateLineBufferedStream(std::istream& in) +{ + // Pointer to the input stream we are going to create + // + pANTLR3_INPUT_STREAM input; + + if (!in) + { + return NULL; + } + + // Allocate memory for the input stream structure + // + input = (pANTLR3_INPUT_STREAM) + ANTLR3_CALLOC(1, sizeof(ANTLR3_LINE_BUFFERED_INPUT_STREAM)); + + if (input == NULL) + { + return NULL; + } + + // Structure was allocated correctly, now we can install the pointer + // + input->data = malloc(1024); + input->isAllocated = ANTLR3_FALSE; + + ((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->in = ∈ + + // Call the common 8 bit input stream handler + // initialization. + // +#ifdef CVC4_ANTLR3_OLD_INPUT_STREAM + antlr3AsciiSetupStream(input, ANTLR3_CHARSTREAM); +#else /* CVC4_ANTLR3_OLD_INPUT_STREAM */ + antlr38BitSetupStream(input); + // In some libantlr3c 3.4-beta versions, this call is not included in the above. + // This is probably an erroneously-deleted line in the libantlr3c source since 3.2. + antlr3GenericSetupStream(input); +#endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */ + + return input; +} + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ + |