summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2010-06-11 12:12:39 -0700
committerJoshua Haberman <joshua@reverberate.org>2010-06-11 12:12:39 -0700
commitedd1f5a61f3323992072fb71b6088f7ec485ff7d (patch)
tree68991dc3284f7723b507fe83195698de3b61e920 /src
parentd7e631d9b05e19662802ac8f0727adff0a2d9f98 (diff)
Work on decoder buffering.
Diffstat (limited to 'src')
-rw-r--r--src/upb_decoder.c148
-rw-r--r--src/upb_srcsink.h5
2 files changed, 85 insertions, 68 deletions
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 3a1c8f9..601c4c8 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -42,19 +42,21 @@ struct upb_decoder {
// We keep a stack of messages we have recursed into.
upb_decoder_frame stack[UPB_MAX_NESTING], *top, *limit;
- // The buffers of input data. See buffering code below for details.
+ // The buffer of input data. NULL is equivalent to the empty string.
upb_string *buf;
- upb_string *nextbuf;
- uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; // Used to bridge buf and nextbuf.
+
+ // Holds residual bytes when fewer than UPB_MAX_ENCODED_SIZE bytes remain.
+ uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE];
// The number of bytes we have yet to consume from "buf". This can be
// negative if we have skipped more bytes than are in the buffer, or if we
// have started to consume bytes from "nextbuf".
int32_t buf_bytesleft;
+ int32_t buf_offset;
// The overall stream offset of the end of "buf". If "buf" is NULL, it is as
// if "buf" was the empty string.
- uint32_t buf_endoffset;
+ uint32_t buf_stream_offset;
// Fielddef for the key we just read.
upb_fielddef *field;
@@ -76,29 +78,39 @@ struct upb_decoder {
static upb_strlen_t upb_decoder_offset(upb_decoder *d)
{
- return d->buf_endoffset - d->buf_bytesleft;
+ return d->buf_stream_offset - d->buf_offset;
}
-// Discards the current buffer if we are done with it, make the next buffer
-// current if there is one.
-static void upb_decoder_advancebuf(upb_decoder *d)
+static bool upb_decoder_nextbuf(upb_decoder *d)
{
- if(d->buf_bytesleft <= 0) {
- if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf);
- d->buf = d->nextbuf;
- d->nextbuf = NULL;
- if(d->buf) d->buf_bytesleft += upb_string_len(d->buf);
+ assert(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE);
+
+ // Copy residual bytes to temporary buffer.
+ if(d->buf_bytesleft > 0) {
+ memcpy(d->tmpbuf, upb_string_getrobuf(d->buf) + d->buf_offset,
+ d->buf_bytesleft);
}
-}
-static bool upb_decoder_pullnextbuf(upb_decoder *d)
-{
- if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) {
- d->nextbuf = upb_bytesrc_get(d->bytesrc, UPB_MAX_ENCODED_SIZE);
- if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) {
- // There was an error in the byte stream, halt the decoder.
+ // Recycle old buffer, pull new one.
+ if(d->buf) {
+ upb_bytesrc_recycle(d->bytesrc, d->buf);
+ d->buf_offset -= upb_string_len(d->buf);
+ d->buf_stream_offset += upb_string_len(d->buf);
+ }
+ d->buf = upb_bytesrc_get(d->bytesrc, UPB_MAX_ENCODED_SIZE);
+
+ // Handle cases arising from error or EOF.
+ if(d->buf) {
+ d->buf_bytesleft += upb_string_len(d->buf);
+ } else {
+ if(!upb_bytesrc_eof(d->bytesrc)) {
+ // Error from bytesrc.
upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
return false;
+ } else if(d->buf_bytesleft == 0) {
+ // EOF from bytesrc and we don't have any residual bytes left.
+ d->src.eof = true;
+ return false;
}
}
return true;
@@ -106,11 +118,7 @@ static bool upb_decoder_pullnextbuf(upb_decoder *d)
static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes)
{
- d->buf_bytesleft -= bytes;
- while(d->buf_bytesleft <= 0 && !upb_bytesrc_eof(d->bytesrc)) {
- if(!upb_decoder_pullnextbuf(d)) return false;
- upb_decoder_advancebuf(d);
- }
+ // TODO.
return true;
}
@@ -124,22 +132,28 @@ static upb_strlen_t upb_decoder_append(uint8_t *buf, upb_string *frombuf,
static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes)
{
- if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) {
- upb_decoder_pullnextbuf(d);
- upb_decoder_advancebuf(d);
- }
- if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE) {
+ if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE)
+ if(!upb_decoder_nextbuf(d)) return NULL;
+
+ assert(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE);
+
+ if(d->buf_offset >= 0) {
+ // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE
+ // contiguous bytes, so we can read directly out of it.
*bytes = d->buf_bytesleft;
- return (uint8_t*)upb_string_getrobuf(d->buf) + upb_string_len(d->buf) -
- d->buf_bytesleft;
+ return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset;
} else {
- upb_strlen_t len = 0;
- if(d->buf)
- len += upb_decoder_append(d->tmpbuf, d->buf, len, UPB_MAX_ENCODED_SIZE);
- if(d->nextbuf)
- len += upb_decoder_append(d->tmpbuf, d->nextbuf, len, UPB_MAX_ENCODED_SIZE);
- *bytes = len;
- memset(d->tmpbuf + len, 0x80, UPB_MAX_ENCODED_SIZE - len);
+ upb_strlen_t residual_bytes = -d->buf_offset;
+ if(d->buf) {
+ memcpy(d->tmpbuf + residual_bytes, upb_string_getrobuf(d->buf),
+ UPB_MAX_ENCODED_SIZE - residual_bytes);
+ *bytes = 10;
+ } else {
+ // All we have are residual bytes; pad them with 0x80.
+ memset(d->tmpbuf + residual_bytes, 0x80,
+ UPB_MAX_ENCODED_SIZE - residual_bytes);
+ *bytes = residual_bytes;
+ }
return d->tmpbuf;
}
}
@@ -154,12 +168,11 @@ static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes)
// indicate how many bytes were consumed.
static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes)
{
- if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE) {
- // The common case; only when we get to the last ten bytes of the buffer
- // do we have to do tricky things.
+ if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE && d->buf_offset >= 0) {
+ // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE
+ // contiguous bytes, so we can read directly out of it.
*bytes = d->buf_bytesleft;
- return (uint8_t*)upb_string_getrobuf(d->buf) + upb_string_len(d->buf) -
- d->buf_bytesleft;
+ return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset;
} else {
return upb_decoder_getbuf_full(d, bytes);
}
@@ -168,9 +181,13 @@ static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes)
static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes)
{
assert(bytes <= UPB_MAX_ENCODED_SIZE);
- //if()
+ d->buf_offset += bytes;
d->buf_bytesleft -= bytes;
- //if(d->buf_bytesleft > upb_string_length())
+ if(d->buf_offset < 0) {
+ // We still have residual bytes we have not consumed.
+ memmove(d->tmpbuf, d->tmpbuf + bytes, -d->buf_offset);
+ }
+ return true;
}
@@ -208,8 +225,7 @@ INLINE bool upb_decoder_readv64(upb_decoder *d, uint32_t *low, uint32_t *high)
return false;
done:
- upb_decoder_consume(d, buf - start);
- return true;
+ return upb_decoder_consume(d, buf - start);
}
// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit
@@ -291,7 +307,6 @@ static uint8_t upb_decoder_skipv64(upb_decoder *d)
/* upb_src implementation for upb_decoder. ************************************/
-bool upb_decoder_get_v_uint32(upb_decoder *d, uint32_t *key);
bool upb_decoder_skipval(upb_decoder *d);
upb_fielddef *upb_decoder_getdef(upb_decoder *d)
@@ -365,20 +380,20 @@ bool upb_decoder_getval(upb_decoder *d, upb_valueptr val)
d->buf_bytesleft -= total_len;
*val.str = d->str;
} else {
- // The string spans buffers, so we must copy from the current buffer,
- // the next buffer (if we have one), and finally from the bytesrc.
- uint8_t *str = (uint8_t*)upb_string_getrwbuf(d->str, total_len);
- upb_strlen_t len = 0;
- len += upb_decoder_append(str, d->buf, len, total_len);
- upb_decoder_advancebuf(d);
- if(d->buf) len += upb_decoder_append(str, d->buf, len, total_len);
- upb_string_getrwbuf(d->str, len); // Cheap resize.
- if(len < total_len) {
- if(!upb_bytesrc_append(d->bytesrc, d->str, total_len - len)) {
- upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
- return false;
- }
- }
+ //// The string spans buffers, so we must copy from the current buffer,
+ //// the next buffer (if we have one), and finally from the bytesrc.
+ //uint8_t *str = (uint8_t*)upb_string_getrwbuf(d->str, total_len);
+ //upb_strlen_t len = 0;
+ //len += upb_decoder_append(str, d->buf, len, total_len);
+ //upb_decoder_advancebuf(d);
+ //if(d->buf) len += upb_decoder_append(str, d->buf, len, total_len);
+ //upb_string_getrwbuf(d->str, len); // Cheap resize.
+ //if(len < total_len) {
+ // if(!upb_bytesrc_append(d->bytesrc, d->str, total_len - len)) {
+ // upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
+ // return false;
+ // }
+ //}
}
d->field = NULL;
} else {
@@ -521,7 +536,6 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef)
d->toplevel_msgdef = msgdef;
d->limit = &d->stack[UPB_MAX_NESTING];
d->buf = NULL;
- d->nextbuf = NULL;
d->str = upb_string_new();
upb_src_init(&d->src, &upb_decoder_src_vtbl);
return d;
@@ -535,7 +549,6 @@ void upb_decoder_free(upb_decoder *d)
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc)
{
if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf);
- if(d->nextbuf) upb_bytesrc_recycle(d->bytesrc, d->nextbuf);
d->top = d->stack;
d->top->msgdef = d->toplevel_msgdef;
// The top-level message is not delimited (we can keep receiving data for it
@@ -544,8 +557,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc)
d->top->end_offset = UINT32_MAX - 1;
d->bytesrc = bytesrc;
d->buf = NULL;
- d->nextbuf = NULL;
d->buf_bytesleft = 0;
- d->buf_endoffset = 0;
+ d->buf_stream_offset = 0;
+ d->buf_offset = 0;
}
-
diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h
index 8e5a09d..199149d 100644
--- a/src/upb_srcsink.h
+++ b/src/upb_srcsink.h
@@ -26,6 +26,11 @@
extern "C" {
#endif
+// Note! The "eof" flags work like feof() in C; they cannot report end-of-file
+// until a read has failed due to eof. They cannot preemptively tell you that
+// the next call will fail due to eof. Since these are the semantics that C
+// and UNIX provide, we're stuck with them if we want to support eg. stdio.
+
/* upb_src ********************************************************************/
// TODO: decide how to handle unknown fields.
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback