summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- LICENSE 4
-rw-r--r-- src/upb_decoder.c 179
-rw-r--r-- src/upb_decoder.h 27
-rw-r--r-- src/upb_srcsink.h 3
4 files changed, 113 insertions, 100 deletions
diff --git a/LICENSE b/LICENSE
index 159a95a..b8b44d3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
-Copyright (c) 2009, Joshua Haberman
-Copyright (c) 2009, Google Inc.
+Copyright (c) 2009-2010, Joshua Haberman
+Copyright (c) 2009-2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 73f8e9b..58e6bfa 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -18,7 +18,7 @@ const int8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val);
// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that >=10
// bytes are available at buf. Returns the number of bytes consumed, or 11 if
// the varint was unterminated after 10 bytes.
-INLINE int8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val)
+INLINE uint8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val)
{
// We inline this common case (1-byte varints), if that fails we dispatch to
// the full (non-inlined) version.
@@ -33,7 +33,7 @@ INLINE int8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val)
// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit
// varint is not a true wire type.
-INLINE int8_t upb_get_v_uint32(const uint8_t *buf, uint32_t *val)
+INLINE uint8_t upb_get_v_uint32(const uint8_t *buf, uint32_t *val)
{
uint64_t val64;
int8_t ret = upb_get_v_uint64(buf, end, &val64, status);
@@ -54,7 +54,8 @@ INLINE void upb_get_f_uint32(const uint8_t *buf, uint32_t *val)
#endif
}
-// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
+// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). Caller
+// promises that 8 bytes are available at buf.
INLINE void upb_get_f_uint64(const uint8_t *buf uint64_t *val)
{
#if UPB_UNALIGNED_READS_OK
@@ -67,9 +68,10 @@ INLINE void upb_get_f_uint64(const uint8_t *buf uint64_t *val)
#endif
}
-INLINE const uint8_t *upb_skip_v_uint64(const uint8_t *buf,
- const uint8_t *end,
- upb_status *status)
+// Skips a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that 10
+// bytes are available at "buf". Returns a pointer into the input (not a byte
+// count) -- presumably to the first byte after the skipped varint.
+INLINE const uint8_t *upb_skip_v_uint64(const uint8_t *buf)
{
const uint8_t *const maxend = buf + 10;
uint8_t last = 0x80;
@@ -82,7 +84,7 @@ INLINE const uint8_t *upb_skip_v_uint64(const uint8_t *buf,
// Parses a 64-bit varint that is known to be >= 2 bytes (the inline version
// handles 1 and 2 byte varints).
-const int8_t upb_get_v_uint64_full(const uint8_t *buf uint64_t *val)
+const uint8_t upb_get_v_uint64_full(const uint8_t *buf uint64_t *val)
{
const uint8_t *const maxend = buf + 9;
uint8_t last = 0x80;
@@ -102,7 +104,7 @@ INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
-/* Functions to read .proto values. *******************************************/
+/* upb_decoder ****************************************************************/
// The decoder keeps a stack with one entry per level of recursion.
// upb_decoder_frame is one frame of that stack.
@@ -113,36 +115,30 @@ typedef struct {
} upb_decoder_frame;
struct upb_decoder {
- // Immutable state of the decoder.
+ upb_src src; // upb_decoder is a upb_src.
+
upb_msgdef *toplevel_msgdef;
upb_bytesrc *bytesrc;
- // State pertaining to a particular decode (resettable).
- // Stack entries store the offset where the submsg ends (for groups, 0).
+ // We keep a stack of messages we have recursed into.
upb_decoder_frame stack[UPB_MAX_NESTING], *top, *limit;
- // The current buffer.
+ // The buffers of input data. See buffering code below for details.
upb_string *buf;
+ upb_string *nextbuf;
+ uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; // Used to bridge buf and nextbuf.
- // The overflow buffer. Used when fewer than UPB_MAX_ENCODED_SIZE bytes
- // are left in a buffer, the remaining bytes are copied here along with
- // the bytes from the next buffer (or 0x80 if the byte stream is EOF).
- uint8_t overflow_buf[UPB_MAX_ENCODED_SIZE];
-
- // The number of bytes we have yet to consume from this buffer.
- int32_t buf_bytes_remaining;
+ // The number of bytes we have yet to consume from "buf". This can be
+ // negative if we have skipped more bytes than are in the buffer, or if we
+ // have started to consume bytes from "nextbuf".
+ int32_t buf_bytesleft;
- // The overall stream offset of the beginning of this buffer.
+ // The overall stream offset of the end of "buf". If "buf" is NULL, it is as
+ // if "buf" was the empty string.
uint32_t buf_stream_offset;
-
- // Indicates that we are in the middle of skipping bytes or groups (or both).
- // If both are set, the byte-skipping needs to happen first.
- uint8_t skip_groups;
- uint32_t skip_bytes;
-
- bool eof;
};
+
/* upb_decoder construction/destruction. **************************************/
upb_decoder *upb_decoder_new(upb_msgdef *msgdef)
@@ -169,12 +165,13 @@ void upb_decoder_reset(upb_decoder *d, upb_sink *sink)
d->top->end_offset = 0;
}
+
/* upb_decoder buffering. *****************************************************/
+// Discards the current buffer if we are done with it, and makes the next
+// buffer current if there is one.
static void upb_decoder_advancebuf(upb_decoder *d)
{
- // Discard the current buffer if we are done with it, make the next buffer
- // current if there is one.
if(d->buf_bytes_remaining <= 0) {
if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf);
d->buf = d->nextbuf;
@@ -185,13 +182,9 @@ static void upb_decoder_advancebuf(upb_decoder *d)
static void upb_decoder_pullnextbuf(upb_decoder *d)
{
- if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) { // Need another buffer?
- // We test the eof flag both before and after the get; checking it
- // before lets us short-circuit the get if we are already at eof,
- // checking it after makes sure we don't report an error if the get only
- // failed because of eof.
- if(!(d->nextbuf = upb_bytesrc_get(d->bytesrc)) &&
- !upb_bytesrc_eof(d->bytesrc)) {
+ if(!d->nextbuf) {
+ d->nextbuf = upb_bytesrc_get(d->bytesrc);
+ if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) {
// There was an error in the byte stream, halt the decoder.
upb_copyerr(&d->status, upb_bytesrc_status(d->bytesrc));
return;
@@ -202,7 +195,10 @@ static void upb_decoder_pullnextbuf(upb_decoder *d)
static void upb_decoder_skipbytes(upb_decoder *d, int32_t bytes)
{
d->buf_bytes_remaining -= bytes;
- while(d->buf_bytes_remaining < 0) upb_decoder_getbuf(d);
+ while(d->buf_bytes_remaining <= 0) {
+ upb_decoder_pullnextbuf(d);
+ upb_decoder_advancebuf(d);
+ }
}
static void upb_decoder_skipgroup(upb_decoder *d)
@@ -213,31 +209,29 @@ static void upb_decoder_skipgroup(upb_decoder *d)
while(upb_decoder_getdef(d)) upb_decoder_skipval(d);
}
-static const uint8_t *upb_decoder_getbuf(upb_decoder *d, int32_t *bytes)
+static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, int32_t *bytes)
{
- if(d->buf_bytes_remaining < 10) {
- upb_strlen_t total = 0;
- if(d->buf) {
- upb_strlen_t len = upb_string_len(d->buf);
- memcpy(d->overflow_buf, upb_string_getrobuf(d->buf), len);
- total += len;
- if(d->nextbuf) {
- len = upb_string_len(d->nextbuf);
- if(total + len > 10) len = 10 - total;
- memcpy(d->overflow_buf + total, upb_string_getrobuf(d->nextbuf, len));
- total += len;
- }
- }
- memset(d->overflow_buf + total, 0x80, 10 - total);
- } else {
+ upb_decoder_pullnextbuf(d);
+ upb_decoder_advancebuf(d);
+ if(d->buf_bytes_remaining >= UPB_MAX_ENCODED_SIZE) {
return upb_string_getrobuf(d->buf) + upb_string_len(d->buf) -
d->buf_bytes_remaining;
+ } else {
+ upb_strlen_t total = 0;
+ if(d->buf) total += upb_decoder_append(d->buf, total);
+ if(d->nextbuf) total += upb_decoder_append(d->nextbuf, total);
+ memset(d->overflow_buf + total, 0x80, UPB_MAX_ENCODED_SIZE - total);
}
}
+// Returns a pointer to a buffer of data that is at least UPB_MAX_ENCODED_SIZE
+// bytes long. This buffer contains the next bytes in the stream (even if
+// those bytes span multiple buffers). *bytes is set to the number of actual
+// stream bytes that are available in the returned buffer. If
+// *bytes < UPB_MAX_ENCODED_SIZE, the buffer is padded with 0x80 bytes.
INLINE static const uint8_t *upb_decoder_getbuf(upb_decoder *d, int32_t *bytes)
{
- if(d->buf_bytes_remaining >= 10) {
+ if(d->buf_bytes_remaining >= UPB_MAX_ENCODED_SIZE) {
*bytes = d->buf_bytes_remaining;
return upb_string_getrobuf(d->buf) + upb_string_len(d->buf) -
d->buf_bytes_remaining;
@@ -246,25 +240,31 @@ INLINE static const uint8_t *upb_decoder_getbuf(upb_decoder *d, int32_t *bytes)
}
}
+/* upb_src implementation for upb_decoder. ************************************/
+
upb_fielddef *upb_decoder_getdef(upb_decoder *d)
{
// Detect end-of-submessage.
- if(offset >= d->top->end_offset) {
+ if(upb_decoder_offset(d) >= d->top->end_offset) {
d->eof = true;
return NULL;
}
// Handles the packed field case.
if(d->field) return d->field;
- if(d->eof) return NULL;
again:
uint32_t key;
- if(!upb_decoder_get_v_uint32(d, &key)) return NULL;
- if(upb_wiretype_from_key(key) == UPB_WIRE_TYPE_END_GROUP) {
+ if(!upb_decoder_get_v_uint32(d, &key)) {
+ return NULL;
+
+ if(d->key.wire_type == UPB_WIRE_TYPE_DELIMITED) {
+ // For delimited wire values we parse the length now, since we need it in
+ // all cases.
+ if(!upb_decoder_get_v_uint32(d, &d->delim_len)) return NULL;
+ } else if(upb_wiretype_from_key(key) == UPB_WIRE_TYPE_END_GROUP) {
if(isgroup(d->top->submsg_end)) {
d->eof = true;
- d->status->code = UPB_STATUS_EOF;
} else {
upb_seterr(d->status, UPB_STATUS_ERROR, "End group seen but current "
"message is not a group, byte offset: %zd",
@@ -273,59 +273,66 @@ again:
return NULL;
}
- // For delimited wire values we parse the length now, since we need it in all
- // cases.
- if(d->key.wire_type == UPB_WIRE_TYPE_DELIMITED) {
- if(!upb_decoder_get_v_uint32(d, &d->delim_len)) return NULL;
- }
-
// Look up field by tag number.
upb_fielddef *f = upb_msg_itof(d->top->msgdef, upb_fieldnum_from_key(key));
- if (!f || !upb_check_type(upb_wiretype_from_key(key), f->type)) {
- // Unknown field or incorrect wire type. In the future these cases may be
- // separated, like if we want to give the client unknown fields but not
- // incorrect fields.
+ if (!f) {
+ // Unknown field. If/when the upb_src interface supports reporting
+ // unknown fields we will implement that here.
upb_decoder_skipval(d);
goto again;
+ } else if (!upb_check_type(upb_wiretype_from_key(key), f->type)) {
+ // This is a recoverable error condition. We skip the value but also
+ // return NULL and report the error.
+ upb_decoder_skipval(d);
+ // TODO: better error message.
+ upb_seterr(&d->status, UPB_STATUS_ERROR, "Incorrect wire type.\n");
+ return NULL;
}
+ d->field = f;
return f;
}
bool upb_decoder_getval(upb_decoder *d, upb_valueptr val)
{
- uint32_t bytes;
if(expected_type_for_field == UPB_DELIMITED) {
// A string, bytes, or a length-delimited submessage. The latter isn't
// technically a string, but can be gotten as one to perform lazy parsing.
d->str = upb_string_tryrecycle(d->str);
- if (d->delimited_len <= d->buf_bytes_remaining) {
+ const upb_strlen_t total_len = d->delimited_len;
+ if (total_len <= d->buf_bytes_remaining) {
// The entire string is inside our current buffer, so we can just
// return a substring of the buffer without copying.
upb_string_substr(d->str, d->buf,
upb_string_len(d->buf) - d->buf_bytes_remaining,
- d->delimited_len);
- d->buf_bytes_remaining -= d->delimited_len;
+ total_len);
+ d->buf_bytes_remaining -= total_len
*val.str = d->str;
} else {
- // The string spans buffers, so we must copy.
- memcpy(upb_string_getrwbuf(d->str, len),
- upb_string_getrobuf(d->buf) + upb_string_len(d->buf),
- bar);
- if(!upb_bytesrc_append(d->bytesrc, d->str, len)) goto err;
+ // The string spans buffers, so we must copy from the current buffer,
+ // the next buffer (if we have one), and finally from the bytesrc.
+ char *str = upb_string_getrwbuf(d->str, d->);
+ upb_strlen_t len = 0;
+ len += upb_decoder_append(d->buf, len, total_len);
+ if(!upb_decoder_advancebuf(d)) goto err;
+ if(d->buf) len += upb_decoder_append(d->buf, len, total_len);
+ if(len < total_len)
+ if(!upb_bytesrc_append(d->bytesrc, d->str, len - bytes)) goto err;
}
+ d->field = NULL;
} else {
// For all of the integer types we need the bytes to be in a single
// contiguous buffer.
+ uint32_t bytes;
const uint8_t *buf = upb_decoder_getbuf(d, &bytes)
switch(expected_type_for_field) {
- case UPB_32BIT_VARINT:
+ case UPB_64BIT_VARINT:
if(upb_get_v_uint32(buf, val.uint32) > 10) goto err;
- if(f->type == UPB_TYPE(SINT32)) *val.int32 = upb_zzdec_32(*val.int32);
+ if(f->type == UPB_TYPE(SINT64)) *val.int64 = upb_zzdec_64(*val.int64);
break;
- case UPB_64BIT_VARINT: {
+ case UPB_32BIT_VARINT:
if(upb_get_v_uint64(buf, val.uint64) > 5) goto err;
- if(f->type == UPB_TYPE(SINT64)) *val.int64 = upb_zzdec_64(*val.int64);
+ if(f->type == UPB_TYPE(SINT32)) *val.int32 = upb_zzdec_32(*val.int32);
break;
case UPB_64BIT_FIXED:
if(bytes < 8) goto err;
@@ -338,9 +345,12 @@ bool upb_decoder_getval(upb_decoder *d, upb_valueptr val)
default:
// Including start/end group.
goto err;
+ }
+ if(wire_type != UPB_WIRE_TYPE_DELIMITED ||
+ upb_decoder_offset(d) >= d->packed_end_offset) {
+ d->field = NULL;
+ }
}
- if(non-packed field || packed field that is done)
- d->field = NULL;
return true;
err:
}
@@ -356,6 +366,7 @@ bool upb_decoder_skipval(upb_decoder *d) {
case UPB_WIRE_TYPE_START_GROUP:
return upb_skip_groups(1);
case UPB_WIRE_TYPE_DELIMITED:
+ // Works for both string/bytes *and* submessages.
return upb_skip_bytes(d->delimited_len);
default:
// Including UPB_WIRE_TYPE_END_GROUP.
diff --git a/src/upb_decoder.h b/src/upb_decoder.h
index ea20d3d..d40d9fc 100644
--- a/src/upb_decoder.h
+++ b/src/upb_decoder.h
@@ -1,15 +1,16 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * upb_decoder implements a high performance, callback-based, stream-oriented
- * decoder (comparable to the SAX model in XML parsers). For parsing protobufs
- * into in-memory messages (a more DOM-like model), see the routines in
- * upb_msg.h, which are layered on top of this decoder.
+ * upb_decoder implements a high performance, streaming decoder for protobuf
+ * data that works by implementing upb_src and getting its data from a
+ * upb_bytesrc.
*
- * TODO: the decoder currently does not support returning unknown values. This
- * can easily be added when it is needed.
+ * The decoder does not currently support non-blocking I/O, in the sense that
+ * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the
+ * decoder when data becomes available again. Support for this could be added,
+ * but it would add complexity and perhaps cost efficiency also.
*
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
+ * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details.
*/
#ifndef UPB_DECODER_H_
@@ -17,8 +18,8 @@
#include <stdbool.h>
#include <stdint.h>
-#include "upb.h"
-#include "descriptor.h"
+#include "upb_def.h"
+#include "upb_srcsink.h"
#ifdef __cplusplus
extern "C" {
@@ -33,17 +34,17 @@ typedef struct upb_decoder upb_decoder;
// Allocates and frees a upb_decoder, respectively.
upb_decoder *upb_decoder_new(upb_msgdef *md);
-void upb_decoder_free(upb_decoder *p);
+void upb_decoder_free(upb_decoder *d);
// Resets the internal state of an already-allocated decoder. This puts it in a
// state where it has not seen any data, and expects the next data to be from
// the beginning of a new protobuf. Parsers must be reset before they can be
// used. A decoder can be reset multiple times.
-void upb_decoder_reset(upb_decoder *p, upb_bytesrc *bytesrc);
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc);
// Returns a upb_src pointer by which the decoder can be used. The returned
-// upb_src is invalidated by upb_decoder_reset().
-upb_src *upb_decoder_getsrc(upb_decoder *p);
+// upb_src is invalidated by upb_decoder_reset() or upb_decoder_free().
+upb_src *upb_decoder_getsrc(upb_decoder *d);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h
index 6a60f31..3a57cc8 100644
--- a/src/upb_srcsink.h
+++ b/src/upb_srcsink.h
@@ -71,7 +71,7 @@ upb_status *upb_sink_status(upb_sink *sink);
/* upb_bytesrc ****************************************************************/
// Returns the next string in the stream. NULL is returned on error or eof.
-// The string must be at least "minlen" bytes long.
+// The string must be at least "minlen" bytes long unless the stream is eof.
//
// A ref is passed to the caller, though the caller is encouraged to pass the
// ref back to the bytesrc with upb_bytesrc_recycle(). This can help reduce
@@ -140,6 +140,7 @@ typedef struct {
typedef struct {
upb_src_vtable *vtbl;
upb_status status;
+ bool eof;
#ifndef NDEBUG
int state; // For debug-mode checking of API usage.
#endif
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback