From 58a70b55c62cfefcbe7a55a2fd41ee6b87c7256f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 23 Jan 2011 16:29:10 -0800 Subject: Decoder code structure is mostly in-place. --- core/upb_stream.h | 20 ++- core/upb_string.h | 57 ++++++-- stream/upb_decoder.c | 363 ++++++++++++++++++++++----------------------------- 3 files changed, 212 insertions(+), 228 deletions(-) diff --git a/core/upb_stream.h b/core/upb_stream.h index cf01a5f..54fd930 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -171,14 +171,18 @@ INLINE void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ // Reads up to "count" bytes into "buf", returning the total number of bytes -// read. If <0, indicates error (check upb_bytesrc_status for details). +// read. If 0, indicates error and puts details in "status". INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count); + upb_strlen_t count, upb_status *status); // Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing -// existing string data (which avoids a copy). +// existing string data (which avoids a copy). On the other hand, if +// the data was *not* already in an existing string, this copies it into +// a upb_string, and if the data needs to be put in a specific range of +// memory (because eg. you need to put it into a different kind of string +// object) then upb_bytesrc_get() could be better. INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_strlen_t count); + upb_status *status); // A convenience function for getting all the remaining data in a upb_bytesrc // as a upb_string. Returns false and sets "status" if the operation fails. @@ -189,14 +193,6 @@ INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status); } -// Returns the current error status for the stream. -// Note! The "eof" flag works like feof() in C; it cannot report end-of-file -// until a read has failed due to eof. It cannot preemptively tell you that -// the next call will fail due to eof. Since these are the semantics that C -// and UNIX provide, we're stuck with them if we want to support eg. stdio. -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); -INLINE bool upb_bytesrc_eof(upb_bytesrc *src); - /* upb_bytesink ***************************************************************/ diff --git a/core/upb_string.h b/core/upb_string.h index 1f4b20c..04c0ae9 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -3,26 +3,39 @@ * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. * - * This file defines a simple string type. The overriding goal of upb_string - * is to avoid memcpy(), malloc(), and free() wheverever possible, while - * keeping both CPU and memory overhead low. Throughout upb there are - * situations where one wants to reference all or part of another string - * without copying. upb_string provides APIs for doing this. + * This file defines a simple string type which is length-delimited instead + * of NULL-terminated, and which has useful sharing semantics. + * + * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free() + * wheverever possible, while keeping both CPU and memory overhead low. + * Throughout upb there are situations where one wants to reference all or part + * of another string without copying. upb_string provides APIs for doing this. * * Characteristics of upb_string: * - strings are reference-counted. - * - strings are logically immutable. + * - strings are immutable (can be mutated only when first created or recycled). * - if a string has no other referents, it can be "recycled" into a new string * without having to reallocate the upb_string. * - strings can be substrings of other strings (owning a ref on the source * string). - * - strings are not thread-safe by default, but can be made so by calling a - * function. This is not the default because it causes extra CPU overhead. * * Reference-counted strings have recently fallen out of favor because of the * performance impacts of doing thread-safe reference counting with atomic * operations. We side-step this issue by not performing atomic operations * unless the string has been marked thread-safe. + * + * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched + * idiom that we go with it instead of making our pointers uint8_t*. + * + * WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE + * UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED! What this means is that if + * you are logically passing a reference to a upb_string to another thread + * (which implies that the other thread must eventually call unref of recycle), + * you have two options: + * + * - create a copy of the string that will be used in the other thread only. + * - call upb_string_get_synchronized_ref(), which will make getref, unref, and + * recycle thread-safe for this upb_string. */ #ifndef UPB_STRING_H @@ -83,10 +96,12 @@ struct _upb_string { // longer needed, it should be unref'd, never freed directly. upb_string *upb_string_new(); +// Internal-only; clients should call upb_string_unref(). void _upb_string_free(upb_string *str); // Releases a ref on the given string, which may free the memory. "str" -// can be NULL, in which case this is a no-op. +// can be NULL, in which case this is a no-op. WARNING: NOT THREAD_SAFE +// UNLESS THE STRING IS SYNCHRONIZED. INLINE void upb_string_unref(upb_string *str) { if (str && upb_atomic_read(&str->refcount) > 0 && upb_atomic_unref(&str->refcount)) { @@ -98,6 +113,7 @@ upb_string *upb_strdup(upb_string *s); // Forward-declare. // Returns a string with the same contents as "str". The caller owns a ref on // the returned string, which may or may not be the same object as "str. +// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED! INLINE upb_string *upb_string_getref(upb_string *str) { int refcount = upb_atomic_read(&str->refcount); if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str); @@ -163,8 +179,11 @@ void upb_string_substr(upb_string *str, upb_string *target_str, // data. Waiting for a clear use case before actually implementing it. // // Makes the string "str" a reference to the given string data. The caller -// guarantees that the given string data will not change or be deleted until -// a matching call to upb_string_detach(). +// guarantees that the given string data will not change or be deleted until a +// matching call to upb_string_detach(), which may block until any concurrent +// readers have finished reading. upb_string_detach() preserves the contents +// of the string by copying the referenced data if there are any other +// referents. // void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); // void upb_string_detach(upb_string *str); @@ -207,6 +226,22 @@ void upb_string_substr(upb_string *str, upb_string *target_str, _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK) #define UPB_STACK_STRING_LEN(str, len) \ _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK) + +// A convenient way of specifying upb_strings as literals, like: +// +// upb_streql(UPB_STRLIT("expected"), other_str); +// +// However, this requires either C99 compound initializers or C++. +// Must ONLY be called with a string literal as its argument! +//#ifdef __cplusplus +//namespace upb { +//class String : public upb_string { +// // This constructor must ONLY be called with a string literal. +// String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {} +//}; +//} +//#define UPB_STRLIT(str) upb::String(str) +//#endif #define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) /* upb_string library functions ***********************************************/ diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index b820b08..fbd7eba 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -11,127 +11,39 @@ #include #include "upb_def.h" -/* Functions to read wire values. *********************************************/ - -// These functions are internal to the decode, but might be moved into an -// internal header file if we at some point in the future opt to do code -// generation, because the generated code would want to inline these functions. -// The same applies to the functions to read .proto values below. - -const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status); - -// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). -INLINE const uint8_t *upb_get_v_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - // We inline this common case (1-byte varints), if that fails we dispatch to - // the full (non-inlined) version. - if((*buf & 0x80) == 0) { - *val = *buf & 0x7f; - return buf + 1; - } else { - return upb_get_v_uint64_t_full(buf, end, val, status); - } +/* Pure Decoding **************************************************************/ + +// The key fast-path varint-decoding routine. There are a lot of possibilities +// for optimization/experimentation here. +INLINE bool upb_decode_varint_fast(uint8_t **buf, uint8_t *end, uint64_t &val, + upb_status *status) { + *high = 0; + uint32_t b; + uint8_t *ptr = p->ptr; + b = *(*buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 28; + *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + + upb_seterr(status, UPB_ERROR, "Unterminated varint"); + return false; +done: + return true; } -// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit -// varint is not a true wire type. -INLINE const uint8_t *upb_get_v_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) -{ - uint64_t val64; - const uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status); - *val = (uint32_t)val64; // Discard the high bits. - return ret; -} -// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). -INLINE const uint8_t *upb_get_f_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - memcpy(val, buf, sizeof(uint32_t)); - return uint32_end; -} - -// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). -INLINE const uint8_t *upb_get_f_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - memcpy(val, buf, sizeof(uint64_t)); - return uint64_end; -} - -INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *const maxend = buf + 10; - uint8_t last = 0x80; - for(; buf < (uint8_t*)end && (last & 0x80); buf++) - last = *buf; - - if(buf >= end && buf <= maxend && (last & 0x80)) { - status->code = UPB_STATUS_NEED_MORE_DATA; - buf = end; - } else if(buf > maxend) { - status->code = UPB_ERROR_UNTERMINATED_VARINT; - buf = end; - } - return buf; -} - -INLINE const uint8_t *upb_skip_f_uint32_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint32_end; -} - -INLINE const uint8_t *upb_skip_f_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint64_end; -} - -/* Functions to read .proto values. *******************************************/ +/* Decoding/Buffering of individual values ************************************/ // Performs zig-zag decoding, which is used by sint32 and sint64. INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -// Parses a tag, places the result in *tag. -INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end, - upb_tag *tag, upb_status *status) -{ - uint32_t tag_int; - const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); - tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); - tag->field_number = tag_int >> 3; - return ret; -} - // The decoder keeps a stack with one entry per level of recursion. // upb_decoder_frame is one frame of that stack. typedef struct { @@ -144,6 +56,7 @@ struct upb_decoder { // Immutable state of the decoder. upb_src src; upb_dispatcher dispatcher; + upb_bytesrc *bytesrc; upb_msgdef *toplevel_msgdef; upb_decoder_frame stack[UPB_MAX_NESTING]; @@ -158,66 +71,108 @@ struct upb_decoder { // Current input buffer. upb_string *buf; + // Our current offset *within* buf. + upb_strlen_t buf_offset; + // The offset within the overall stream represented by the *beginning* of buf. upb_strlen_t buf_stream_offset; +}; - // Our current offset *within* buf. Will be negative if we are buffering - // from previous buffers in tmpbuf. - upb_strlen_t buf_offset; +// Called only from the slow path, this function copies the next "len" bytes +// from the stream to "data", adjusting "buf" and "end" appropriately. +INLINE bool upb_getbuf(upb_decoder *d, void *data, size_t len, + uint8_t **buf, uint8_t **end) { + while (len > 0) { + memcpy(data, *buf, *end-*buf); + len -= (*end-*buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + *buf = upb_string_getrobuf(d->buf); + *end = *buf + upb_string_len(d->buf); + } +} - // Holds any bytes we have from previous buffers. The number of bytes we - // have encoded here is -buf_offset, if buf_offset<0, 0 otherwise. - uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; -}; +// We use this path when we don't have UPB_MAX_ENCODED_SIZE contiguous bytes +// available in our current buffer. We don't inline this because we accept +// that it will be slow and we don't want to pay for two copies of it. +static bool upb_decode_varint_slow(upb_decoder *d) { + uint8_t buf[UPB_MAX_ENCODED_SIZE]; + uint8_t *p = buf, *end = buf + sizeof(buf); + for(int bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) + *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; + + if(d->status->code == UPB_EOF && (last & 0x80)) { + upb_seterr(status, UPB_ERROR, + "Provided data ended in the middle of a varint.\n"); + } else if(buf == maxend) { + upb_seterr(status, UPB_ERROR, + "Varint was unterminated after 10 bytes.\n"); + } else { + // Success. + return; + } +} -upb_flow_t upb_decode_varint(upb_decoder *d, ptrs *p, - uint32_t *low, uint32_t *high) { - if (p->end - p->ptr > UPB_MAX_ENCODED_SIZE) { - // Fast path; we know we have a complete varint in our existing buffer. - *high = 0; - uint32_t b; - uint8_t *ptr = p->ptr; - b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 28; - *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; - - if(bytes_available >= 10) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " - "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); - return false; - } +INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_buf, + const uint8_t **end, upb_tag *tag) { + const uint8_t *buf = *_buf, *end = *_end; + uint32_t tag_int; + // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). + if (end - buf < 2) goto slow; // unlikely. + tag_int = *buf & 0x7f; + if ((*(buf++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*buf & 0x7f) << 7; + if ((*(buf++) & 0x80) != 0) goto slow; // unlikely. +slow: + if (!upb_decode_varint_slow(d, _buf, _end)) return false; + buf = *_buf; // Trick the next line into not overwriting us. +done: + *_buf = buf; + tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); + tag->field_number = tag_int >> 3; + return true; +} + +INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, + uint32_t *low, uint32_t *high) { + if (p->end - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) + return upb_decode_varint_fast(d); + else + return upb_decode_varint_slow(d); +} - done: - p->ptr = ptr; +INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, + uint8_t **buf, uint8_t **end, upb_value *val) { + static const char table = {0, 8, 0, 0, 0, 4}; + size_t bytes = table[wt]; + if (*end - *buf >= bytes) { + // Common (fast) case. + memcpy(&val, *buf, bytes); + *buf += bytes; } else { - // Slow path: we may have to combine one or more buffers to get a whole - // varint worth of data. - uint8_t buf[UPB_MAX_ENCODED_SIZE]; - uint8_t *p = buf, *end = buf + sizeof(buf); - for(ing bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) - *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; - - if(d->status->code == UPB_EOF && (last & 0x80)) { - upb_seterr(status, UPB_ERROR, - "Provided data ended in the middle of a varint.\n"); - } else if(buf == maxend) { - upb_seterr(status, UPB_ERROR, - "Varint was unterminated after 10 bytes.\n"); - } else { - // Success. - return; - } - ungetbytes(d, buf, p - buf); + if (!upb_getbuf(d, &val, bytes, buf, end)) return false; + } + return true; +} + +// "val" initially holds the length of the string, this is replaced by the +// contents of the string. +INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { + upb_string_recycle(str); + upb_strlen_t len = upb_valu_getint32(*val); + if (*end - *buf >= len) { + // Common (fast) case. + upb_string_substr(*str, d->buf, *buf - upb_string_getrobuf(d->buf), len); + *buf += len; + } else { + if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, buf, end)) + return false; } + return true; } + +/* The main decoding loop *****************************************************/ + static const void *get_msgend(upb_decoder *d) { if(d->top->end_offset > 0) @@ -238,36 +193,29 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { return upb_types[ft].expected_wire_type == wt; } - -// Pushes a new stack frame for a submessage with the given len (which will -// be zero if the submessage is a group). -static const uint8_t *push(upb_decoder *d, const uint8_t *start, +static bool upb_push(upb_decoder *d, const uint8_t *start, uint32_t submsg_len, upb_fielddef *f, upb_status *status) { d->top->field = f; d->top++; if(d->top >= d->limit) { - upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED, - "Nesting exceeded maximum (%d levels)\n", - UPB_MAX_NESTING); - return NULL; + upb_seterr(status, UPB_ERROR, "Nesting too deep."); + return false; } - upb_decoder_frame *frame = d->top; - frame->end_offset = d->completed_offset + submsg_len; - frame->msgdef = upb_downcast_msgdef(f->def); - - upb_dispatch_startsubmsg(&d->dispatcher, f); - return get_msgend(d); + d->top->end_offset = d->completed_offset + submsg_len; + d->top->msgdef = upb_downcast_msgdef(f->def); + *submsg_end = get_msgend(d); + if (!upb_dispatch_startsubmsg(&d->dispatcher, f)) return false; + return true; } -// Pops a stack frame, returning a pointer for where the next submsg should -// end (or a pointer that is out of range for a group). -static const void *pop(upb_decoder *d, const uint8_t *start, upb_status *status) +static bool upb_pop(upb_decoder *d, const uint8_t *start, upb_status *status) { d->top--; upb_dispatch_endsubmsg(&d->dispatcher); - return get_msgend(d); + *submsg_end = get_msgend(d); + return true; } void upb_decoder_run(upb_src *src, upb_status *status) { @@ -278,11 +226,13 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_msgdef *msgdef = d->top->msgdef; upb_string *str = NULL; + upb_dispatch_startmsg(&d->dispatcher); + // Main loop: executed once per tag/field pair. while(1) { // Parse/handle tag. upb_tag tag; - CHECK(decode_tag(d, &buf, &end, &tag)); + CHECK(upb_decode_tag(d, &buf, &end, &tag)); // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. @@ -290,24 +240,19 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (tag.wire_type) { case UPB_WIRE_TYPE_END_GROUP: if(!isgroup(submsg_end)) { - upb_seterr(status, UPB_STATUS_ERROR, "End group seen but current " - "message is not a group, byte offset: %zd", - d->completed_offset + (completed - start)); + upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - submsg_end = pop(d, start, status, &msgdef); - completed = buf; - goto check_msgend; + CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); + goto check_msgend; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: // For the delimited case we are parsing the length. CHECK(upb_decode_varint(d, &buf, &end, &val)); break; case UPB_WIRE_TYPE_32BIT: - CHECK(upb_decode_32bit(d, &buf, &end, &val)); - break; case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_64bit(d, &buf, &end, &val)); + CHECK(upb_decode_fixed(d, tag.wire_type, &buf, &end, &val)); break; } @@ -315,24 +260,31 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); if (!f) { - // Unknown field. + if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) + CHECK(upb_decode_string(d, &val, &str)); + CHECK(upb_dispatch_unknownval(d, tag.field_number, val)); } else if (!upb_check_type(tag.wire_type, f->type)) { - // Field has incorrect type. + // TODO: put more details in this error msg. + upb_seterr(status, UPB_ERROR, "Field had incorrect type."); + goto err; } // Perform any further massaging of the data now that we have the fielddef. // Now we can distinguish strings from submessages, and we know about // zig-zag-encoded types. // TODO: handle packed encoding. + // TODO: if we were being paranoid, we could check for 32-bit-varint types + // that the top 32 bits all match the highest bit of the low 32 bits. + // If this is not true we are losing data. But the main protobuf library + // doesn't check this, and it would slow us down, so pass for now. switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK(push(d, start, upb_value_getint32(val), f, status, &msgdef)); - goto check_msgend; + CHECK(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); + goto check_msgend; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - CHECK(upb_decode_string(d, str, upb_value_getint32(val))); - upb_value_setstr(&val, str); + CHECK(upb_decode_string(d, &val, &str)); break; case UPB_TYPE(SINT32): upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); @@ -341,26 +293,27 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val))); break; default: - // Other types need no further processing at this point. + break; // Other types need no further processing at this point. } CHECK(upb_dispatch_value(d->sink, f, val, status)); check_msgend: while(buf >= submsg_end) { if(buf > submsg_end) { - upb_seterr(status, UPB_ERROR, "Expected submsg end offset " - "did not lie on a tag/value boundary."); + upb_seterr(status, UPB_ERROR, "Bad submessage end.") goto err; } - submsg_end = pop(d, start, status, &msgdef); + CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); } - completed = buf; } + CHECK(upb_dispatch_endmsg(&d->dispatcher)); + return; + err: - read = (char*)completed - (char*)start; - d->completed_offset += read; - return read; + if (upb_ok(status)) { + upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); + } } void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { -- cgit v1.2.3