diff options
Diffstat (limited to 'upb/pb/decoder.c')
-rw-r--r-- | upb/pb/decoder.c | 1198 |
1 files changed, 566 insertions, 632 deletions
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 18bb430..0cfb12e 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -1,208 +1,63 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2008-2011 Google Inc. See LICENSE for details. + * Copyright (c) 2008-2013 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> */ #include <inttypes.h> #include <setjmp.h> +#include <stdarg.h> #include <stddef.h> #include <stdlib.h> #include "upb/bytestream.h" -#include "upb/pb/decoder.h" -#include "upb/pb/varint.h" +#include "upb/pb/decoder.int.h" +#include "upb/pb/varint.int.h" -#define UPB_NONDELIMITED (0xffffffffffffffffULL) - -/* upb_pbdecoder ****************************************************************/ - -struct dasm_State; - -typedef struct { - const upb_fielddef *f; - uint64_t end_ofs; - uint32_t group_fieldnum; // UINT32_MAX for non-groups. - bool is_sequence; // frame represents seq or submsg/str? (f might be both). - bool is_packed; // true for packed primitive sequences. -} frame; - -struct upb_pbdecoder { - // Where we push parsed data (not owned). - upb_sink *sink; - - // Current input buffer and its stream offset. - const char *buf, *ptr, *end, *checkpoint; - uint64_t bufstart_ofs; - - // Buffer for residual bytes not parsed from the previous buffer. - char residual[16]; - char *residual_end; - - // Stores the user buffer passed to our decode function. - const char *buf_param; - size_t size_param; - - // Equal to size_param while we are in the residual buf, 0 otherwise. - size_t userbuf_remaining; - - // Used to temporarily store the return value before calling longjmp(). - size_t ret; - - // End of the delimited region, relative to ptr, or NULL if not in this buf. - const char *delim_end; - -#ifdef UPB_USE_JIT_X64 - // For JIT, which doesn't do bounds checks in the middle of parsing a field. - const char *jit_end, *effective_end; // == MIN(jit_end, delim_end) - - // Used momentarily by the generated code to store a value while a user - // function is called. - uint32_t tmp_len; - - const void *saved_rbp; +#ifdef UPB_DUMP_BYTECODE +#include <stdio.h> #endif - // Our internal stack. - frame *top, *limit; - frame stack[UPB_MAX_NESTING]; - - // For exiting the decoder on error. - jmp_buf exitjmp; -}; - -typedef struct { - // The top-level handlers that this plan calls into. We own a ref. - const upb_handlers *dest_handlers; - -#ifdef UPB_USE_JIT_X64 - // JIT-generated machine code (else NULL). - char *jit_code; - size_t jit_size; - char *debug_info; - - // For storing upb_jitmsginfo, which contains per-msg runtime data needed - // by the JIT. - // Maps upb_handlers* -> upb_jitmsginfo. - upb_inttable msginfo; - - // The following members are used only while the JIT is being built. - - // This pointer is allocated by dasm_init() and freed by dasm_free(). - struct dasm_State *dynasm; - - // For storing pclabel bases while we are building the JIT. - // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base - upb_inttable pclabels; - - // For marking labels that should go into the generated code. - // Maps pclabel -> owned char* label. - upb_inttable asmlabels; - - // This is not the same as len(pclabels) because the table only contains base - // offsets for each def, but each def can have many pclabels. - uint32_t pclabel_count; -#endif -} decoderplan; - -typedef struct { - uint8_t native_wire_type; - bool is_numeric; -} upb_decoder_typeinfo; - -static const upb_decoder_typeinfo upb_decoder_types[] = { - {UPB_WIRE_TYPE_END_GROUP, false}, // ENDGROUP - {UPB_WIRE_TYPE_64BIT, true}, // DOUBLE - {UPB_WIRE_TYPE_32BIT, true}, // FLOAT - {UPB_WIRE_TYPE_VARINT, true}, // INT64 - {UPB_WIRE_TYPE_VARINT, true}, // UINT64 - {UPB_WIRE_TYPE_VARINT, true}, // INT32 - {UPB_WIRE_TYPE_64BIT, true}, // FIXED64 - {UPB_WIRE_TYPE_32BIT, true}, // FIXED32 - {UPB_WIRE_TYPE_VARINT, true}, // BOOL - {UPB_WIRE_TYPE_DELIMITED, false}, // STRING - {UPB_WIRE_TYPE_START_GROUP, false}, // GROUP - {UPB_WIRE_TYPE_DELIMITED, false}, // MESSAGE - {UPB_WIRE_TYPE_DELIMITED, false}, // BYTES - {UPB_WIRE_TYPE_VARINT, true}, // UINT32 - {UPB_WIRE_TYPE_VARINT, true}, // ENUM - {UPB_WIRE_TYPE_32BIT, true}, // SFIXED32 - {UPB_WIRE_TYPE_64BIT, true}, // SFIXED64 - {UPB_WIRE_TYPE_VARINT, true}, // SINT32 - {UPB_WIRE_TYPE_VARINT, true}, // SINT64 -}; - -static upb_selector_t getselector(const upb_fielddef *f, - upb_handlertype_t type) { - upb_selector_t selector; - bool ok = upb_handlers_getselector(f, type, &selector); - UPB_ASSERT_VAR(ok, ok); - return selector; -} - - -/* decoderplan ****************************************************************/ - -#ifdef UPB_USE_JIT_X64 -// These defines are necessary for DynASM codegen. -// See dynasm/dasm_proto.h for more info. -#define Dst_DECL decoderplan *plan -#define Dst_REF (plan->dynasm) -#define Dst (plan) - -// In debug mode, make DynASM do internal checks (must be defined before any -// dasm header is included. -#ifndef NDEBUG -#define DASM_CHECKS -#endif - -#include "dynasm/dasm_proto.h" -#include "upb/pb/decoder_x64.h" -#endif - -void freeplan(void *_p) { - decoderplan *p = _p; - upb_handlers_unref(p->dest_handlers, p); -#ifdef UPB_USE_JIT_X64 - if (p->jit_code) upb_decoderplan_freejit(p); -#endif - free(p); -} - -static const decoderplan *getdecoderplan(const upb_handlers *h) { - if (upb_handlers_frametype(h) != upb_pbdecoder_getframetype()) - return NULL; - upb_selector_t sel; - if (!upb_handlers_getselector(UPB_BYTESTREAM_BYTES, UPB_HANDLER_STRING, &sel)) - return NULL; - return upb_handlers_gethandlerdata(h, sel); -} - -bool upb_pbdecoder_isdecoder(const upb_handlers *h) { - return getdecoderplan(h) != NULL; -} - -bool upb_pbdecoder_hasjitcode(const upb_handlers *h) { -#ifdef UPB_USE_JIT_X64 - const decoderplan *p = getdecoderplan(h); - if (!p) return false; - return p->jit_code != NULL; -#else - UPB_UNUSED(h); - return false; -#endif -} - -const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h) { - const decoderplan *p = getdecoderplan(h); - if (!p) return NULL; - return p->dest_handlers; +#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d); +#define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; } + +// Error messages that are shared between the bytecode and JIT decoders. +const char *kPbDecoderStackOverflow = "Nesting too deep."; + +// Error messages shared within this file. +static const char *kUnterminatedVarint = "Unterminated varint."; + +/* upb_pbdecoder **************************************************************/ + +static opcode halt = OP_HALT; + +// Whether an op consumes any of the input buffer. +static bool consumes_input(opcode op) { + switch (op) { + case OP_SETDISPATCH: + case OP_STARTMSG: + case OP_ENDMSG: + case OP_STARTSEQ: + case OP_ENDSEQ: + case OP_STARTSUBMSG: + case OP_ENDSUBMSG: + case OP_STARTSTR: + case OP_ENDSTR: + case OP_PUSHTAGDELIM: + case OP_POP: + case OP_SETDELIM: + case OP_SETGROUPNUM: + case OP_SETBIGGROUPNUM: + case OP_CHECKDELIM: + case OP_CALL: + case OP_BRANCH: + return false; + default: + return true; + } } - -/* upb_pbdecoder ****************************************************************/ - -static bool in_residual_buf(const upb_pbdecoder *d, const char *p); +static bool in_residual_buf(upb_pbdecoder *d, const char *p); // It's unfortunate that we have to micro-manage the compiler this way, // especially since this tuning is necessarily specific to one hardware @@ -210,68 +65,65 @@ static bool in_residual_buf(const upb_pbdecoder *d, const char *p); // with these annotations. Every instance where these appear, gcc 4.2.1 made // the wrong decision and degraded performance in benchmarks. #define FORCEINLINE static inline __attribute__((always_inline)) -#define NOINLINE static __attribute__((noinline)) +#define NOINLINE __attribute__((noinline)) -static upb_status *decoder_status(upb_pbdecoder *d) { +static void seterr(upb_pbdecoder *d, const char *msg) { // TODO(haberman): encapsulate this access to pipeline->status, but not sure // exactly what that interface should look like. - return &d->sink->pipeline_->status_; + upb_status_seterrliteral(&d->sink->pipeline_->status_, msg); } -UPB_NORETURN static void exitjmp(upb_pbdecoder *d) { - _longjmp(d->exitjmp, 1); +void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) { + seterr(d, msg); } -UPB_NORETURN static void abortjmp(upb_pbdecoder *d, const char *msg) { - d->ret = in_residual_buf(d, d->checkpoint) ? 0 : (d->checkpoint - d->buf); - upb_status_seterrliteral(decoder_status(d), msg); - exitjmp(d); -} /* Buffering ******************************************************************/ // We operate on one buffer at a time, which is either the user's buffer passed // to our "decode" callback or some residual bytes from the previous buffer. -// How many bytes can be safely read from d->ptr. -static size_t bufleft(upb_pbdecoder *d) { - assert(d->end >= d->ptr); - return d->end - d->ptr; +// How many bytes can be safely read from d->ptr without reading past end-of-buf +// or past the current delimited end. +static size_t curbufleft(upb_pbdecoder *d) { + assert(d->data_end >= d->ptr); + return d->data_end - d->ptr; +} + +static const char *ptr(upb_pbdecoder *d) { + return d->ptr; } // Overall offset of d->ptr. -uint64_t offset(const upb_pbdecoder *d) { - return d->bufstart_ofs + (d->ptr - d->buf); +uint64_t offset(upb_pbdecoder *d) { + return d->bufstart_ofs + (ptr(d) - d->buf); } // Advances d->ptr. static void advance(upb_pbdecoder *d, size_t len) { - assert(bufleft(d) >= len); + assert(curbufleft(d) >= len); d->ptr += len; } -// Commits d->ptr progress; should be called when an entire atomic value -// (ie tag+value) has been successfully consumed. -static void checkpoint(upb_pbdecoder *d) { - d->checkpoint = d->ptr; -} - static bool in_buf(const char *p, const char *buf, const char *end) { return p >= buf && p <= end; } -static bool in_residual_buf(const upb_pbdecoder *d, const char *p) { +static bool in_residual_buf(upb_pbdecoder *d, const char *p) { return in_buf(p, d->residual, d->residual_end); } // Calculates the delim_end value, which represents a combination of the // current buffer and the stack, so must be called whenever either is updated. static void set_delim_end(upb_pbdecoder *d) { - frame *f = d->top; - size_t delimlen = f->end_ofs - d->bufstart_ofs; - size_t buflen = d->end - d->buf; - d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ? - d->buf + delimlen : NULL; // NULL if not in this buf. + size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs; + if (delim_ofs <= (d->end - d->buf)) { + d->delim_end = d->buf + delim_ofs; + d->data_end = d->delim_end; + } else { + d->data_end = d->end; + d->delim_end = NULL; + } } static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) { @@ -279,498 +131,603 @@ static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) { d->buf = buf; d->end = end; set_delim_end(d); -#ifdef UPB_USE_JIT_X64 - // If we start parsing a value, we can parse up to 20 bytes without - // having to bounds-check anything (2 10-byte varints). Since the - // JIT bounds-checks only *between* values (and for strings), the - // JIT bails if there are not 20 bytes available. - d->jit_end = d->end - 20; -#endif -} - -static void suspendjmp(upb_pbdecoder *d) { - switchtobuf(d, d->residual, d->residual_end); - exitjmp(d); } static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) { - assert(d->ptr == d->end); - d->bufstart_ofs += (d->ptr - d->buf); + assert(curbufleft(d) == 0); + d->bufstart_ofs += (d->end - d->buf); switchtobuf(d, buf, buf + len); } -static void skip(upb_pbdecoder *d, size_t bytes) { - size_t avail = bufleft(d); - size_t total_avail = avail + d->userbuf_remaining; - if (avail >= bytes) { +static void checkpoint(upb_pbdecoder *d) { + // The assertion here is in the interests of efficiency, not correctness. + // We are trying to ensure that we don't checkpoint() more often than + // necessary. + assert(d->checkpoint != ptr(d)); + d->checkpoint = ptr(d); +} + +// Resumes the decoder from an initial state or from a previous suspend. +void *upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, + size_t size) { + UPB_UNUSED(p); // Useless; just for the benefit of the JIT. + d->buf_param = buf; + d->size_param = size; + d->skip = 0; + if (d->residual_end > d->residual) { + // We have residual bytes from the last buffer. + assert(ptr(d) == d->residual); + } else { + switchtobuf(d, buf, buf + size); + } + d->checkpoint = ptr(d); + return d; // For the JIT. +} + +// Suspends the decoder at the last checkpoint, without saving any residual +// bytes. If there are any unconsumed bytes, returns a short byte count. +size_t upb_pbdecoder_suspend(upb_pbdecoder *d) { + d->pc = d->last; + if (d->checkpoint == d->residual) { + // Checkpoint was in residual buf; no user bytes were consumed. + d->ptr = d->residual; + return 0; + } else { + assert(!in_residual_buf(d, d->checkpoint)); + assert(d->buf == d->buf_param); + size_t consumed = d->checkpoint - d->buf; + d->bufstart_ofs += consumed + d->skip; + d->residual_end = d->residual; + switchtobuf(d, d->residual, d->residual_end); + return consumed + d->skip; + } +} + +// Suspends the decoder at the last checkpoint, and saves any unconsumed +// bytes in our residual buffer. This is necessary if we need more user +// bytes to form a complete value, which might not be contiguous in the +// user's buffers. Always consumes all user bytes. +static size_t suspend_save(upb_pbdecoder *d) { + // We hit end-of-buffer before we could parse a full value. + // Save any unconsumed bytes (if any) to the residual buffer. + d->pc = d->last; + + if (d->checkpoint == d->residual) { + // Checkpoint was in residual buf; append user byte(s) to residual buf. + assert((d->residual_end - d->residual) + d->size_param <= + sizeof(d->residual)); + if (!in_residual_buf(d, ptr(d))) { + d->bufstart_ofs -= (d->residual_end - d->residual); + } + memcpy(d->residual_end, d->buf_param, d->size_param); + d->residual_end += d->size_param; + } else { + // Checkpoint was in user buf; old residual bytes not needed. + assert(!in_residual_buf(d, d->checkpoint)); + d->ptr = d->checkpoint; + size_t save = curbufleft(d); + assert(save <= sizeof(d->residual)); + memcpy(d->residual, ptr(d), save); + d->residual_end = d->residual + save; + d->bufstart_ofs = offset(d) + d->skip; + } + + switchtobuf(d, d->residual, d->residual_end); + return d->size_param + d->skip; +} + +static int32_t skip(upb_pbdecoder *d, size_t bytes) { + assert(!in_residual_buf(d, ptr(d)) || d->size_param == 0); + if (curbufleft(d) >= bytes) { // Skipped data is all in current buffer. advance(d, bytes); - } else if (total_avail >= bytes) { - // Skipped data is all in residual buf and param buffer. - assert(in_residual_buf(d, d->ptr)); - advance(d, avail); - advancetobuf(d, d->buf_param, d->size_param); - d->userbuf_remaining = 0; - advance(d, bytes - avail); } else { // Skipped data extends beyond currently available buffers. - // TODO: we need to do a checkdelim() equivalent that pops any frames that - // we just skipped past. - d->bufstart_ofs = offset(d) + bytes; - d->residual_end = d->residual; - d->ret += bytes - total_avail; - suspendjmp(d); + d->skip = bytes - curbufleft(d); + advance(d, curbufleft(d)); } + return DECODE_OK; } -static void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) { - assert(bytes <= bufleft(d)); - memcpy(buf, d->ptr, bytes); +FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) { + assert(bytes <= curbufleft(d)); + memcpy(buf, ptr(d), bytes); advance(d, bytes); } -NOINLINE void getbytes_slow(upb_pbdecoder *d, void *buf, size_t bytes) { - const size_t avail = bufleft(d); - if (avail + d->userbuf_remaining >= bytes) { - // Remaining residual buffer and param buffer together can satisfy. - // (We are only called from getbytes() which has already verified that - // the current buffer alone cannot satisfy). - assert(in_residual_buf(d, d->ptr)); - consumebytes(d, buf, avail); +static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf, + size_t bytes) { + const size_t avail = curbufleft(d); + consumebytes(d, buf, avail); + bytes -= avail; + assert(bytes > 0); + if (in_residual_buf(d, ptr(d))) { advancetobuf(d, d->buf_param, d->size_param); - consumebytes(d, buf + avail, bytes - avail); - d->userbuf_remaining = 0; + } + if (curbufleft(d) >= bytes) { + consumebytes(d, buf + avail, bytes); + return DECODE_OK; + } else if (d->data_end - d->buf == d->top->end_ofs - d->bufstart_ofs) { + seterr(d, "Submessage ended in the middle of a value"); + return upb_pbdecoder_suspend(d); } else { - // There is not enough remaining data, save residual bytes (if any) - // starting at the last committed checkpoint and exit. - if (in_buf(d->checkpoint, d->buf_param, d->buf_param + d->size_param)) { - // Checkpoint was in user buf; old residual bytes not needed. - d->ptr = d->checkpoint; - size_t save = bufleft(d); - assert(save <= sizeof(d->residual)); - memcpy(d->residual, d->ptr, save); - d->residual_end = d->residual + save; - d->bufstart_ofs = offset(d); - } else { - // Checkpoint was in residual buf; append user byte(s) to residual buf. - assert(d->checkpoint == d->residual); - assert((d->residual_end - d->residual) + d->size_param <= - sizeof(d->residual)); - if (!in_residual_buf(d, d->ptr)) { - d->bufstart_ofs -= (d->residual_end - d->residual); - } - memcpy(d->residual_end, d->buf_param, d->size_param); - d->residual_end += d->size_param; - } - suspendjmp(d); + return suspend_save(d); } } -FORCEINLINE void getbytes(upb_pbdecoder *d, void *buf, size_t bytes) { - if (bufleft(d) >= bytes) { +FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) { + if (curbufleft(d) >= bytes) { // Buffer has enough data to satisfy. consumebytes(d, buf, bytes); + return DECODE_OK; } else { - getbytes_slow(d, buf, bytes); + return getbytes_slow(d, buf, bytes); + } +} + +static NOINLINE size_t peekbytes_slow(upb_pbdecoder *d, void *buf, + size_t bytes) { + size_t ret = curbufleft(d); + memcpy(buf, ptr(d), ret); + if (in_residual_buf(d, ptr(d))) { + size_t copy = UPB_MIN(bytes - ret, d->size_param); + memcpy(buf + ret, d->buf_param, copy); + ret += copy; } + return ret; } -FORCEINLINE uint8_t getbyte(upb_pbdecoder *d) { - uint8_t byte; - getbytes(d, &byte, 1); - return byte; +FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) { + if (curbufleft(d) >= bytes) { + memcpy(buf, ptr(d), bytes); + return bytes; + } else { + return peekbytes_slow(d, buf, bytes); + } } /* Decoding of wire types *****************************************************/ -NOINLINE uint64_t decode_varint_slow(upb_pbdecoder *d) { +NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, + uint64_t *u64) { + *u64 = 0; uint8_t byte = 0x80; - uint64_t u64 = 0; int bitpos; for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { - u64 |= ((uint64_t)((byte = getbyte(d)) & 0x7F)) << bitpos; + int32_t ret = getbytes(d, &byte, 1); + if (ret >= 0) return ret; + *u64 |= (uint64_t)(byte & 0x7F) << bitpos; } - if(bitpos == 70 && (byte & 0x80)) - abortjmp(d, "Unterminated varint"); - return u64; -} - -NOINLINE uint32_t decode_v32_slow(upb_pbdecoder *d) { - uint64_t u64 = decode_varint_slow(d); - if (u64 > UINT32_MAX) abortjmp(d, "Unterminated 32-bit varint"); - return (uint32_t)u64; -} - -// For tags and delimited lengths, which must be <=32bit and are usually small. -FORCEINLINE uint32_t decode_v32(upb_pbdecoder *d) { - // Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048). - if (bufleft(d) >= 2) { - uint32_t ret = d->ptr[0] & 0x7f; - if ((d->ptr[0] & 0x80) == 0) { - advance(d, 1); - return ret; - } - ret |= (d->ptr[1] & 0x7f) << 7; - if ((d->ptr[1] & 0x80) == 0) { - advance(d, 2); - return ret; - } + if(bitpos == 70 && (byte & 0x80)) { + seterr(d, kUnterminatedVarint); + return upb_pbdecoder_suspend(d); } - return decode_v32_slow(d); + return DECODE_OK; } -FORCEINLINE uint64_t decode_varint(upb_pbdecoder *d) { - if (bufleft(d) >= 10) { +FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) { + if (curbufleft(d) > 0 && !(*ptr(d) & 0x80)) { + *u64 = *ptr(d); + advance(d, 1); + return DECODE_OK; + } else if (curbufleft(d) >= 10) { // Fast case. - upb_decoderet r = upb_vdecode_fast(d->ptr); - if (r.p == NULL) abortjmp(d, "Unterminated varint"); - advance(d, r.p - d->ptr); - return r.val; + upb_decoderet r = upb_vdecode_fast(ptr(d)); + if (r.p == NULL) { + seterr(d, kUnterminatedVarint); + return upb_pbdecoder_suspend(d); + } + advance(d, r.p - ptr(d)); + *u64 = r.val; + return DECODE_OK; } else { // Slow case -- varint spans buffer seam. - return decode_varint_slow(d); + return upb_pbdecoder_decode_varint_slow(d, u64); } } -FORCEINLINE uint32_t decode_fixed32(upb_pbdecoder *d) { - uint32_t u32; - getbytes(d, &u32, 4); - return u32; // TODO: proper byte swapping for big-endian machines. -} - -FORCEINLINE uint64_t decode_fixed64(upb_pbdecoder *d) { +FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) { uint64_t u64; - getbytes(d, &u64, 8); - return u64; // TODO: proper byte swapping for big-endian machines. + int32_t ret = decode_varint(d, &u64); + if (ret >= 0) return ret; + if (u64 > UINT32_MAX) { + seterr(d, "Unterminated 32-bit varint"); + return upb_pbdecoder_suspend(d); + } + *u32 = u64; + return DECODE_OK; } -static void push(upb_pbdecoder *d, const upb_fielddef *f, bool is_sequence, - bool is_packed, int32_t group_fieldnum, uint64_t end) { - frame *fr = d->top + 1; - if (fr >= d->limit) abortjmp(d, "Nesting too deep."); - fr->f = f; - fr->is_sequence = is_sequence; - fr->is_packed = is_packed; - fr->end_ofs = end; - fr->group_fieldnum = group_fieldnum; - d->top = fr; - set_delim_end(d); +// TODO: proper byte swapping for big-endian machines. +FORCEINLINE int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) { + return getbytes(d, u32, 4); } -static void push_msg(upb_pbdecoder *d, const upb_fielddef *f, uint64_t end) { - if (!upb_sink_startsubmsg(d->sink, getselector(f, UPB_HANDLER_STARTSUBMSG))) - abortjmp(d, "startsubmsg failed."); - int32_t group_fieldnum = (end == UPB_NONDELIMITED) ? - (int32_t)upb_fielddef_number(f) : -1; - push(d, f, false, false, group_fieldnum, end); +// TODO: proper byte swapping for big-endian machines. +FORCEINLINE int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) { + return getbytes(d, u64, 8); } -static void push_seq(upb_pbdecoder *d, const upb_fielddef *f, bool packed, - uint64_t end_ofs) { - if (!upb_sink_startseq(d->sink, getselector(f, UPB_HANDLER_STARTSEQ))) - abortjmp(d, "startseq failed."); - push(d, f, true, packed, -1, end_ofs); +int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) { + return decode_fixed32(d, u32); } -static void push_str(upb_pbdecoder *d, const upb_fielddef *f, size_t len, - uint64_t end) { - if (!upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), len)) - abortjmp(d, "startseq failed."); - push(d, f, false, false, -1, end); +int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) { + return decode_fixed64(d, u64); } -static void pop_submsg(upb_pbdecoder *d) { - upb_sink_endsubmsg(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSUBMSG)); - d->top--; - set_delim_end(d); -} +static double as_double(uint64_t n) { double d; memcpy(&d, &n, 8); return d; } +static float as_float(uint32_t n) { float f; memcpy(&f, &n, 4); return f; } -static void pop_seq(upb_pbdecoder *d) { - upb_sink_endseq(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSEQ)); - d->top--; - set_delim_end(d); +static bool push(upb_pbdecoder *d, uint64_t end) { + upb_pbdecoder_frame *fr = d->top; + + if (end > fr->end_ofs) { + seterr(d, "Submessage end extends past enclosing submessage."); + return false; + } else if ((fr + 1) == d->limit) { + seterr(d, kPbDecoderStackOverflow); + return false; + } + + fr++; + fr->end_ofs = end; + fr->u.dispatch = NULL; + fr->groupnum = -1; + d->top = fr; + return true; } -static void pop_string(upb_pbdecoder *d) { - upb_sink_endstr(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSTR)); - d->top--; - set_delim_end(d); +NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, + uint64_t expected) { + uint64_t data = 0; + size_t bytes = upb_value_size(expected); + size_t read = peekbytes(d, &data, bytes); + if (read == bytes && data == expected) { + // Advance past matched bytes. + int32_t ok = getbytes(d, &data, read); + UPB_ASSERT_VAR(ok, ok < 0); + return DECODE_OK; + } else if (read < bytes && memcmp(&data, &expected, read) == 0) { + return suspend_save(d); + } else { + return DECODE_MISMATCH; + } } -static void checkdelim(upb_pbdecoder *d) { - while (d->delim_end && d->ptr >= d->delim_end) { - // TODO(haberman): not sure what to do about this; if we detect this error - // we can possibly violate the promise that errors are always signaled by a - // short "parsed byte" count (because all bytes might have been successfully - // parsed prior to detecting this error). - // if (d->ptr > d->delim_end) abortjmp(d, "Bad submessage end"); - if (d->top->is_sequence) { - pop_seq(d); - } else { - pop_submsg(d); +int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, uint32_t fieldnum, + uint8_t wire_type) { + if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER) { + seterr(d, "Invalid field number"); + return upb_pbdecoder_suspend(d); + } + + if (wire_type == UPB_WIRE_TYPE_END_GROUP) { + if (fieldnum != d->top->groupnum) { + seterr(d, "Unmatched ENDGROUP tag."); + return upb_pbdecoder_suspend(d); + } + return DECODE_ENDGROUP; + } + + // TODO: deliver to unknown field callback. + switch (wire_type) { + case UPB_WIRE_TYPE_VARINT: { + uint64_t u64; + return decode_varint(d, &u64); + } + case UPB_WIRE_TYPE_32BIT: + return skip(d, 4); + case UPB_WIRE_TYPE_64BIT: + return skip(d, 8); + case UPB_WIRE_TYPE_DELIMITED: { + uint32_t len; + CHECK_RETURN(decode_v32(d, &len)); + return skip(d, len); } + case UPB_WIRE_TYPE_START_GROUP: + seterr(d, "Can't handle unknown groups yet"); + return upb_pbdecoder_suspend(d); + case UPB_WIRE_TYPE_END_GROUP: + default: + seterr(d, "Invalid wire type"); + return upb_pbdecoder_suspend(d); } } +static int32_t dispatch(upb_pbdecoder *d) { + upb_inttable *dispatch = d->top->u.dispatch; + + // Decode tag. + uint32_t tag; + CHECK_RETURN(decode_v32(d, &tag)); + uint8_t wire_type = tag & 0x7; + uint32_t fieldnum = tag >> 3; + + // Lookup tag. Because of packed/non-packed compatibility, we have to + // check the wire type against two possibilities. + upb_value val; + if (upb_inttable_lookup32(dispatch, fieldnum, &val)) { + uint64_t v = upb_value_getuint64(val); + if (wire_type == (v & 0xff)) { + d->pc = d->top->base + (v >> 16); + return DECODE_OK; + } else if (wire_type == ((v >> 8) & 0xff)) { + bool found = + upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val); + UPB_ASSERT_VAR(found, found); + d->pc = d->top->base + upb_value_getuint64(val); + return DECODE_OK; + } + } + + // Unknown field or ENDGROUP. + int32_t ret = upb_pbdecoder_skipunknown(d, fieldnum, wire_type); -/* Decoding of .proto types ***************************************************/ - -// Technically, we are losing data if we see a 32-bit varint that is not -// properly sign-extended. We could detect this and error about the data loss, -// but proto2 does not do this, so we pass. - -#define T(type, sel, wt, name, convfunc) \ - static void decode_ ## type(upb_pbdecoder *d, const upb_fielddef *f) { \ - upb_sink_put ## name(d->sink, getselector(f, UPB_HANDLER_ ## sel), \ - (convfunc)(decode_ ## wt(d))); \ - } \ - -static double upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; } -static float upb_asfloat(uint32_t n) { float f; memcpy(&f, &n, 4); return f; } - -T(INT32, INT32, varint, int32, int32_t) -T(INT64, INT64, varint, int64, int64_t) -T(UINT32, UINT32, varint, uint32, uint32_t) -T(UINT64, UINT64, varint, uint64, uint64_t) -T(FIXED32, UINT32, fixed32, uint32, uint32_t) -T(FIXED64, UINT64, fixed64, uint64, uint64_t) -T(SFIXED32, INT32, fixed32, int32, int32_t) -T(SFIXED64, INT64, fixed64, int64, int64_t) -T(BOOL, BOOL, varint, bool, bool) -T(ENUM, INT32, varint, int32, int32_t) -T(DOUBLE, DOUBLE, fixed64, double, upb_asdouble) -T(FLOAT, FLOAT, fixed32, float, upb_asfloat) -T(SINT32, INT32, varint, int32, upb_zzdec_32) -T(SINT64, INT64, varint, int64, upb_zzdec_64) -#undef T - -static void decode_GROUP(upb_pbdecoder *d, const upb_fielddef *f) { - push_msg(d, f, UPB_NONDELIMITED); -} - -static void decode_MESSAGE(upb_pbdecoder *d, const upb_fielddef *f) { - uint32_t len = decode_v32(d); - push_msg(d, f, offset(d) + len); -} - -static void decode_STRING(upb_pbdecoder *d, const upb_fielddef *f) { - uint32_t strlen = decode_v32(d); - if (strlen <= bufleft(d)) { - upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), strlen); - if (strlen) - upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING), - d->ptr, strlen); - upb_sink_endstr(d->sink, getselector(f, UPB_HANDLER_ENDSTR)); - advance(d, strlen); + if (ret == DECODE_ENDGROUP) { + d->pc = d->top->base - 1; // Back to OP_ENDMSG. + return DECODE_OK; } else { - // Buffer ends in the middle of the string; need to push a decoder frame - // for it. - push_str(d, f, strlen, offset(d) + strlen); - if (bufleft(d)) { - upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING), - d->ptr, bufleft(d)); - advance(d, bufleft(d)); - } - d->bufstart_ofs = offset(d); - d->residual_end = d->residual; - suspendjmp(d); + d->pc = d->last - 1; // Rewind to CHECKDELIM. + return ret; } } /* The main decoding loop *****************************************************/ -static const upb_fielddef *decode_tag(upb_pbdecoder *d) { - while (1) { - uint32_t tag = decode_v32(d); - uint8_t wire_type = tag & 0x7; - uint32_t fieldnum = tag >> 3; const upb_fielddef *f = NULL; - const upb_handlers *h = d->sink->top->h; // TODO(haberman): rm - f = upb_msgdef_itof(upb_handlers_msgdef(h), fieldnum); - bool packed = false; - - if (f) { - // Wire type check. - upb_descriptortype_t type = upb_fielddef_descriptortype(f); - if (wire_type == upb_decoder_types[type].native_wire_type) { - // Wire type is ok. - } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED && - upb_decoder_types[type].is_numeric)) { - // Wire type is ok (and packed). - packed = true; - } else { - f = NULL; - } - } - - // There are no explicit "startseq" or "endseq" markers in protobuf - // streams, so we have to infer them by noticing when a repeated field - // starts or ends. - frame *fr = d->top; - if (fr->is_sequence && fr->f != f) { - pop_seq(d); - fr = d->top; - } +size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, + size_t size) { + upb_pbdecoder *d = closure; + const upb_pbdecoderplan *p = hd; + assert(buf); + upb_pbdecoder_resume(d, NULL, buf, size); + UPB_UNUSED(p); - if (f && upb_fielddef_isseq(f) && !fr->is_sequence) { - if (packed) { - uint32_t len = decode_v32(d); - push_seq(d, f, true, offset(d) + len); - checkpoint(d); - } else { - push_seq(d, f, false, fr->end_ofs); - } - } +#define VMCASE(op, code) \ + case op: { code; if (consumes_input(op)) checkpoint(d); break; } +#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \ + VMCASE(OP_PARSE_ ## type, { \ + ctype val; \ + CHECK_RETURN(decode_ ## wt(d, &val)); \ + upb_sink_put ## name(d->sink, arg, (convfunc)(val)); \ + }) - if (f) return f; - - // Unknown field or ENDGROUP. - if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER) - abortjmp(d, "Invalid field number"); - switch (wire_type) { - case UPB_WIRE_TYPE_VARINT: decode_varint(d); break; - case UPB_WIRE_TYPE_32BIT: skip(d, 4); break; - case UPB_WIRE_TYPE_64BIT: skip(d, 8); break; - case UPB_WIRE_TYPE_DELIMITED: skip(d, decode_v32(d)); break; - case UPB_WIRE_TYPE_START_GROUP: - abortjmp(d, "Can't handle unknown groups yet"); - case UPB_WIRE_TYPE_END_GROUP: - if (fieldnum != fr->group_fieldnum) - abortjmp(d, "Unmatched ENDGROUP tag"); - pop_submsg(d); - break; - default: - abortjmp(d, "Invalid wire type"); + while(1) { + d->last = d->pc; + int32_t instruction = *d->pc++; + opcode op = getop(instruction); + uint32_t arg = instruction >> 8; + int32_t longofs = arg; + assert(ptr(d) != d->residual_end); +#ifdef UPB_DUMP_BYTECODE + fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d " + "%x %s (%d)\n", + (int)offset(d), + (int)(ptr(d) - d->buf), + (int)(d->data_end - ptr(d)), + (int)(d->end - ptr(d)), + (int)((d->top->end_ofs - d->bufstart_ofs) - (ptr(d) - d->buf)), + (int)(d->pc - 1 - upb_pbdecoderplan_codebase(p)), + upb_pbdecoder_getopname(op), + arg); +#endif + switch (op) { + // Technically, we are losing data if we see a 32-bit varint that is not + // properly sign-extended. We could detect this and error about the data + // loss, but proto2 does not do this, so we pass. + PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t) + PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t) + PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t) + PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t) + PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t) + PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t) + PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t) + PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t) + PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t) + PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t) + PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t) + PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t) + PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t) + + VMCASE(OP_SETDISPATCH, + d->top->base = d->pc - 1; + memcpy(&d->top->u.dispatch, d->pc, sizeof(void*)); + d->pc += sizeof(void*) / sizeof(uint32_t); + ) + VMCASE(OP_STARTMSG, + CHECK_SUSPEND(upb_sink_startmsg(d->sink)); + ) + VMCASE(OP_ENDMSG, + CHECK_SUSPEND(upb_sink_endmsg(d->sink)); + assert(d->call_len > 0); + d->pc = d->callstack[--d->call_len]; + ) + VMCASE(OP_STARTSEQ, + CHECK_SUSPEND(upb_sink_startseq(d->sink, arg)); + ) + VMCASE(OP_ENDSEQ, + CHECK_SUSPEND(upb_sink_endseq(d->sink, arg)); + ) + VMCASE(OP_STARTSUBMSG, + CHECK_SUSPEND(upb_sink_startsubmsg(d->sink, arg)); + ) + VMCASE(OP_ENDSUBMSG, + CHECK_SUSPEND(upb_sink_endsubmsg(d->sink, arg)); + ) + VMCASE(OP_STARTSTR, + uint32_t len = d->top->end_ofs - offset(d); + CHECK_SUSPEND(upb_sink_startstr(d->sink, arg, len)); + if (len == 0) { + d->pc++; // Skip OP_STRING. + } + ) + VMCASE(OP_STRING, + uint32_t len = curbufleft(d); + CHECK_SUSPEND(upb_sink_putstring(d->sink, arg, ptr(d), len)); + advance(d, len); + if (d->delim_end == NULL) { // String extends beyond this buf? + d->pc--; + d->bufstart_ofs += size; + d->residual_end = d->residual; + return size; + } + ) + VMCASE(OP_ENDSTR, + CHECK_SUSPEND(upb_sink_endstr(d->sink, arg)); + ) + VMCASE(OP_PUSHTAGDELIM, + CHECK_SUSPEND(push(d, d->top->end_ofs)); + ) + VMCASE(OP_POP, + assert(d->top > d->stack); + d->top--; + ) + VMCASE(OP_PUSHLENDELIM, + uint32_t len; + CHECK_RETURN(decode_v32(d, &len)); + CHECK_SUSPEND(push(d, offset(d) + len)); + set_delim_end(d); + ) + VMCASE(OP_SETDELIM, + set_delim_end(d); + ) + VMCASE(OP_SETGROUPNUM, + d->top->groupnum = arg; + ) + VMCASE(OP_SETBIGGROUPNUM, + d->top->groupnum = *d->pc++; + ) + VMCASE(OP_CHECKDELIM, + assert(!(d->delim_end && ptr(d) > d->delim_end)); + if (ptr(d) == d->delim_end) + d->pc += longofs; + ) + VMCASE(OP_CALL, + d->callstack[d->call_len++] = d->pc; + d->pc += longofs; + ) + VMCASE(OP_BRANCH, + d->pc += longofs; + ) + VMCASE(OP_TAG1, + CHECK_SUSPEND(curbufleft(d) > 0); + uint8_t expected = (arg >> 8) & 0xff; + if (*ptr(d) == expected) { + advance(d, 1); + } else { + int8_t shortofs; + badtag: + shortofs = arg; + if (shortofs == LABEL_DISPATCH) { + CHECK_RETURN(dispatch(d)); + } else { + d->pc += shortofs; + break; // Avoid checkpoint(). + } + } + ) + VMCASE(OP_TAG2, + CHECK_SUSPEND(curbufleft(d) > 0); + uint16_t expected = (arg >> 8) & 0xffff; + if (curbufleft(d) >= 2) { + uint16_t actual; + memcpy(&actual, ptr(d), 2); + if (expected == actual) { + advance(d, 2); + } else { + goto badtag; + } + } else { + int32_t result = upb_pbdecoder_checktag_slow(d, expected); + if (result == DECODE_MISMATCH) goto badtag; + if (result >= 0) return result; + } + ) + VMCASE(OP_TAGN, { + uint64_t expected; + memcpy(&expected, d->pc, 8); + d->pc += 2; + int32_t result = upb_pbdecoder_checktag_slow(d, expected); + if (result == DECODE_MISMATCH) goto badtag; + if (result >= 0) return result; + }) + VMCASE(OP_HALT, { + return size; + }) } - // TODO: deliver to unknown field callback. - checkpoint(d); - checkdelim(d); } } -void *start(void *closure, const void *handler_data, size_t size_hint) { - UPB_UNUSED(handler_data); +void *upb_pbdecoder_start(void *closure, const void *handler_data, + size_t size_hint) { UPB_UNUSED(size_hint); upb_pbdecoder *d = closure; + const upb_pbdecoderplan *plan = handler_data; + UPB_UNUSED(plan); + if (upb_pbdecoderplan_hasjitcode(plan)) { + d->top->u.closure = d->sink->top->closure; + d->call_len = 0; + } else { + d->call_len = 1; + d->pc = upb_pbdecoderplan_codebase(plan); + } assert(d); assert(d->sink); - upb_sink_startmsg(d->sink); + if (plan->topmethod->dest_handlers) { + assert(d->sink->top->h == plan->topmethod->dest_handlers); + } + d->status = &d->sink->pipeline_->status_; return d; } -bool end(void *closure, const void *handler_data) { - UPB_UNUSED(handler_data); +bool upb_pbdecoder_end(void *closure, const void *handler_data) { upb_pbdecoder *d = closure; + const upb_pbdecoderplan *plan = handler_data; if (d->residual_end > d->residual) { - // We have preserved bytes. - upb_status_seterrliteral(decoder_status(d), "Unexpected EOF"); - return false; - } - - // We may need to dispatch a top-level implicit frame. - if (d->top == d->stack + 1 && - d->top->is_sequence && - !d->top->is_packed) { - pop_seq(d); - } - if (d->top != d->stack) { - upb_status_seterrliteral( - decoder_status(d), "Ended inside delimited field."); + seterr(d, "Unexpected EOF"); return false; } - upb_sink_endmsg(d->sink); - return true; -} - -size_t decode(void *closure, const void *hd, const char *buf, size_t size) { - upb_pbdecoder *d = closure; - const decoderplan *plan = hd; - UPB_UNUSED(plan); - assert(d->sink->top->h == plan->dest_handlers); - - if (size == 0) return 0; - // Assume we'll consume the whole buffer unless this is overwritten. - d->ret = size; - d->buf_param = buf; - d->size_param = size; - - if (_setjmp(d->exitjmp)) { - // Hit end-of-buffer or error. - return d->ret; - } - - if (d->residual_end > d->residual) { - // We have residual bytes from the last buffer. - d->userbuf_remaining = d->size_param; - } else { - d->userbuf_remaining = 0; - advancetobuf(d, buf, d->size_param); - - if (d->top != d->stack && - upb_fielddef_isstring(d->top->f) && - !d->top->is_sequence) { - // Last buffer ended in the middle of a string; deliver more of it. - size_t len = d->top->end_ofs - offset(d); - if (d->size_param >= len) { - upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING), - d->ptr, len); - advance(d, len); - pop_string(d); - } else { - upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING), - d->ptr, d->size_param); - advance(d, d->size_param); - d->residual_end = d->residual; - advancetobuf(d, d->residual, 0); - return d->size_param; - } - } - } - checkpoint(d); - const upb_fielddef *f = d->top->f; - while(1) { + // Message ends here. + uint64_t end = offset(d); + d->top->end_ofs = end; + char dummy; + if (upb_pbdecoderplan_hasjitcode(plan)) { #ifdef UPB_USE_JIT_X64 - upb_decoder_enterjit(d, plan); - checkpoint(d); - set_delim_end(d); // JIT doesn't keep this current. + if (d->top != d->stack) + d->stack->end_ofs = 0; + upb_pbdecoderplan_jitcode(plan)(closure, handler_data, &dummy, 0); #endif - checkdelim(d); - if (!d->top->is_packed) { - f = decode_tag(d); + } else { + d->stack->end_ofs = end; + uint32_t *p = d->pc - 1; + if (getop(*p) == OP_CHECKDELIM) { + // Rewind from OP_TAG* to OP_CHECKDELIM. + assert(getop(*d->pc) == OP_TAG1 || + getop(*d->pc) == OP_TAG2 || + getop(*d->pc) == OP_TAGN); + d->pc = p; } + upb_pbdecoder_decode(closure, handler_data, &dummy, 0); + } - switch (upb_fielddef_descriptortype(f)) { - case UPB_DESCRIPTOR_TYPE_DOUBLE: decode_DOUBLE(d, f); break; - case UPB_DESCRIPTOR_TYPE_FLOAT: decode_FLOAT(d, f); break; - case UPB_DESCRIPTOR_TYPE_INT64: decode_INT64(d, f); break; - case UPB_DESCRIPTOR_TYPE_UINT64: decode_UINT64(d, f); break; - case UPB_DESCRIPTOR_TYPE_INT32: decode_INT32(d, f); break; - case UPB_DESCRIPTOR_TYPE_FIXED64: decode_FIXED64(d, f); break; - case UPB_DESCRIPTOR_TYPE_FIXED32: decode_FIXED32(d, f); break; - case UPB_DESCRIPTOR_TYPE_BOOL: decode_BOOL(d, f); break; - case UPB_DESCRIPTOR_TYPE_STRING: UPB_FALLTHROUGH_INTENDED; - case UPB_DESCRIPTOR_TYPE_BYTES: decode_STRING(d, f); break; - case UPB_DESCRIPTOR_TYPE_GROUP: decode_GROUP(d, f); break; - case UPB_DESCRIPTOR_TYPE_MESSAGE: decode_MESSAGE(d, f); break; - case UPB_DESCRIPTOR_TYPE_UINT32: decode_UINT32(d, f); break; - case UPB_DESCRIPTOR_TYPE_ENUM: decode_ENUM(d, f); break; - case UPB_DESCRIPTOR_TYPE_SFIXED32: decode_SFIXED32(d, f); break; - case UPB_DESCRIPTOR_TYPE_SFIXED64: decode_SFIXED64(d, f); break; - case UPB_DESCRIPTOR_TYPE_SINT32: decode_SINT32(d, f); break; - case UPB_DESCRIPTOR_TYPE_SINT64: decode_SINT64(d, f); break; - } - checkpoint(d); + if (d->call_len != 0) { + seterr(d, "Unexpected EOF"); + return false; } + + return upb_ok(&d->sink->pipeline_->status_); } void init(void *_d, upb_pipeline *p) { UPB_UNUSED(p); upb_pbdecoder *d = _d; - d->limit = &d->stack[UPB_MAX_NESTING]; + d->limit = &d->stack[UPB_DECODER_MAX_NESTING]; d->sink = NULL; + d->callstack[0] = &halt; // reset() must be called before decoding; this is guaranteed by assert() in // start(). } @@ -778,15 +735,13 @@ void init(void *_d, upb_pipeline *p) { void reset(void *_d) { upb_pbdecoder *d = _d; d->top = d->stack; - d->top->is_sequence = false; - d->top->is_packed = false; - d->top->group_fieldnum = UINT32_MAX; - d->top->end_ofs = UPB_NONDELIMITED; + d->top->end_ofs = UINT64_MAX; d->bufstart_ofs = 0; d->ptr = d->residual; d->buf = d->residual; d->end = d->residual; d->residual_end = d->residual; + d->call_len = 1; } bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink* sink) { @@ -807,24 +762,3 @@ const upb_frametype upb_pbdecoder_frametype = { const upb_frametype *upb_pbdecoder_getframetype() { return &upb_pbdecoder_frametype; } - -const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest, - bool allowjit, - const void *owner) { - UPB_UNUSED(allowjit); - decoderplan *p = malloc(sizeof(*p)); - assert(upb_handlers_isfrozen(dest)); - p->dest_handlers = dest; - upb_handlers_ref(dest, p); -#ifdef UPB_USE_JIT_X64 - p->jit_code = NULL; - if (allowjit) upb_decoderplan_makejit(p); -#endif - - upb_handlers *h = upb_handlers_new( - UPB_BYTESTREAM, &upb_pbdecoder_frametype, owner); - upb_handlers_setstartstr(h, UPB_BYTESTREAM_BYTES, start, NULL, NULL); - upb_handlers_setstring(h, UPB_BYTESTREAM_BYTES, decode, p, freeplan); - upb_handlers_setendstr(h, UPB_BYTESTREAM_BYTES, end, NULL, NULL); - return h; -} |