summary refs log tree commit diff
path: root/upb/pb/decoder.c
diff options
context:
space:
mode:
Diffstat (limited to 'upb/pb/decoder.c')
-rw-r--r-- upb/pb/decoder.c 1198
1 files changed, 566 insertions, 632 deletions
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 18bb430..0cfb12e 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -1,208 +1,63 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2008-2011 Google Inc. See LICENSE for details.
+ * Copyright (c) 2008-2013 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*/
#include <inttypes.h>
#include <setjmp.h>
+#include <stdarg.h>
#include <stddef.h>
#include <stdlib.h>
#include "upb/bytestream.h"
-#include "upb/pb/decoder.h"
-#include "upb/pb/varint.h"
+#include "upb/pb/decoder.int.h"
+#include "upb/pb/varint.int.h"
-#define UPB_NONDELIMITED (0xffffffffffffffffULL)
-
-/* upb_pbdecoder ****************************************************************/
-
-struct dasm_State;
-
-typedef struct {
- const upb_fielddef *f;
- uint64_t end_ofs;
- uint32_t group_fieldnum; // UINT32_MAX for non-groups.
- bool is_sequence; // frame represents seq or submsg/str? (f might be both).
- bool is_packed; // true for packed primitive sequences.
-} frame;
-
-struct upb_pbdecoder {
- // Where we push parsed data (not owned).
- upb_sink *sink;
-
- // Current input buffer and its stream offset.
- const char *buf, *ptr, *end, *checkpoint;
- uint64_t bufstart_ofs;
-
- // Buffer for residual bytes not parsed from the previous buffer.
- char residual[16];
- char *residual_end;
-
- // Stores the user buffer passed to our decode function.
- const char *buf_param;
- size_t size_param;
-
- // Equal to size_param while we are in the residual buf, 0 otherwise.
- size_t userbuf_remaining;
-
- // Used to temporarily store the return value before calling longjmp().
- size_t ret;
-
- // End of the delimited region, relative to ptr, or NULL if not in this buf.
- const char *delim_end;
-
-#ifdef UPB_USE_JIT_X64
- // For JIT, which doesn't do bounds checks in the middle of parsing a field.
- const char *jit_end, *effective_end; // == MIN(jit_end, delim_end)
-
- // Used momentarily by the generated code to store a value while a user
- // function is called.
- uint32_t tmp_len;
-
- const void *saved_rbp;
+#ifdef UPB_DUMP_BYTECODE
+#include <stdio.h>
#endif
- // Our internal stack.
- frame *top, *limit;
- frame stack[UPB_MAX_NESTING];
-
- // For exiting the decoder on error.
- jmp_buf exitjmp;
-};
-
-typedef struct {
- // The top-level handlers that this plan calls into. We own a ref.
- const upb_handlers *dest_handlers;
-
-#ifdef UPB_USE_JIT_X64
- // JIT-generated machine code (else NULL).
- char *jit_code;
- size_t jit_size;
- char *debug_info;
-
- // For storing upb_jitmsginfo, which contains per-msg runtime data needed
- // by the JIT.
- // Maps upb_handlers* -> upb_jitmsginfo.
- upb_inttable msginfo;
-
- // The following members are used only while the JIT is being built.
-
- // This pointer is allocated by dasm_init() and freed by dasm_free().
- struct dasm_State *dynasm;
-
- // For storing pclabel bases while we are building the JIT.
- // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base
- upb_inttable pclabels;
-
- // For marking labels that should go into the generated code.
- // Maps pclabel -> owned char* label.
- upb_inttable asmlabels;
-
- // This is not the same as len(pclabels) because the table only contains base
- // offsets for each def, but each def can have many pclabels.
- uint32_t pclabel_count;
-#endif
-} decoderplan;
-
-typedef struct {
- uint8_t native_wire_type;
- bool is_numeric;
-} upb_decoder_typeinfo;
-
-static const upb_decoder_typeinfo upb_decoder_types[] = {
- {UPB_WIRE_TYPE_END_GROUP, false}, // ENDGROUP
- {UPB_WIRE_TYPE_64BIT, true}, // DOUBLE
- {UPB_WIRE_TYPE_32BIT, true}, // FLOAT
- {UPB_WIRE_TYPE_VARINT, true}, // INT64
- {UPB_WIRE_TYPE_VARINT, true}, // UINT64
- {UPB_WIRE_TYPE_VARINT, true}, // INT32
- {UPB_WIRE_TYPE_64BIT, true}, // FIXED64
- {UPB_WIRE_TYPE_32BIT, true}, // FIXED32
- {UPB_WIRE_TYPE_VARINT, true}, // BOOL
- {UPB_WIRE_TYPE_DELIMITED, false}, // STRING
- {UPB_WIRE_TYPE_START_GROUP, false}, // GROUP
- {UPB_WIRE_TYPE_DELIMITED, false}, // MESSAGE
- {UPB_WIRE_TYPE_DELIMITED, false}, // BYTES
- {UPB_WIRE_TYPE_VARINT, true}, // UINT32
- {UPB_WIRE_TYPE_VARINT, true}, // ENUM
- {UPB_WIRE_TYPE_32BIT, true}, // SFIXED32
- {UPB_WIRE_TYPE_64BIT, true}, // SFIXED64
- {UPB_WIRE_TYPE_VARINT, true}, // SINT32
- {UPB_WIRE_TYPE_VARINT, true}, // SINT64
-};
-
-static upb_selector_t getselector(const upb_fielddef *f,
- upb_handlertype_t type) {
- upb_selector_t selector;
- bool ok = upb_handlers_getselector(f, type, &selector);
- UPB_ASSERT_VAR(ok, ok);
- return selector;
-}
-
-
-/* decoderplan ****************************************************************/
-
-#ifdef UPB_USE_JIT_X64
-// These defines are necessary for DynASM codegen.
-// See dynasm/dasm_proto.h for more info.
-#define Dst_DECL decoderplan *plan
-#define Dst_REF (plan->dynasm)
-#define Dst (plan)
-
-// In debug mode, make DynASM do internal checks (must be defined before any
-// dasm header is included.
-#ifndef NDEBUG
-#define DASM_CHECKS
-#endif
-
-#include "dynasm/dasm_proto.h"
-#include "upb/pb/decoder_x64.h"
-#endif
-
-void freeplan(void *_p) {
- decoderplan *p = _p;
- upb_handlers_unref(p->dest_handlers, p);
-#ifdef UPB_USE_JIT_X64
- if (p->jit_code) upb_decoderplan_freejit(p);
-#endif
- free(p);
-}
-
-static const decoderplan *getdecoderplan(const upb_handlers *h) {
- if (upb_handlers_frametype(h) != upb_pbdecoder_getframetype())
- return NULL;
- upb_selector_t sel;
- if (!upb_handlers_getselector(UPB_BYTESTREAM_BYTES, UPB_HANDLER_STRING, &sel))
- return NULL;
- return upb_handlers_gethandlerdata(h, sel);
-}
-
-bool upb_pbdecoder_isdecoder(const upb_handlers *h) {
- return getdecoderplan(h) != NULL;
-}
-
-bool upb_pbdecoder_hasjitcode(const upb_handlers *h) {
-#ifdef UPB_USE_JIT_X64
- const decoderplan *p = getdecoderplan(h);
- if (!p) return false;
- return p->jit_code != NULL;
-#else
- UPB_UNUSED(h);
- return false;
-#endif
-}
-
-const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h) {
- const decoderplan *p = getdecoderplan(h);
- if (!p) return NULL;
- return p->dest_handlers;
+#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
+#define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; }
+
+// Error messages that are shared between the bytecode and JIT decoders.
+const char *kPbDecoderStackOverflow = "Nesting too deep.";
+
+// Error messages shared within this file.
+static const char *kUnterminatedVarint = "Unterminated varint.";
+
+/* upb_pbdecoder **************************************************************/
+
+static opcode halt = OP_HALT;
+
+// Whether an op consumes any of the input buffer.
+static bool consumes_input(opcode op) {
+ switch (op) {
+ case OP_SETDISPATCH:
+ case OP_STARTMSG:
+ case OP_ENDMSG:
+ case OP_STARTSEQ:
+ case OP_ENDSEQ:
+ case OP_STARTSUBMSG:
+ case OP_ENDSUBMSG:
+ case OP_STARTSTR:
+ case OP_ENDSTR:
+ case OP_PUSHTAGDELIM:
+ case OP_POP:
+ case OP_SETDELIM:
+ case OP_SETGROUPNUM:
+ case OP_SETBIGGROUPNUM:
+ case OP_CHECKDELIM:
+ case OP_CALL:
+ case OP_BRANCH:
+ return false;
+ default:
+ return true;
+ }
}
-
-/* upb_pbdecoder ****************************************************************/
-
-static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
+static bool in_residual_buf(upb_pbdecoder *d, const char *p);
// It's unfortunate that we have to micro-manage the compiler this way,
// especially since this tuning is necessarily specific to one hardware
@@ -210,68 +65,65 @@ static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
// with these annotations. Every instance where these appear, gcc 4.2.1 made
// the wrong decision and degraded performance in benchmarks.
#define FORCEINLINE static inline __attribute__((always_inline))
-#define NOINLINE static __attribute__((noinline))
+#define NOINLINE __attribute__((noinline))
-static upb_status *decoder_status(upb_pbdecoder *d) {
+static void seterr(upb_pbdecoder *d, const char *msg) {
// TODO(haberman): encapsulate this access to pipeline->status, but not sure
// exactly what that interface should look like.
- return &d->sink->pipeline_->status_;
+ upb_status_seterrliteral(&d->sink->pipeline_->status_, msg);
}
-UPB_NORETURN static void exitjmp(upb_pbdecoder *d) {
- _longjmp(d->exitjmp, 1);
+void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
+ seterr(d, msg);
}
-UPB_NORETURN static void abortjmp(upb_pbdecoder *d, const char *msg) {
- d->ret = in_residual_buf(d, d->checkpoint) ? 0 : (d->checkpoint - d->buf);
- upb_status_seterrliteral(decoder_status(d), msg);
- exitjmp(d);
-}
/* Buffering ******************************************************************/
// We operate on one buffer at a time, which is either the user's buffer passed
// to our "decode" callback or some residual bytes from the previous buffer.
-// How many bytes can be safely read from d->ptr.
-static size_t bufleft(upb_pbdecoder *d) {
- assert(d->end >= d->ptr);
- return d->end - d->ptr;
+// How many bytes can be safely read from d->ptr without reading past end-of-buf
+// or past the current delimited end.
+static size_t curbufleft(upb_pbdecoder *d) {
+ assert(d->data_end >= d->ptr);
+ return d->data_end - d->ptr;
+}
+
+static const char *ptr(upb_pbdecoder *d) {
+ return d->ptr;
}
// Overall offset of d->ptr.
-uint64_t offset(const upb_pbdecoder *d) {
- return d->bufstart_ofs + (d->ptr - d->buf);
+uint64_t offset(upb_pbdecoder *d) {
+ return d->bufstart_ofs + (ptr(d) - d->buf);
}
// Advances d->ptr.
static void advance(upb_pbdecoder *d, size_t len) {
- assert(bufleft(d) >= len);
+ assert(curbufleft(d) >= len);
d->ptr += len;
}
-// Commits d->ptr progress; should be called when an entire atomic value
-// (ie tag+value) has been successfully consumed.
-static void checkpoint(upb_pbdecoder *d) {
- d->checkpoint = d->ptr;
-}
-
static bool in_buf(const char *p, const char *buf, const char *end) {
return p >= buf && p <= end;
}
-static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
+static bool in_residual_buf(upb_pbdecoder *d, const char *p) {
return in_buf(p, d->residual, d->residual_end);
}
// Calculates the delim_end value, which represents a combination of the
// current buffer and the stack, so must be called whenever either is updated.
static void set_delim_end(upb_pbdecoder *d) {
- frame *f = d->top;
- size_t delimlen = f->end_ofs - d->bufstart_ofs;
- size_t buflen = d->end - d->buf;
- d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ?
- d->buf + delimlen : NULL; // NULL if not in this buf.
+ uint64_t delim_ofs = d->top->end_ofs - d->bufstart_ofs; // Keep 64 bits: a size_t could truncate on 32-bit targets.
+ if (delim_ofs <= (uint64_t)(d->end - d->buf)) { // Frame ends inside the current buffer.
+ d->delim_end = d->buf + delim_ofs;
+ d->data_end = d->delim_end; // Readable data stops at the delimited end.
+ } else {
+ d->data_end = d->end; // Readable data runs to end-of-buffer.
+ d->delim_end = NULL; // NULL if not in this buf.
+ }
}
static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
@@ -279,498 +131,603 @@ static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
d->buf = buf;
d->end = end;
set_delim_end(d);
-#ifdef UPB_USE_JIT_X64
- // If we start parsing a value, we can parse up to 20 bytes without
- // having to bounds-check anything (2 10-byte varints). Since the
- // JIT bounds-checks only *between* values (and for strings), the
- // JIT bails if there are not 20 bytes available.
- d->jit_end = d->end - 20;
-#endif
-}
-
-static void suspendjmp(upb_pbdecoder *d) {
- switchtobuf(d, d->residual, d->residual_end);
- exitjmp(d);
}
static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
- assert(d->ptr == d->end);
- d->bufstart_ofs += (d->ptr - d->buf);
+ assert(curbufleft(d) == 0);
+ d->bufstart_ofs += (d->end - d->buf);
switchtobuf(d, buf, buf + len);
}
-static void skip(upb_pbdecoder *d, size_t bytes) {
- size_t avail = bufleft(d);
- size_t total_avail = avail + d->userbuf_remaining;
- if (avail >= bytes) {
+static void checkpoint(upb_pbdecoder *d) {
+ // The assertion here is in the interests of efficiency, not correctness.
+ // We are trying to ensure that we don't checkpoint() more often than
+ // necessary.
+ assert(d->checkpoint != ptr(d));
+ d->checkpoint = ptr(d);
+}
+
+// Resumes the decoder from an initial state or from a previous suspend.
+void *upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
+ size_t size) {
+ UPB_UNUSED(p); // Useless; just for the benefit of the JIT.
+ d->buf_param = buf;
+ d->size_param = size;
+ d->skip = 0;
+ if (d->residual_end > d->residual) {
+ // We have residual bytes from the last buffer.
+ assert(ptr(d) == d->residual);
+ } else {
+ switchtobuf(d, buf, buf + size);
+ }
+ d->checkpoint = ptr(d);
+ return d; // For the JIT.
+}
+
+// Suspends the decoder at the last checkpoint, without saving any residual
+// bytes. If there are any unconsumed bytes, returns a short byte count.
+size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
+ d->pc = d->last;
+ if (d->checkpoint == d->residual) {
+ // Checkpoint was in residual buf; no user bytes were consumed.
+ d->ptr = d->residual;
+ return 0;
+ } else {
+ assert(!in_residual_buf(d, d->checkpoint));
+ assert(d->buf == d->buf_param);
+ size_t consumed = d->checkpoint - d->buf;
+ d->bufstart_ofs += consumed + d->skip;
+ d->residual_end = d->residual;
+ switchtobuf(d, d->residual, d->residual_end);
+ return consumed + d->skip;
+ }
+}
+
+// Suspends the decoder at the last checkpoint, and saves any unconsumed
+// bytes in our residual buffer. This is necessary if we need more user
+// bytes to form a complete value, which might not be contiguous in the
+// user's buffers. Always consumes all user bytes.
+static size_t suspend_save(upb_pbdecoder *d) {
+ // We hit end-of-buffer before we could parse a full value.
+ // Save any unconsumed bytes (if any) to the residual buffer.
+ d->pc = d->last;
+
+ if (d->checkpoint == d->residual) {
+ // Checkpoint was in residual buf; append user byte(s) to residual buf.
+ assert((d->residual_end - d->residual) + d->size_param <=
+ sizeof(d->residual));
+ if (!in_residual_buf(d, ptr(d))) {
+ d->bufstart_ofs -= (d->residual_end - d->residual);
+ }
+ memcpy(d->residual_end, d->buf_param, d->size_param);
+ d->residual_end += d->size_param;
+ } else {
+ // Checkpoint was in user buf; old residual bytes not needed.
+ assert(!in_residual_buf(d, d->checkpoint));
+ d->ptr = d->checkpoint;
+ size_t save = curbufleft(d);
+ assert(save <= sizeof(d->residual));
+ memcpy(d->residual, ptr(d), save);
+ d->residual_end = d->residual + save;
+ d->bufstart_ofs = offset(d) + d->skip;
+ }
+
+ switchtobuf(d, d->residual, d->residual_end);
+ return d->size_param + d->skip;
+}
+
+static int32_t skip(upb_pbdecoder *d, size_t bytes) {
+ assert(!in_residual_buf(d, ptr(d)) || d->size_param == 0);
+ if (curbufleft(d) >= bytes) {
// Skipped data is all in current buffer.
advance(d, bytes);
- } else if (total_avail >= bytes) {
- // Skipped data is all in residual buf and param buffer.
- assert(in_residual_buf(d, d->ptr));
- advance(d, avail);
- advancetobuf(d, d->buf_param, d->size_param);
- d->userbuf_remaining = 0;
- advance(d, bytes - avail);
} else {
// Skipped data extends beyond currently available buffers.
- // TODO: we need to do a checkdelim() equivalent that pops any frames that
- // we just skipped past.
- d->bufstart_ofs = offset(d) + bytes;
- d->residual_end = d->residual;
- d->ret += bytes - total_avail;
- suspendjmp(d);
+ d->skip = bytes - curbufleft(d);
+ advance(d, curbufleft(d));
}
+ return DECODE_OK;
}
-static void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
- assert(bytes <= bufleft(d));
- memcpy(buf, d->ptr, bytes);
+FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
+ assert(bytes <= curbufleft(d));
+ memcpy(buf, ptr(d), bytes);
advance(d, bytes);
}
-NOINLINE void getbytes_slow(upb_pbdecoder *d, void *buf, size_t bytes) {
- const size_t avail = bufleft(d);
- if (avail + d->userbuf_remaining >= bytes) {
- // Remaining residual buffer and param buffer together can satisfy.
- // (We are only called from getbytes() which has already verified that
- // the current buffer alone cannot satisfy).
- assert(in_residual_buf(d, d->ptr));
- consumebytes(d, buf, avail);
+static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
+ size_t bytes) {
+ const size_t avail = curbufleft(d);
+ consumebytes(d, buf, avail); // Take whatever the current buffer still holds.
+ bytes -= avail;
+ assert(bytes > 0);
+ if (in_residual_buf(d, ptr(d))) {
advancetobuf(d, d->buf_param, d->size_param);
- consumebytes(d, buf + avail, bytes - avail);
- d->userbuf_remaining = 0;
+ }
+ if (curbufleft(d) >= bytes) {
+ consumebytes(d, (char *)buf + avail, bytes); // Cast: arithmetic on void* is a GNU extension, not ISO C.
+ return DECODE_OK;
+ } else if (d->data_end - d->buf == d->top->end_ofs - d->bufstart_ofs) {
+ seterr(d, "Submessage ended in the middle of a value");
+ return upb_pbdecoder_suspend(d);
} else {
- // There is not enough remaining data, save residual bytes (if any)
- // starting at the last committed checkpoint and exit.
- if (in_buf(d->checkpoint, d->buf_param, d->buf_param + d->size_param)) {
- // Checkpoint was in user buf; old residual bytes not needed.
- d->ptr = d->checkpoint;
- size_t save = bufleft(d);
- assert(save <= sizeof(d->residual));
- memcpy(d->residual, d->ptr, save);
- d->residual_end = d->residual + save;
- d->bufstart_ofs = offset(d);
- } else {
- // Checkpoint was in residual buf; append user byte(s) to residual buf.
- assert(d->checkpoint == d->residual);
- assert((d->residual_end - d->residual) + d->size_param <=
- sizeof(d->residual));
- if (!in_residual_buf(d, d->ptr)) {
- d->bufstart_ofs -= (d->residual_end - d->residual);
- }
- memcpy(d->residual_end, d->buf_param, d->size_param);
- d->residual_end += d->size_param;
- }
- suspendjmp(d);
+ return suspend_save(d); // Not enough data yet: stash unconsumed bytes and suspend.
}
}
-FORCEINLINE void getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
- if (bufleft(d) >= bytes) {
+FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
+ if (curbufleft(d) >= bytes) {
// Buffer has enough data to satisfy.
consumebytes(d, buf, bytes);
+ return DECODE_OK;
} else {
- getbytes_slow(d, buf, bytes);
+ return getbytes_slow(d, buf, bytes);
+ }
+}
+
+static NOINLINE size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
+ size_t bytes) {
+ size_t ret = curbufleft(d); // Bytes copied so far; the return value.
+ memcpy(buf, ptr(d), ret);
+ if (in_residual_buf(d, ptr(d))) {
+ size_t copy = UPB_MIN(bytes - ret, d->size_param); // Top up from the user buffer without consuming it.
+ memcpy((char *)buf + ret, d->buf_param, copy); // Cast: arithmetic on void* is a GNU extension, not ISO C.
+ ret += copy;
}
+ return ret;
}
-FORCEINLINE uint8_t getbyte(upb_pbdecoder *d) {
- uint8_t byte;
- getbytes(d, &byte, 1);
- return byte;
+FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
+ if (curbufleft(d) >= bytes) {
+ memcpy(buf, ptr(d), bytes);
+ return bytes;
+ } else {
+ return peekbytes_slow(d, buf, bytes);
+ }
}
/* Decoding of wire types *****************************************************/
-NOINLINE uint64_t decode_varint_slow(upb_pbdecoder *d) {
+NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
+ uint64_t *u64) {
+ *u64 = 0;
uint8_t byte = 0x80;
- uint64_t u64 = 0;
int bitpos;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
- u64 |= ((uint64_t)((byte = getbyte(d)) & 0x7F)) << bitpos;
+ int32_t ret = getbytes(d, &byte, 1);
+ if (ret >= 0) return ret;
+ *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
}
- if(bitpos == 70 && (byte & 0x80))
- abortjmp(d, "Unterminated varint");
- return u64;
-}
-
-NOINLINE uint32_t decode_v32_slow(upb_pbdecoder *d) {
- uint64_t u64 = decode_varint_slow(d);
- if (u64 > UINT32_MAX) abortjmp(d, "Unterminated 32-bit varint");
- return (uint32_t)u64;
-}
-
-// For tags and delimited lengths, which must be <=32bit and are usually small.
-FORCEINLINE uint32_t decode_v32(upb_pbdecoder *d) {
- // Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048).
- if (bufleft(d) >= 2) {
- uint32_t ret = d->ptr[0] & 0x7f;
- if ((d->ptr[0] & 0x80) == 0) {
- advance(d, 1);
- return ret;
- }
- ret |= (d->ptr[1] & 0x7f) << 7;
- if ((d->ptr[1] & 0x80) == 0) {
- advance(d, 2);
- return ret;
- }
+ if(bitpos == 70 && (byte & 0x80)) {
+ seterr(d, kUnterminatedVarint);
+ return upb_pbdecoder_suspend(d);
}
- return decode_v32_slow(d);
+ return DECODE_OK;
}
-FORCEINLINE uint64_t decode_varint(upb_pbdecoder *d) {
- if (bufleft(d) >= 10) {
+FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
+ if (curbufleft(d) > 0 && !(*ptr(d) & 0x80)) {
+ *u64 = *ptr(d);
+ advance(d, 1);
+ return DECODE_OK;
+ } else if (curbufleft(d) >= 10) {
// Fast case.
- upb_decoderet r = upb_vdecode_fast(d->ptr);
- if (r.p == NULL) abortjmp(d, "Unterminated varint");
- advance(d, r.p - d->ptr);
- return r.val;
+ upb_decoderet r = upb_vdecode_fast(ptr(d));
+ if (r.p == NULL) {
+ seterr(d, kUnterminatedVarint);
+ return upb_pbdecoder_suspend(d);
+ }
+ advance(d, r.p - ptr(d));
+ *u64 = r.val;
+ return DECODE_OK;
} else {
// Slow case -- varint spans buffer seam.
- return decode_varint_slow(d);
+ return upb_pbdecoder_decode_varint_slow(d, u64);
}
}
-FORCEINLINE uint32_t decode_fixed32(upb_pbdecoder *d) {
- uint32_t u32;
- getbytes(d, &u32, 4);
- return u32; // TODO: proper byte swapping for big-endian machines.
-}
-
-FORCEINLINE uint64_t decode_fixed64(upb_pbdecoder *d) {
+FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
uint64_t u64;
- getbytes(d, &u64, 8);
- return u64; // TODO: proper byte swapping for big-endian machines.
+ int32_t ret = decode_varint(d, &u64);
+ if (ret >= 0) return ret;
+ if (u64 > UINT32_MAX) {
+ seterr(d, "Unterminated 32-bit varint");
+ return upb_pbdecoder_suspend(d);
+ }
+ *u32 = u64;
+ return DECODE_OK;
}
-static void push(upb_pbdecoder *d, const upb_fielddef *f, bool is_sequence,
- bool is_packed, int32_t group_fieldnum, uint64_t end) {
- frame *fr = d->top + 1;
- if (fr >= d->limit) abortjmp(d, "Nesting too deep.");
- fr->f = f;
- fr->is_sequence = is_sequence;
- fr->is_packed = is_packed;
- fr->end_ofs = end;
- fr->group_fieldnum = group_fieldnum;
- d->top = fr;
- set_delim_end(d);
+// TODO: proper byte swapping for big-endian machines.
+FORCEINLINE int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
+ return getbytes(d, u32, 4);
}
-static void push_msg(upb_pbdecoder *d, const upb_fielddef *f, uint64_t end) {
- if (!upb_sink_startsubmsg(d->sink, getselector(f, UPB_HANDLER_STARTSUBMSG)))
- abortjmp(d, "startsubmsg failed.");
- int32_t group_fieldnum = (end == UPB_NONDELIMITED) ?
- (int32_t)upb_fielddef_number(f) : -1;
- push(d, f, false, false, group_fieldnum, end);
+// TODO: proper byte swapping for big-endian machines.
+FORCEINLINE int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
+ return getbytes(d, u64, 8);
}
-static void push_seq(upb_pbdecoder *d, const upb_fielddef *f, bool packed,
- uint64_t end_ofs) {
- if (!upb_sink_startseq(d->sink, getselector(f, UPB_HANDLER_STARTSEQ)))
- abortjmp(d, "startseq failed.");
- push(d, f, true, packed, -1, end_ofs);
+int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
+ return decode_fixed32(d, u32);
}
-static void push_str(upb_pbdecoder *d, const upb_fielddef *f, size_t len,
- uint64_t end) {
- if (!upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), len))
- abortjmp(d, "startseq failed.");
- push(d, f, false, false, -1, end);
+int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
+ return decode_fixed64(d, u64);
}
-static void pop_submsg(upb_pbdecoder *d) {
- upb_sink_endsubmsg(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSUBMSG));
- d->top--;
- set_delim_end(d);
-}
+static double as_double(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
+static float as_float(uint32_t n) { float f; memcpy(&f, &n, 4); return f; }
-static void pop_seq(upb_pbdecoder *d) {
- upb_sink_endseq(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSEQ));
- d->top--;
- set_delim_end(d);
+static bool push(upb_pbdecoder *d, uint64_t end) {
+ upb_pbdecoder_frame *fr = d->top;
+
+ if (end > fr->end_ofs) {
+ seterr(d, "Submessage end extends past enclosing submessage.");
+ return false;
+ } else if ((fr + 1) == d->limit) {
+ seterr(d, kPbDecoderStackOverflow);
+ return false;
+ }
+
+ fr++;
+ fr->end_ofs = end;
+ fr->u.dispatch = NULL;
+ fr->groupnum = -1;
+ d->top = fr;
+ return true;
}
-static void pop_string(upb_pbdecoder *d) {
- upb_sink_endstr(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSTR));
- d->top--;
- set_delim_end(d);
+NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
+ uint64_t expected) {
+ uint64_t data = 0;
+ size_t bytes = upb_value_size(expected);
+ size_t read = peekbytes(d, &data, bytes);
+ if (read == bytes && data == expected) {
+ // Advance past matched bytes.
+ int32_t ok = getbytes(d, &data, read);
+ UPB_ASSERT_VAR(ok, ok < 0);
+ return DECODE_OK;
+ } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
+ return suspend_save(d);
+ } else {
+ return DECODE_MISMATCH;
+ }
}
-static void checkdelim(upb_pbdecoder *d) {
- while (d->delim_end && d->ptr >= d->delim_end) {
- // TODO(haberman): not sure what to do about this; if we detect this error
- // we can possibly violate the promise that errors are always signaled by a
- // short "parsed byte" count (because all bytes might have been successfully
- // parsed prior to detecting this error).
- // if (d->ptr > d->delim_end) abortjmp(d, "Bad submessage end");
- if (d->top->is_sequence) {
- pop_seq(d);
- } else {
- pop_submsg(d);
+int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, uint32_t fieldnum,
+ uint8_t wire_type) {
+ if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER) {
+ seterr(d, "Invalid field number");
+ return upb_pbdecoder_suspend(d);
+ }
+
+ if (wire_type == UPB_WIRE_TYPE_END_GROUP) {
+ if (fieldnum != d->top->groupnum) {
+ seterr(d, "Unmatched ENDGROUP tag.");
+ return upb_pbdecoder_suspend(d);
+ }
+ return DECODE_ENDGROUP;
+ }
+
+ // TODO: deliver to unknown field callback.
+ switch (wire_type) {
+ case UPB_WIRE_TYPE_VARINT: {
+ uint64_t u64;
+ return decode_varint(d, &u64);
+ }
+ case UPB_WIRE_TYPE_32BIT:
+ return skip(d, 4);
+ case UPB_WIRE_TYPE_64BIT:
+ return skip(d, 8);
+ case UPB_WIRE_TYPE_DELIMITED: {
+ uint32_t len;
+ CHECK_RETURN(decode_v32(d, &len));
+ return skip(d, len);
}
+ case UPB_WIRE_TYPE_START_GROUP:
+ seterr(d, "Can't handle unknown groups yet");
+ return upb_pbdecoder_suspend(d);
+ case UPB_WIRE_TYPE_END_GROUP:
+ default:
+ seterr(d, "Invalid wire type");
+ return upb_pbdecoder_suspend(d);
}
}
+static int32_t dispatch(upb_pbdecoder *d) {
+ upb_inttable *dispatch = d->top->u.dispatch;
+
+ // Decode tag.
+ uint32_t tag;
+ CHECK_RETURN(decode_v32(d, &tag));
+ uint8_t wire_type = tag & 0x7;
+ uint32_t fieldnum = tag >> 3;
+
+ // Lookup tag. Because of packed/non-packed compatibility, we have to
+ // check the wire type against two possibilities.
+ upb_value val;
+ if (upb_inttable_lookup32(dispatch, fieldnum, &val)) {
+ uint64_t v = upb_value_getuint64(val);
+ if (wire_type == (v & 0xff)) {
+ d->pc = d->top->base + (v >> 16);
+ return DECODE_OK;
+ } else if (wire_type == ((v >> 8) & 0xff)) {
+ bool found =
+ upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
+ UPB_ASSERT_VAR(found, found);
+ d->pc = d->top->base + upb_value_getuint64(val);
+ return DECODE_OK;
+ }
+ }
+
+ // Unknown field or ENDGROUP.
+ int32_t ret = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
-/* Decoding of .proto types ***************************************************/
-
-// Technically, we are losing data if we see a 32-bit varint that is not
-// properly sign-extended. We could detect this and error about the data loss,
-// but proto2 does not do this, so we pass.
-
-#define T(type, sel, wt, name, convfunc) \
- static void decode_ ## type(upb_pbdecoder *d, const upb_fielddef *f) { \
- upb_sink_put ## name(d->sink, getselector(f, UPB_HANDLER_ ## sel), \
- (convfunc)(decode_ ## wt(d))); \
- } \
-
-static double upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
-static float upb_asfloat(uint32_t n) { float f; memcpy(&f, &n, 4); return f; }
-
-T(INT32, INT32, varint, int32, int32_t)
-T(INT64, INT64, varint, int64, int64_t)
-T(UINT32, UINT32, varint, uint32, uint32_t)
-T(UINT64, UINT64, varint, uint64, uint64_t)
-T(FIXED32, UINT32, fixed32, uint32, uint32_t)
-T(FIXED64, UINT64, fixed64, uint64, uint64_t)
-T(SFIXED32, INT32, fixed32, int32, int32_t)
-T(SFIXED64, INT64, fixed64, int64, int64_t)
-T(BOOL, BOOL, varint, bool, bool)
-T(ENUM, INT32, varint, int32, int32_t)
-T(DOUBLE, DOUBLE, fixed64, double, upb_asdouble)
-T(FLOAT, FLOAT, fixed32, float, upb_asfloat)
-T(SINT32, INT32, varint, int32, upb_zzdec_32)
-T(SINT64, INT64, varint, int64, upb_zzdec_64)
-#undef T
-
-static void decode_GROUP(upb_pbdecoder *d, const upb_fielddef *f) {
- push_msg(d, f, UPB_NONDELIMITED);
-}
-
-static void decode_MESSAGE(upb_pbdecoder *d, const upb_fielddef *f) {
- uint32_t len = decode_v32(d);
- push_msg(d, f, offset(d) + len);
-}
-
-static void decode_STRING(upb_pbdecoder *d, const upb_fielddef *f) {
- uint32_t strlen = decode_v32(d);
- if (strlen <= bufleft(d)) {
- upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), strlen);
- if (strlen)
- upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING),
- d->ptr, strlen);
- upb_sink_endstr(d->sink, getselector(f, UPB_HANDLER_ENDSTR));
- advance(d, strlen);
+ if (ret == DECODE_ENDGROUP) {
+ d->pc = d->top->base - 1; // Back to OP_ENDMSG.
+ return DECODE_OK;
} else {
- // Buffer ends in the middle of the string; need to push a decoder frame
- // for it.
- push_str(d, f, strlen, offset(d) + strlen);
- if (bufleft(d)) {
- upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING),
- d->ptr, bufleft(d));
- advance(d, bufleft(d));
- }
- d->bufstart_ofs = offset(d);
- d->residual_end = d->residual;
- suspendjmp(d);
+ d->pc = d->last - 1; // Rewind to CHECKDELIM.
+ return ret;
}
}
/* The main decoding loop *****************************************************/
-static const upb_fielddef *decode_tag(upb_pbdecoder *d) {
- while (1) {
- uint32_t tag = decode_v32(d);
- uint8_t wire_type = tag & 0x7;
- uint32_t fieldnum = tag >> 3; const upb_fielddef *f = NULL;
- const upb_handlers *h = d->sink->top->h; // TODO(haberman): rm
- f = upb_msgdef_itof(upb_handlers_msgdef(h), fieldnum);
- bool packed = false;
-
- if (f) {
- // Wire type check.
- upb_descriptortype_t type = upb_fielddef_descriptortype(f);
- if (wire_type == upb_decoder_types[type].native_wire_type) {
- // Wire type is ok.
- } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED &&
- upb_decoder_types[type].is_numeric)) {
- // Wire type is ok (and packed).
- packed = true;
- } else {
- f = NULL;
- }
- }
-
- // There are no explicit "startseq" or "endseq" markers in protobuf
- // streams, so we have to infer them by noticing when a repeated field
- // starts or ends.
- frame *fr = d->top;
- if (fr->is_sequence && fr->f != f) {
- pop_seq(d);
- fr = d->top;
- }
+size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
+ size_t size) { // Bytecode interpreter: passes buf/size to upb_pbdecoder_resume(), then executes instructions from d->pc until an opcode returns.
+ upb_pbdecoder *d = closure; // The closure is the decoder instance.
+ const upb_pbdecoderplan *p = hd; // The plan is only read by the UPB_DUMP_BYTECODE trace below.
+ assert(buf);
+ upb_pbdecoder_resume(d, NULL, buf, size);
+ UPB_UNUSED(p);
- if (f && upb_fielddef_isseq(f) && !fr->is_sequence) {
- if (packed) {
- uint32_t len = decode_v32(d);
- push_seq(d, f, true, offset(d) + len);
- checkpoint(d);
- } else {
- push_seq(d, f, false, fr->end_ofs);
- }
- }
+#define VMCASE(op, code) \
+ case op: { code; if (consumes_input(op)) checkpoint(d); break; } // checkpoint() runs only after opcodes for which consumes_input() is true.
+#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
+ VMCASE(OP_PARSE_ ## type, { \
+ ctype val; \
+ CHECK_RETURN(decode_ ## wt(d, &val)); \
+ upb_sink_put ## name(d->sink, arg, (convfunc)(val)); \
+ }) // One case per primitive type: decode with decode_<wt>(), convert with convfunc, deliver with upb_sink_put<name>() and the instruction's arg.
- if (f) return f;
-
- // Unknown field or ENDGROUP.
- if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER)
- abortjmp(d, "Invalid field number");
- switch (wire_type) {
- case UPB_WIRE_TYPE_VARINT: decode_varint(d); break;
- case UPB_WIRE_TYPE_32BIT: skip(d, 4); break;
- case UPB_WIRE_TYPE_64BIT: skip(d, 8); break;
- case UPB_WIRE_TYPE_DELIMITED: skip(d, decode_v32(d)); break;
- case UPB_WIRE_TYPE_START_GROUP:
- abortjmp(d, "Can't handle unknown groups yet");
- case UPB_WIRE_TYPE_END_GROUP:
- if (fieldnum != fr->group_fieldnum)
- abortjmp(d, "Unmatched ENDGROUP tag");
- pop_submsg(d);
- break;
- default:
- abortjmp(d, "Invalid wire type");
+ while(1) {
+ d->last = d->pc; // Remember where this instruction starts, before pc is advanced.
+ int32_t instruction = *d->pc++;
+ opcode op = getop(instruction);
+ uint32_t arg = instruction >> 8; // Operand is everything above the low 8 bits. NOTE(review): this right-shifts a signed int32_t — confirm the intended behaviour for negative words.
+ int32_t longofs = arg; // Same operand viewed as signed; used as a relative pc offset by OP_CHECKDELIM, OP_CALL and OP_BRANCH.
+ assert(ptr(d) != d->residual_end);
+#ifdef UPB_DUMP_BYTECODE
+ fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
+ "%x %s (%d)\n",
+ (int)offset(d),
+ (int)(ptr(d) - d->buf),
+ (int)(d->data_end - ptr(d)),
+ (int)(d->end - ptr(d)),
+ (int)((d->top->end_ofs - d->bufstart_ofs) - (ptr(d) - d->buf)),
+ (int)(d->pc - 1 - upb_pbdecoderplan_codebase(p)),
+ upb_pbdecoder_getopname(op),
+ arg);
+#endif
+ switch (op) {
+ // Technically, we are losing data if we see a 32-bit varint that is not
+ // properly sign-extended. We could detect this and error about the data
+ // loss, but proto2 does not do this, so we pass.
+ PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t)
+ PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t)
+ PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t)
+ PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t)
+ PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t)
+ PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t)
+ PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t)
+ PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t)
+ PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t)
+ PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t)
+ PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t)
+ PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t)
+ PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t)
+
+ VMCASE(OP_SETDISPATCH,
+ d->top->base = d->pc - 1; // Address of this OP_SETDISPATCH word (pc was already advanced past it).
+ memcpy(&d->top->u.dispatch, d->pc, sizeof(void*)); // A pointer-sized dispatch value is stored inline after the opcode.
+ d->pc += sizeof(void*) / sizeof(uint32_t); // Skip the inline pointer, counted in 32-bit instruction words.
+ )
+ VMCASE(OP_STARTMSG,
+ CHECK_SUSPEND(upb_sink_startmsg(d->sink));
+ )
+ VMCASE(OP_ENDMSG,
+ CHECK_SUSPEND(upb_sink_endmsg(d->sink));
+ assert(d->call_len > 0);
+ d->pc = d->callstack[--d->call_len]; // Pop the call stack and continue at the saved address (OP_CALL pushes these).
+ )
+ VMCASE(OP_STARTSEQ,
+ CHECK_SUSPEND(upb_sink_startseq(d->sink, arg));
+ )
+ VMCASE(OP_ENDSEQ,
+ CHECK_SUSPEND(upb_sink_endseq(d->sink, arg));
+ )
+ VMCASE(OP_STARTSUBMSG,
+ CHECK_SUSPEND(upb_sink_startsubmsg(d->sink, arg));
+ )
+ VMCASE(OP_ENDSUBMSG,
+ CHECK_SUSPEND(upb_sink_endsubmsg(d->sink, arg));
+ )
+ VMCASE(OP_STARTSTR,
+ uint32_t len = d->top->end_ofs - offset(d); // Bytes between the current offset and the end of the top frame.
+ CHECK_SUSPEND(upb_sink_startstr(d->sink, arg, len));
+ if (len == 0) {
+ d->pc++; // Skip OP_STRING.
+ }
+ )
+ VMCASE(OP_STRING,
+ uint32_t len = curbufleft(d); // Deliver whatever curbufleft() reports as available in one call.
+ CHECK_SUSPEND(upb_sink_putstring(d->sink, arg, ptr(d), len));
+ advance(d, len);
+ if (d->delim_end == NULL) { // String extends beyond this buf?
+ d->pc--; // Back up so OP_STRING is executed again on the next call.
+ d->bufstart_ofs += size;
+ d->residual_end = d->residual; // Mark the residual buffer as empty.
+ return size; // Report the whole input buffer as consumed.
+ }
+ )
+ VMCASE(OP_ENDSTR,
+ CHECK_SUSPEND(upb_sink_endstr(d->sink, arg));
+ )
+ VMCASE(OP_PUSHTAGDELIM,
+ CHECK_SUSPEND(push(d, d->top->end_ofs)); // New frame inherits the enclosing frame's end offset.
+ )
+ VMCASE(OP_POP,
+ assert(d->top > d->stack);
+ d->top--; // Drop the top frame.
+ )
+ VMCASE(OP_PUSHLENDELIM,
+ uint32_t len;
+ CHECK_RETURN(decode_v32(d, &len));
+ CHECK_SUSPEND(push(d, offset(d) + len)); // New frame ends len bytes past the current offset.
+ set_delim_end(d);
+ )
+ VMCASE(OP_SETDELIM,
+ set_delim_end(d);
+ )
+ VMCASE(OP_SETGROUPNUM,
+ d->top->groupnum = arg;
+ )
+ VMCASE(OP_SETBIGGROUPNUM,
+ d->top->groupnum = *d->pc++; // Group number is read from the following instruction word.
+ )
+ VMCASE(OP_CHECKDELIM,
+ assert(!(d->delim_end && ptr(d) > d->delim_end)); // The read position must not have passed the delimiter.
+ if (ptr(d) == d->delim_end)
+ d->pc += longofs; // Exactly at the delimiter: take the relative jump.
+ )
+ VMCASE(OP_CALL,
+ d->callstack[d->call_len++] = d->pc; // Save the return address. NOTE(review): no bounds check on call_len is visible here.
+ d->pc += longofs;
+ )
+ VMCASE(OP_BRANCH,
+ d->pc += longofs;
+ )
+ VMCASE(OP_TAG1,
+ CHECK_SUSPEND(curbufleft(d) > 0);
+ uint8_t expected = (arg >> 8) & 0xff; // Expected tag byte sits in bits 8..15 of arg; the low 8 bits are the mismatch offset read below.
+ if (*ptr(d) == expected) {
+ advance(d, 1);
+ } else {
+ int8_t shortofs;
+ badtag: // Shared mismatch path; OP_TAG2 and OP_TAGN jump here with goto.
+ shortofs = arg; // Truncates arg to its low 8 bits, interpreted as signed.
+ if (shortofs == LABEL_DISPATCH) {
+ CHECK_RETURN(dispatch(d));
+ } else {
+ d->pc += shortofs;
+ break; // Avoid checkpoint().
+ }
+ }
+ )
+ VMCASE(OP_TAG2,
+ CHECK_SUSPEND(curbufleft(d) > 0);
+ uint16_t expected = (arg >> 8) & 0xffff; // Two expected tag bytes sit in bits 8..23 of arg.
+ if (curbufleft(d) >= 2) {
+ uint16_t actual;
+ memcpy(&actual, ptr(d), 2); // Read two raw input bytes so both can be compared at once.
+ if (expected == actual) {
+ advance(d, 2);
+ } else {
+ goto badtag;
+ }
+ } else {
+ int32_t result = upb_pbdecoder_checktag_slow(d, expected); // Fewer than two bytes available: use the slow path.
+ if (result == DECODE_MISMATCH) goto badtag;
+ if (result >= 0) return result; // A non-negative result is returned directly to the caller.
+ }
+ )
+ VMCASE(OP_TAGN, {
+ uint64_t expected;
+ memcpy(&expected, d->pc, 8); // The 8-byte expected tag is stored inline in the instruction stream.
+ d->pc += 2; // Skip those 8 bytes (two 32-bit words).
+ int32_t result = upb_pbdecoder_checktag_slow(d, expected);
+ if (result == DECODE_MISMATCH) goto badtag;
+ if (result >= 0) return result;
+ })
+ VMCASE(OP_HALT, {
+ return size;
+ })
}
- // TODO: deliver to unknown field callback.
- checkpoint(d);
- checkdelim(d);
}
}
-void *start(void *closure, const void *handler_data, size_t size_hint) {
- UPB_UNUSED(handler_data);
+void *upb_pbdecoder_start(void *closure, const void *handler_data,
+ size_t size_hint) { // Start-of-stream handler: prepares the decoder for the plan in handler_data and returns the decoder itself.
UPB_UNUSED(size_hint);
upb_pbdecoder *d = closure;
+ const upb_pbdecoderplan *plan = handler_data;
+ UPB_UNUSED(plan); // NOTE(review): plan is used unconditionally just below, so this looks redundant — confirm.
+ if (upb_pbdecoderplan_hasjitcode(plan)) { // NOTE(review): d and d->sink are dereferenced in this branch before the assert(d)/assert(d->sink) checks further down.
+ d->top->u.closure = d->sink->top->closure;
+ d->call_len = 0; // JIT path starts with an empty call stack.
+ } else {
+ d->call_len = 1; // Bytecode path: one call-stack entry is live (init() fills callstack[0]).
+ d->pc = upb_pbdecoderplan_codebase(plan); // Begin executing at the address returned for the plan's code base.
+ }
assert(d);
assert(d->sink);
- upb_sink_startmsg(d->sink);
+ if (plan->topmethod->dest_handlers) {
+ assert(d->sink->top->h == plan->topmethod->dest_handlers); // When the plan names destination handlers, the sink's top frame must use the same ones.
+ }
+ d->status = &d->sink->pipeline_->status_; // d->status aliases the sink pipeline's status object.
return d;
}
-bool end(void *closure, const void *handler_data) {
- UPB_UNUSED(handler_data);
+bool upb_pbdecoder_end(void *closure, const void *handler_data) { // End-of-input handler: fails if bytes are left in the residual buffer, otherwise runs the decoder once more with an empty buffer and reports the final status.
upb_pbdecoder *d = closure;
+ const upb_pbdecoderplan *plan = handler_data;
if (d->residual_end > d->residual) {
- // We have preserved bytes.
- upb_status_seterrliteral(decoder_status(d), "Unexpected EOF");
- return false;
- }
-
- // We may need to dispatch a top-level implicit frame.
- if (d->top == d->stack + 1 &&
- d->top->is_sequence &&
- !d->top->is_packed) {
- pop_seq(d);
- }
- if (d->top != d->stack) {
- upb_status_seterrliteral(
- decoder_status(d), "Ended inside delimited field.");
+ seterr(d, "Unexpected EOF"); // Undecoded bytes are still held in the residual buffer.
return false;
}
- upb_sink_endmsg(d->sink);
- return true;
-}
-
-size_t decode(void *closure, const void *hd, const char *buf, size_t size) {
- upb_pbdecoder *d = closure;
- const decoderplan *plan = hd;
- UPB_UNUSED(plan);
- assert(d->sink->top->h == plan->dest_handlers);
-
- if (size == 0) return 0;
- // Assume we'll consume the whole buffer unless this is overwritten.
- d->ret = size;
- d->buf_param = buf;
- d->size_param = size;
-
- if (_setjmp(d->exitjmp)) {
- // Hit end-of-buffer or error.
- return d->ret;
- }
-
- if (d->residual_end > d->residual) {
- // We have residual bytes from the last buffer.
- d->userbuf_remaining = d->size_param;
- } else {
- d->userbuf_remaining = 0;
- advancetobuf(d, buf, d->size_param);
-
- if (d->top != d->stack &&
- upb_fielddef_isstring(d->top->f) &&
- !d->top->is_sequence) {
- // Last buffer ended in the middle of a string; deliver more of it.
- size_t len = d->top->end_ofs - offset(d);
- if (d->size_param >= len) {
- upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING),
- d->ptr, len);
- advance(d, len);
- pop_string(d);
- } else {
- upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING),
- d->ptr, d->size_param);
- advance(d, d->size_param);
- d->residual_end = d->residual;
- advancetobuf(d, d->residual, 0);
- return d->size_param;
- }
- }
- }
- checkpoint(d);
- const upb_fielddef *f = d->top->f;
- while(1) {
+ // Message ends here.
+ uint64_t end = offset(d);
+ d->top->end_ofs = end; // Make the current frame end exactly at the current stream offset.
+ char dummy; // Only its address is passed, with size 0, to the decode entry points below.
+ if (upb_pbdecoderplan_hasjitcode(plan)) {
#ifdef UPB_USE_JIT_X64
- upb_decoder_enterjit(d, plan);
- checkpoint(d);
- set_delim_end(d); // JIT doesn't keep this current.
+ if (d->top != d->stack)
+ d->stack->end_ofs = 0; // NOTE(review): zeroes the bottom frame's end offset while frames are still nested; the intent is not evident here — confirm against the JIT code.
+ upb_pbdecoderplan_jitcode(plan)(closure, handler_data, &dummy, 0);
#endif
- checkdelim(d);
- if (!d->top->is_packed) {
- f = decode_tag(d);
+ } else {
+ d->stack->end_ofs = end; // The bottom frame also ends at the current offset.
+ uint32_t *p = d->pc - 1; // Instruction word immediately before pc.
+ if (getop(*p) == OP_CHECKDELIM) {
+ // Rewind from OP_TAG* to OP_CHECKDELIM.
+ assert(getop(*d->pc) == OP_TAG1 ||
+ getop(*d->pc) == OP_TAG2 ||
+ getop(*d->pc) == OP_TAGN);
+ d->pc = p; // Resume at the OP_CHECKDELIM instruction rather than at the tag check.
}
+ upb_pbdecoder_decode(closure, handler_data, &dummy, 0); // Return value is ignored; completion is judged by call_len below.
+ }
- switch (upb_fielddef_descriptortype(f)) {
- case UPB_DESCRIPTOR_TYPE_DOUBLE: decode_DOUBLE(d, f); break;
- case UPB_DESCRIPTOR_TYPE_FLOAT: decode_FLOAT(d, f); break;
- case UPB_DESCRIPTOR_TYPE_INT64: decode_INT64(d, f); break;
- case UPB_DESCRIPTOR_TYPE_UINT64: decode_UINT64(d, f); break;
- case UPB_DESCRIPTOR_TYPE_INT32: decode_INT32(d, f); break;
- case UPB_DESCRIPTOR_TYPE_FIXED64: decode_FIXED64(d, f); break;
- case UPB_DESCRIPTOR_TYPE_FIXED32: decode_FIXED32(d, f); break;
- case UPB_DESCRIPTOR_TYPE_BOOL: decode_BOOL(d, f); break;
- case UPB_DESCRIPTOR_TYPE_STRING: UPB_FALLTHROUGH_INTENDED;
- case UPB_DESCRIPTOR_TYPE_BYTES: decode_STRING(d, f); break;
- case UPB_DESCRIPTOR_TYPE_GROUP: decode_GROUP(d, f); break;
- case UPB_DESCRIPTOR_TYPE_MESSAGE: decode_MESSAGE(d, f); break;
- case UPB_DESCRIPTOR_TYPE_UINT32: decode_UINT32(d, f); break;
- case UPB_DESCRIPTOR_TYPE_ENUM: decode_ENUM(d, f); break;
- case UPB_DESCRIPTOR_TYPE_SFIXED32: decode_SFIXED32(d, f); break;
- case UPB_DESCRIPTOR_TYPE_SFIXED64: decode_SFIXED64(d, f); break;
- case UPB_DESCRIPTOR_TYPE_SINT32: decode_SINT32(d, f); break;
- case UPB_DESCRIPTOR_TYPE_SINT64: decode_SINT64(d, f); break;
- }
- checkpoint(d);
+ if (d->call_len != 0) { // Entries remain on the call stack, so the final run did not unwind it completely.
+ seterr(d, "Unexpected EOF");
+ return false;
}
+
+ return upb_ok(&d->sink->pipeline_->status_); // Success only if the sink pipeline's status is still OK.
}
void init(void *_d, upb_pipeline *p) {
UPB_UNUSED(p);
upb_pbdecoder *d = _d;
- d->limit = &d->stack[UPB_MAX_NESTING];
+ d->limit = &d->stack[UPB_DECODER_MAX_NESTING]; // Address of stack slot UPB_DECODER_MAX_NESTING; presumably the bound checked when pushing frames — confirm in push().
d->sink = NULL;
+ d->callstack[0] = &halt; // Seed the bottom call-stack slot with the address of `halt` (presumably an OP_HALT instruction word — confirm); OP_ENDMSG pops entries from this stack.
// reset() must be called before decoding; this is guaranteed by assert() in
// start().
}
@@ -778,15 +735,13 @@ void init(void *_d, upb_pipeline *p) {
void reset(void *_d) {
upb_pbdecoder *d = _d;
d->top = d->stack;
- d->top->is_sequence = false;
- d->top->is_packed = false;
- d->top->group_fieldnum = UINT32_MAX;
- d->top->end_ofs = UPB_NONDELIMITED;
+ d->top->end_ofs = UINT64_MAX; // Same all-ones value as the removed UPB_NONDELIMITED constant.
d->bufstart_ofs = 0;
d->ptr = d->residual;
d->buf = d->residual;
d->end = d->residual;
d->residual_end = d->residual;
+ d->call_len = 1; // One call-stack entry is live after a reset (init() fills callstack[0]).
}
bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink* sink) {
@@ -807,24 +762,3 @@ const upb_frametype upb_pbdecoder_frametype = {
const upb_frametype *upb_pbdecoder_getframetype() { // Accessor for the file-level upb_pbdecoder_frametype object.
return &upb_pbdecoder_frametype;
}
-
-const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest,
- bool allowjit,
- const void *owner) {
- UPB_UNUSED(allowjit);
- decoderplan *p = malloc(sizeof(*p));
- assert(upb_handlers_isfrozen(dest));
- p->dest_handlers = dest;
- upb_handlers_ref(dest, p);
-#ifdef UPB_USE_JIT_X64
- p->jit_code = NULL;
- if (allowjit) upb_decoderplan_makejit(p);
-#endif
-
- upb_handlers *h = upb_handlers_new(
- UPB_BYTESTREAM, &upb_pbdecoder_frametype, owner);
- upb_handlers_setstartstr(h, UPB_BYTESTREAM_BYTES, start, NULL, NULL);
- upb_handlers_setstring(h, UPB_BYTESTREAM_BYTES, decode, p, freeplan);
- upb_handlers_setendstr(h, UPB_BYTESTREAM_BYTES, end, NULL, NULL);
- return h;
-}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback