summaryrefslogtreecommitdiff
path: root/upb/pb/decoder.c
diff options
context:
space:
mode:
Diffstat (limited to 'upb/pb/decoder.c')
-rw-r--r--upb/pb/decoder.c914
1 files changed, 563 insertions, 351 deletions
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 065c495..2bfc717 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -6,12 +6,101 @@
*/
#include <inttypes.h>
+#include <setjmp.h>
#include <stddef.h>
#include <stdlib.h>
#include "upb/bytestream.h"
#include "upb/pb/decoder.h"
#include "upb/pb/varint.h"
+#define UPB_NONDELIMITED (0xffffffffffffffffULL)
+
+/* upb_pbdecoder ****************************************************************/
+
+struct dasm_State;
+
+typedef struct {  // One entry of the decoder's internal nesting stack.
+ const upb_fielddef *f;  // Field whose sequence/submessage/string this frame tracks.
+ uint64_t end_ofs;  // Stream offset where the frame ends, or UPB_NONDELIMITED.
+ uint32_t group_fieldnum; // Field number for group frames; UINT32_MAX for non-groups.
+ bool is_sequence; // true for a sequence frame, false for a submsg/str frame (f might be both).
+ bool is_packed; // true for packed primitive sequences.
+} frame;
+
+struct upb_pbdecoder {
+ // Where we push parsed data (not owned).
+ upb_sink *sink;
+
+ // Current input buffer [buf, end), the read position within it, and its stream offset.
+ const char *buf, *ptr, *end, *checkpoint;  // checkpoint: value of ptr at the last committed position.
+ uint64_t bufstart_ofs;  // Stream offset corresponding to buf.
+
+ // Buffer for residual bytes not parsed from the previous buffer.
+ char residual[16];
+ char *residual_end;  // End of the saved bytes; equals residual when nothing is saved.
+
+ // Stores the user buffer passed to our decode function.
+ const char *buf_param;
+ size_t size_param;
+
+ // Equal to size_param while we are in the residual buf, 0 otherwise.
+ size_t userbuf_remaining;
+
+ // Used to temporarily store the return value before calling longjmp().
+ size_t ret;
+
+ // Pointer into the current buffer where the top frame's delimited region ends, or NULL if that end is not in this buf.
+ const char *delim_end;
+
+#ifdef UPB_USE_JIT_X64
+ // For JIT, which doesn't do bounds checks in the middle of parsing a field.
+ const char *jit_end, *effective_end; // == MIN(jit_end, delim_end)
+
+ // Used momentarily by the generated code to store a value while a user
+ // function is called.
+ uint32_t tmp_len;
+
+ const void *saved_rbp;
+#endif
+
+ // Our internal stack.
+ frame *top, *limit;  // top: the current (innermost) frame.
+ frame stack[UPB_MAX_NESTING];
+
+ // For exiting the decoder on error.
+ jmp_buf exitjmp;
+};
+
+typedef struct {  // Decoding plan attached to a set of decoder handlers; released by freeplan().
+ // The top-level handlers that this plan calls into. We own a ref.
+ const upb_handlers *dest_handlers;
+
+#ifdef UPB_USE_JIT_X64
+ // JIT-generated machine code (else NULL).
+ char *jit_code;
+ size_t jit_size;
+ char *debug_info;
+
+ // For storing upb_jitmsginfo, which contains per-msg runtime data needed
+ // by the JIT.
+ // Maps upb_handlers* -> upb_jitmsginfo.
+ upb_inttable msginfo;
+
+ // The following members are used only while the JIT is being built.
+
+ // This pointer is allocated by dasm_init() and freed by dasm_free().
+ struct dasm_State *dynasm;
+
+ // For storing pclabel bases while we are building the JIT.
+ // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base
+ upb_inttable pclabels;
+
+ // This is not the same as len(pclabels) because the table only contains base
+ // offsets for each def, but each def can have many pclabels.
+ uint32_t pclabel_count;
+#endif
+} decoderplan;
+
typedef struct {
uint8_t native_wire_type;
bool is_numeric;
@@ -39,12 +128,21 @@ static const upb_decoder_typeinfo upb_decoder_types[] = {
{UPB_WIRE_TYPE_VARINT, true}, // SINT64
};
-/* upb_decoderplan ************************************************************/
+// Looks up the selector for handler `type` on field `f`.  The lookup result
+// is checked with UPB_ASSERT_VAR; callers receive the selector directly.
+static upb_selector_t getselector(const upb_fielddef *f,
+                                  upb_handlertype_t type) {
+  upb_selector_t sel;
+  bool found = upb_getselector(f, type, &sel);
+  UPB_ASSERT_VAR(found, found);
+  return sel;
+}
+
+
+/* decoderplan ****************************************************************/
#ifdef UPB_USE_JIT_X64
// These defines are necessary for DynASM codegen.
// See dynasm/dasm_proto.h for more info.
-#define Dst_DECL upb_decoderplan *plan
+#define Dst_DECL decoderplan *plan
#define Dst_REF (plan->dynasm)
#define Dst (plan)
@@ -58,39 +156,49 @@ static const upb_decoder_typeinfo upb_decoder_types[] = {
#include "upb/pb/decoder_x64.h"
#endif
-upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit) {
- UPB_UNUSED(allowjit);
- upb_decoderplan *p = malloc(sizeof(*p));
- assert(upb_handlers_isfrozen(h));
- p->handlers = h;
- upb_handlers_ref(h, p);
-#ifdef UPB_USE_JIT_X64
- p->jit_code = NULL;
- if (allowjit) upb_decoderplan_makejit(p);
-#endif
- return p;
-}
-
-void upb_decoderplan_unref(upb_decoderplan *p) {
- // TODO: make truly refcounted.
- upb_handlers_unref(p->handlers, p);
+void freeplan(void *_p) {  // Destroys a decoderplan; _p must point to a decoderplan.
+ decoderplan *p = _p;
+ upb_handlers_unref(p->dest_handlers, p);  // Drop the ref the plan holds on its destination handlers.
#ifdef UPB_USE_JIT_X64
if (p->jit_code) upb_decoderplan_freejit(p);  // Only present when JIT code was generated.
#endif
free(p);
}
-bool upb_decoderplan_hasjitcode(upb_decoderplan *p) {
+// Returns the decoderplan attached to `h`, or NULL when `h` does not have the
+// pbdecoder frame type or no selector exists for the bytestream "bytes"
+// string handler.
+static decoderplan *getdecoderplan(const upb_handlers *h) {
+  upb_selector_t bytes_sel;
+  bool is_decoder = upb_handlers_frametype(h) == upb_pbdecoder_getframetype();
+  if (is_decoder &&
+      upb_getselector(UPB_BYTESTREAM_BYTES, UPB_HANDLER_STRING, &bytes_sel)) {
+    // The plan is stored as the handler data of the string handler.
+    return upb_handlers_gethandlerdata(h, bytes_sel);
+  }
+  return NULL;
+}
+
+// True iff `h` has a decoderplan attached (see getdecoderplan()).
+bool upb_pbdecoder_isdecoder(const upb_handlers *h) {
+  const decoderplan *plan = getdecoderplan(h);
+  return plan != NULL;
+}
+
+bool upb_pbdecoder_hasjitcode(const upb_handlers *h) {  // True iff h carries a plan with JIT code.
#ifdef UPB_USE_JIT_X64
+ decoderplan *p = getdecoderplan(h);
+ if (!p) return false;  // h is not a decoder's handlers.
return p->jit_code != NULL;
#else
- (void)p;
+ UPB_UNUSED(h);
return false;  // Built without JIT support.
#endif
}
+// Returns the handlers that the plan attached to `h` calls into, or NULL if
+// `h` carries no decoderplan.
+const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h) {
+  const decoderplan *plan = getdecoderplan(h);
+  return plan ? plan->dest_handlers : NULL;
+}
+
+
+/* upb_pbdecoder ****************************************************************/
-/* upb_decoder ****************************************************************/
+static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
// It's unfortunate that we have to micro-manage the compiler this way,
// especially since this tuning is necessarily specific to one hardware
@@ -100,98 +208,73 @@ bool upb_decoderplan_hasjitcode(upb_decoderplan *p) {
#define FORCEINLINE static inline __attribute__((always_inline))
#define NOINLINE static __attribute__((noinline))
-UPB_NORETURN static void upb_decoder_exitjmp(upb_decoder *d) {
- // Resumable decoder would back out to completed_ptr (and possibly get a
- // previous buffer).
- _longjmp(d->exitjmp, 1);
+static upb_status *decoder_status(upb_pbdecoder *d) {  // Status object of the pipeline behind d->sink.
+ // TODO(haberman): encapsulate this access to pipeline->status, but not sure
+ // exactly what that interface should look like.
+ return &d->sink->pipeline_->status_;
}
-UPB_NORETURN static void upb_decoder_exitjmp2(void *d) {
- upb_decoder_exitjmp(d);
+
+UPB_NORETURN static void exitjmp(upb_pbdecoder *d) {  // Unwinds to the _setjmp() that armed d->exitjmp.
+ _longjmp(d->exitjmp, 1);
}
-UPB_NORETURN static void upb_decoder_abortjmp(upb_decoder *d, const char *msg) {
- upb_status_seterrliteral(&d->status, msg);
- upb_decoder_exitjmp(d);
+
+UPB_NORETURN static void abortjmp(upb_pbdecoder *d, const char *msg) {
+ d->ret = in_residual_buf(d, d->checkpoint) ? 0 : (d->checkpoint - d->buf);
+ upb_status_seterrliteral(decoder_status(d), msg);
+ exitjmp(d);
}
/* Buffering ******************************************************************/
-// We operate on one buffer at a time, which may be a subset of the currently
-// loaded byteregion data. When data for the buffer is completely gone we pull
-// the next one. When we've committed our progress we discard any previous
-// buffers' regions.
+// We operate on one buffer at a time, which is either the user's buffer passed
+// to our "decode" callback or some residual bytes from the previous buffer.
-static size_t upb_decoder_bufleft(upb_decoder *d) {
+// How many bytes can be safely read from d->ptr, i.e. the distance from d->ptr to d->end.
+static size_t bufleft(upb_pbdecoder *d) {
assert(d->end >= d->ptr);
return d->end - d->ptr;
}
-static void upb_decoder_advance(upb_decoder *d, size_t len) {
- assert(upb_decoder_bufleft(d) >= len);
+// Overall stream offset of d->ptr: the stream offset of the current buffer's
+// start plus the distance already consumed within that buffer.
+uint64_t offset(const upb_pbdecoder *d) {
+  ptrdiff_t consumed = d->ptr - d->buf;
+  return d->bufstart_ofs + consumed;
+}
+
+// Advances d->ptr by len bytes; len must not exceed bufleft(d).
+static void advance(upb_pbdecoder *d, size_t len) {
+ assert(bufleft(d) >= len);
d->ptr += len;
}
-uint64_t upb_decoder_offset(upb_decoder *d) {
- return d->bufstart_ofs + (d->ptr - d->buf);
+// Commits d->ptr progress; should be called when an entire atomic value
+// (i.e. tag+value) has been successfully consumed.
+static void checkpoint(upb_pbdecoder *d) {
+ d->checkpoint = d->ptr;
}
-uint64_t upb_decoder_bufendofs(upb_decoder *d) {
- return d->bufstart_ofs + (d->end - d->buf);
+// True iff `p` lies within [buf, end]; both bounds are inclusive, so a
+// pointer equal to `end` counts as inside.
+static bool in_buf(const char *p, const char *buf, const char *end) {
+  return !(p < buf || p > end);
}
-static bool upb_decoder_islegalend(upb_decoder *d) {
- if (d->top == d->stack) return true;
- if (d->top - 1 == d->stack &&
- d->top->is_sequence && !d->top->is_packed) return true;
- return false;
+// True iff `p` points into the decoder's residual buffer, i.e. within
+// [d->residual, d->residual_end] with both bounds inclusive.
+static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
+  return d->residual <= p && p <= d->residual_end;
}
-// Calculates derived values that we cache for speed. These reflect a
-// combination of the current buffer and the stack, so must be called whenever
-// either is updated.
-static void upb_decoder_setmsgend(upb_decoder *d) {
- upb_decoder_frame *f = d->top;
+// Calculates the delim_end value, which represents a combination of the
+// current buffer and the stack, so must be called whenever either is updated.
+static void set_delim_end(upb_pbdecoder *d) {
+ frame *f = d->top;
size_t delimlen = f->end_ofs - d->bufstart_ofs;
size_t buflen = d->end - d->buf;
d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ?
d->buf + delimlen : NULL; // NULL if not in this buf.
- d->top_is_packed = f->is_packed;
-}
-
-static void upb_decoder_skiptonewbuf(upb_decoder *d, uint64_t ofs) {
- assert(ofs >= upb_decoder_offset(d));
- if (ofs > upb_byteregion_endofs(d->input))
- upb_decoder_abortjmp(d, "Unexpected EOF");
- d->buf = NULL;
- d->ptr = NULL;
- d->end = NULL;
- d->delim_end = NULL;
-#ifdef UPB_USE_JIT_X64
- d->jit_end = NULL;
-#endif
- d->bufstart_ofs = ofs;
}
-static bool upb_trypullbuf(upb_decoder *d) {
- assert(upb_decoder_bufleft(d) == 0);
- upb_decoder_skiptonewbuf(d, upb_decoder_offset(d));
- if (upb_byteregion_available(d->input, d->bufstart_ofs) == 0) {
- switch (upb_byteregion_fetch(d->input)) {
- case UPB_BYTE_OK:
- assert(upb_byteregion_available(d->input, d->bufstart_ofs) > 0);
- break;
- case UPB_BYTE_EOF: return false;
- case UPB_BYTE_ERROR: upb_decoder_abortjmp(d, "I/O error in input");
- // Decoder resuming is not yet supported.
- case UPB_BYTE_WOULDBLOCK:
- upb_decoder_abortjmp(d, "Input returned WOULDBLOCK");
- }
- }
- size_t len;
- d->buf = upb_byteregion_getptr(d->input, d->bufstart_ofs, &len);
- assert(len > 0);
- d->ptr = d->buf;
- d->end = d->buf + len;
- upb_decoder_setmsgend(d);
+static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
+ d->ptr = buf;
+ d->buf = buf;
+ d->end = end;
+ set_delim_end(d);
#ifdef UPB_USE_JIT_X64
// If we start parsing a value, we can parse up to 20 bytes without
// having to bounds-check anything (2 10-byte varints). Since the
@@ -199,172 +282,232 @@ static bool upb_trypullbuf(upb_decoder *d) {
// JIT bails if there are not 20 bytes available.
d->jit_end = d->end - 20;
#endif
- assert(upb_decoder_bufleft(d) > 0);
- return true;
}
-static void upb_pullbuf(upb_decoder *d) {
- if (!upb_trypullbuf(d)) upb_decoder_abortjmp(d, "Unexpected EOF");
+// Suspends decoding until more input arrives: makes the residual buffer the
+// current buffer, then unwinds through exitjmp().  Never returns, so it is
+// marked UPB_NORETURN like exitjmp() and abortjmp().
+UPB_NORETURN static void suspendjmp(upb_pbdecoder *d) {
+  switchtobuf(d, d->residual, d->residual_end);
+  exitjmp(d);
+}
+
+// Switches to a new buffer [buf, buf+len) that continues the stream right
+// after the current buffer.  The current buffer must be fully consumed
+// (d->ptr == d->end); its length is folded into d->bufstart_ofs first so that
+// offset() stays continuous across the switch.
+static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
+  // No lower-bound check on len: size_t is unsigned, so "len >= 0" is
+  // always true.
+  assert(d->ptr == d->end);
+  d->bufstart_ofs += (d->ptr - d->buf);
+  switchtobuf(d, buf, buf + len);
+}
+
+// Skips `bytes` bytes of input without delivering them to the sink.  If the
+// skipped region extends beyond all data currently available, the remaining
+// amount is added to d->ret and the decoder suspends (does not return).
+static void skip(upb_pbdecoder *d, size_t bytes) {
+  const size_t in_cur = bufleft(d);
+  const size_t in_all = in_cur + d->userbuf_remaining;
+
+  if (bytes <= in_cur) {
+    // Skipped data is all in current buffer.
+    advance(d, bytes);
+    return;
+  }
+
+  if (bytes <= in_all) {
+    // Skipped data is all in residual buf and param buffer.
+    assert(in_residual_buf(d, d->ptr));
+    advance(d, in_cur);
+    advancetobuf(d, d->buf_param, d->size_param);
+    d->userbuf_remaining = 0;
+    advance(d, bytes - in_cur);
+    return;
+  }
+
+  // Skipped data extends beyond currently available buffers.
+  // TODO: we need to do a checkdelim() equivalent that pops any frames that
+  // we just skipped past.
+  d->bufstart_ofs = offset(d) + bytes;
+  d->residual_end = d->residual;
+  d->ret += bytes - in_all;
+  suspendjmp(d);
+}
+
+static void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {  // Copies bytes from d->ptr into buf and advances.
+ assert(bytes <= bufleft(d));  // Caller must ensure the current buffer holds enough data.
+ memcpy(buf, d->ptr, bytes);
+ advance(d, bytes);
}
-static void upb_decoder_checkpoint(upb_decoder *d) {
- upb_byteregion_discard(d->input, upb_decoder_offset(d));
+// Slow path of getbytes(), taken when the current buffer alone cannot supply
+// `bytes` bytes.  If the residual buffer plus the user buffer can satisfy the
+// request, copies across the seam and continues in the user buffer.
+// Otherwise saves the unconsumed bytes (from the last checkpoint on) in the
+// residual buffer and suspends via suspendjmp(), which does not return.
+NOINLINE void getbytes_slow(upb_pbdecoder *d, void *buf, size_t bytes) {
+  const size_t avail = bufleft(d);
+  if (avail + d->userbuf_remaining >= bytes) {
+    // Remaining residual buffer and param buffer together can satisfy.
+    // (We are only called from getbytes() which has already verified that
+    // the current buffer alone cannot satisfy).
+    assert(in_residual_buf(d, d->ptr));
+    consumebytes(d, buf, avail);
+    advancetobuf(d, d->buf_param, d->size_param);
+    // Pointer arithmetic on void* is a GNU extension, not standard C; offset
+    // the destination through a char* instead.
+    consumebytes(d, (char *)buf + avail, bytes - avail);
+    d->userbuf_remaining = 0;
+  } else {
+    // There is not enough remaining data, save residual bytes (if any)
+    // starting at the last committed checkpoint and exit.
+    if (in_buf(d->checkpoint, d->buf_param, d->buf_param + d->size_param)) {
+      // Checkpoint was in user buf; old residual bytes not needed.
+      d->ptr = d->checkpoint;
+      size_t save = bufleft(d);
+      assert(save <= sizeof(d->residual));
+      memcpy(d->residual, d->ptr, save);
+      d->residual_end = d->residual + save;
+      d->bufstart_ofs = offset(d);
+    } else {
+      // Checkpoint was in residual buf; append user byte(s) to residual buf.
+      assert(d->checkpoint == d->residual);
+      assert((d->residual_end - d->residual) + d->size_param <=
+             sizeof(d->residual));
+      if (!in_residual_buf(d, d->ptr)) {
+        // We had already moved on to the user buffer; rewind the stream
+        // offset to the start of the residual bytes.
+        d->bufstart_ofs -= (d->residual_end - d->residual);
+      }
+      memcpy(d->residual_end, d->buf_param, d->size_param);
+      d->residual_end += d->size_param;
+    }
+    suspendjmp(d);
+  }
}
-static void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) {
- if (ofs <= upb_decoder_bufendofs(d)) {
- upb_decoder_advance(d, ofs - upb_decoder_offset(d));
+FORCEINLINE void getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {  // Reads exactly `bytes` bytes into buf.
+ if (bufleft(d) >= bytes) {
+ // Buffer has enough data to satisfy.
+ consumebytes(d, buf, bytes);
} else {
- upb_decoder_skiptonewbuf(d, ofs);
+ getbytes_slow(d, buf, bytes);  // Crosses a buffer seam or suspends the decoder.
}
- upb_decoder_checkpoint(d);
}
-static void upb_decoder_discard(upb_decoder *d, size_t bytes) {
- upb_decoder_discardto(d, upb_decoder_offset(d) + bytes);
+FORCEINLINE uint8_t getbyte(upb_pbdecoder *d) {  // Reads a single byte via getbytes().
+ uint8_t byte;
+ getbytes(d, &byte, 1);
+ return byte;
}
/* Decoding of wire types *****************************************************/
-NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
+NOINLINE uint64_t decode_varint_slow(upb_pbdecoder *d) {  // Decodes a varint one byte at a time.
uint8_t byte = 0x80;  // Continuation bit set so the loop runs at least once.
uint64_t u64 = 0;
int bitpos;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {  // At most 10 bytes, 7 payload bits each.
- if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
- u64 |= ((uint64_t)(byte = *d->ptr) & 0x7F) << bitpos;
- upb_decoder_advance(d, 1);
+ u64 |= ((uint64_t)((byte = getbyte(d)) & 0x7F)) << bitpos;
}
if(bitpos == 70 && (byte & 0x80))  // Tenth byte still had its continuation bit set.
- upb_decoder_abortjmp(d, "Unterminated varint");
+ abortjmp(d, "Unterminated varint");
return u64;
}
+// Decodes a varint byte-by-byte and narrows it to 32 bits, aborting the
+// decoder if the decoded value exceeds UINT32_MAX.
+NOINLINE uint32_t decode_v32_slow(upb_pbdecoder *d) {
+  const uint64_t wide = decode_varint_slow(d);
+  const bool fits = wide <= UINT32_MAX;
+  if (!fits) abortjmp(d, "Unterminated 32-bit varint");
+  return (uint32_t)wide;
+}
+
// For tags and delimited lengths, which must be <=32bit and are usually small.
-FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
- const char *p = d->ptr;
- uint32_t ret;
- uint64_t u64;
+FORCEINLINE uint32_t decode_v32(upb_pbdecoder *d) {
// Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048).
- if (upb_decoder_bufleft(d) < 2) goto slow; // unlikely.
- ret = *p & 0x7f;
- if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order
- ret |= (*p & 0x7f) << 7;
- if ((*(p++) & 0x80) == 0) goto done; // likely
-slow:
- u64 = upb_decode_varint_slow(d);
- if (u64 > UINT32_MAX) upb_decoder_abortjmp(d, "Unterminated 32-bit varint");
- ret = (uint32_t)u64;
- p = d->ptr; // Turn the next line into a nop.
-done:
- upb_decoder_advance(d, p - d->ptr);
- return ret;
-}
-
-// Returns true on success or false if we've hit a valid EOF.
-FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
- if (upb_decoder_bufleft(d) == 0 &&
- upb_decoder_islegalend(d) &&
- !upb_trypullbuf(d)) {
- return false;
+ if (bufleft(d) >= 2) {
+ uint32_t ret = d->ptr[0] & 0x7f;
+ if ((d->ptr[0] & 0x80) == 0) {
+ advance(d, 1);
+ return ret;
+ }
+ ret |= (d->ptr[1] & 0x7f) << 7;
+ if ((d->ptr[1] & 0x80) == 0) {
+ advance(d, 2);
+ return ret;
+ }
}
- *val = upb_decode_varint32(d);
- return true;
+ return decode_v32_slow(d);
}
-FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
- if (upb_decoder_bufleft(d) >= 10) {
+FORCEINLINE uint64_t decode_varint(upb_pbdecoder *d) {
+ if (bufleft(d) >= 10) {
// Fast case.
upb_decoderet r = upb_vdecode_fast(d->ptr);
- if (r.p == NULL) upb_decoder_abortjmp(d, "Unterminated varint");
- upb_decoder_advance(d, r.p - d->ptr);
+ if (r.p == NULL) abortjmp(d, "Unterminated varint");
+ advance(d, r.p - d->ptr);
return r.val;
- } else if (upb_decoder_bufleft(d) > 0) {
- // Intermediate case -- worth it?
- char tmpbuf[10];
- memset(tmpbuf, 0x80, 10);
- memcpy(tmpbuf, d->ptr, upb_decoder_bufleft(d));
- upb_decoderet r = upb_vdecode_fast(tmpbuf);
- if (r.p != NULL) {
- upb_decoder_advance(d, r.p - tmpbuf);
- return r.val;
- }
- }
- // Slow case -- varint spans buffer seam.
- return upb_decode_varint_slow(d);
-}
-
-FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
- if (upb_decoder_bufleft(d) >= bytes) {
- // Fast case.
- memcpy(buf, d->ptr, bytes);
- upb_decoder_advance(d, bytes);
} else {
- // Slow case.
- size_t read = 0;
- while (1) {
- size_t avail = UPB_MIN(upb_decoder_bufleft(d), bytes - read);
- memcpy(buf + read, d->ptr, avail);
- upb_decoder_advance(d, avail);
- read += avail;
- if (read == bytes) break;
- upb_pullbuf(d);
- }
+ // Slow case -- varint spans buffer seam.
+ return decode_varint_slow(d);
}
}
-FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
+FORCEINLINE uint32_t decode_fixed32(upb_pbdecoder *d) {  // Reads 4 raw bytes into a uint32_t.
uint32_t u32;
- upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t));
+ getbytes(d, &u32, 4);
return u32; // TODO: proper byte swapping for big-endian machines.
}
-FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
+
+FORCEINLINE uint64_t decode_fixed64(upb_pbdecoder *d) {  // Reads 8 raw bytes into a uint64_t.
uint64_t u64;
- upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t));
+ getbytes(d, &u64, 8);
return u64; // TODO: proper byte swapping for big-endian machines.
}
-INLINE void upb_push_msg(upb_decoder *d, const upb_fielddef *f, uint64_t end) {
- upb_decoder_frame *fr = d->top + 1;
- if (!upb_sink_startsubmsg(&d->sink, f) || fr > d->limit) {
- upb_decoder_abortjmp(d, "Nesting too deep.");
- }
+static void push(upb_pbdecoder *d, const upb_fielddef *f, bool is_sequence,
+ bool is_packed, int32_t group_fieldnum, uint64_t end) {  // Pushes a new frame onto the decoder stack.
+ frame *fr = d->top + 1;
+ if (fr >= d->limit) abortjmp(d, "Nesting too deep.");
fr->f = f;
- fr->is_sequence = false;
- fr->is_packed = false;
+ fr->is_sequence = is_sequence;
+ fr->is_packed = is_packed;
fr->end_ofs = end;
- fr->group_fieldnum = end == UPB_NONDELIMITED ?
- (int32_t)upb_fielddef_number(f) : -1;
+ fr->group_fieldnum = group_fieldnum;  // A -1 argument is stored as UINT32_MAX (the field is uint32_t).
d->top = fr;
- upb_decoder_setmsgend(d);
+ set_delim_end(d);  // delim_end depends on the top frame, so recompute it.
}
-INLINE void upb_push_seq(upb_decoder *d, const upb_fielddef *f, bool packed,
- uint64_t end_ofs) {
- upb_decoder_frame *fr = d->top + 1;
- if (!upb_sink_startseq(&d->sink, f) || fr > d->limit) {
- upb_decoder_abortjmp(d, "Nesting too deep.");
- }
- fr->f = f;
- fr->is_sequence = true;
- fr->group_fieldnum = -1;
- fr->is_packed = packed;
- fr->end_ofs = end_ofs;
- d->top = fr;
- upb_decoder_setmsgend(d);
+// Notifies the sink that a submessage for `f` starts and pushes its frame.
+// `end` is the stream offset where the submessage ends, or UPB_NONDELIMITED
+// for a group; in the group case the frame records f's field number so the
+// ENDGROUP tag can be matched against it.
+static void push_msg(upb_pbdecoder *d, const upb_fielddef *f, uint64_t end) {
+  int32_t group_fieldnum = -1;
+  if (!upb_sink_startsubmsg(d->sink, getselector(f, UPB_HANDLER_STARTSUBMSG)))
+    abortjmp(d, "startsubmsg failed.");
+  if (end == UPB_NONDELIMITED)
+    group_fieldnum = (int32_t)upb_fielddef_number(f);
+  push(d, f, false, false, group_fieldnum, end);
+}
+
+// Notifies the sink that a sequence for repeated field `f` starts and pushes
+// a sequence frame ending at stream offset `end_ofs`.  `packed` marks a
+// packed primitive sequence.
+static void push_seq(upb_pbdecoder *d, const upb_fielddef *f, bool packed,
+                     uint64_t end_ofs) {
+  const upb_selector_t sel = getselector(f, UPB_HANDLER_STARTSEQ);
+  if (!upb_sink_startseq(d->sink, sel)) {
+    abortjmp(d, "startseq failed.");
+  }
+  push(d, f, true, packed, -1, end_ofs);
+}
+
+// Notifies the sink that a string of `len` bytes for field `f` starts and
+// pushes a (non-sequence) frame for it ending at stream offset `end`.  Used
+// when the string's data is not fully contained in the current buffer.
+// Aborts the decoder if the sink rejects the string start.
+static void push_str(upb_pbdecoder *d, const upb_fielddef *f, size_t len,
+                     uint64_t end) {
+  if (!upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), len))
+    abortjmp(d, "startstr failed.");  // Was "startseq failed." (copy-paste).
+  push(d, f, false, false, -1, end);
}
-INLINE void upb_pop_submsg(upb_decoder *d) {
- upb_sink_endsubmsg(&d->sink, d->top->f);
+static void pop_submsg(upb_pbdecoder *d) {  // Ends the top frame's submessage and pops the frame.
+ upb_sink_endsubmsg(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSUBMSG));
d->top--;
- upb_decoder_setmsgend(d);
+ set_delim_end(d);  // The top frame changed, so delim_end must be recomputed.
}
-INLINE void upb_pop_seq(upb_decoder *d) {
- upb_sink_endseq(&d->sink, d->top->f);
+static void pop_seq(upb_pbdecoder *d) {  // Ends the top frame's sequence and pops the frame.
+ upb_sink_endseq(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSEQ));
d->top--;
- upb_decoder_setmsgend(d);
+ set_delim_end(d);  // The top frame changed, so delim_end must be recomputed.
+}
+
+static void pop_string(upb_pbdecoder *d) {  // Ends the top frame's string and pops the frame.
+ upb_sink_endstr(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSTR));
+ d->top--;
+ set_delim_end(d);  // The top frame changed, so delim_end must be recomputed.
}
+
+// Pops every frame whose delimited region ends at or before d->ptr within the
+// current buffer, ending the corresponding sequence or submessage on the sink.
+static void checkdelim(upb_pbdecoder *d) {
+  while (d->delim_end != NULL && d->delim_end <= d->ptr) {
+    // TODO(haberman): not sure what to do about this; if we detect this error
+    // we can possibly violate the promise that errors are always signaled by a
+    // short "parsed byte" count (because all bytes might have been successfully
+    // parsed prior to detecting this error).
+    // if (d->ptr > d->delim_end) abortjmp(d, "Bad submessage end");
+    void (*pop)(upb_pbdecoder *) = d->top->is_sequence ? pop_seq : pop_submsg;
+    pop(d);
+  }
}
@@ -374,95 +517,79 @@ INLINE void upb_pop_seq(upb_decoder *d) {
// properly sign-extended. We could detect this and error about the data loss,
// but proto2 does not do this, so we pass.
-#define T(type, wt, name, convfunc) \
- INLINE void upb_decode_ ## type(upb_decoder *d, const upb_fielddef *f) { \
- upb_sink_put ## name(&d->sink, f, (convfunc)(upb_decode_ ## wt(d))); \
+#define T(type, sel, wt, name, convfunc) \
+ static void decode_ ## type(upb_pbdecoder *d, const upb_fielddef *f) { \
+ upb_sink_put ## name(d->sink, getselector(f, UPB_HANDLER_ ## sel), \
+ (convfunc)(decode_ ## wt(d))); \
} \
static double upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
static float upb_asfloat(uint32_t n) { float f; memcpy(&f, &n, 4); return f; }
-T(INT32, varint, int32, int32_t)
-T(INT64, varint, int64, int64_t)
-T(UINT32, varint, uint32, uint32_t)
-T(UINT64, varint, uint64, uint64_t)
-T(FIXED32, fixed32, uint32, uint32_t)
-T(FIXED64, fixed64, uint64, uint64_t)
-T(SFIXED32, fixed32, int32, int32_t)
-T(SFIXED64, fixed64, int64, int64_t)
-T(BOOL, varint, bool, bool)
-T(ENUM, varint, int32, int32_t)
-T(DOUBLE, fixed64, double, upb_asdouble)
-T(FLOAT, fixed32, float, upb_asfloat)
-T(SINT32, varint, int32, upb_zzdec_32)
-T(SINT64, varint, int64, upb_zzdec_64)
+T(INT32, INT32, varint, int32, int32_t)
+T(INT64, INT64, varint, int64, int64_t)
+T(UINT32, UINT32, varint, uint32, uint32_t)
+T(UINT64, UINT64, varint, uint64, uint64_t)
+T(FIXED32, UINT32, fixed32, uint32, uint32_t)
+T(FIXED64, UINT64, fixed64, uint64, uint64_t)
+T(SFIXED32, INT32, fixed32, int32, int32_t)
+T(SFIXED64, INT64, fixed64, int64, int64_t)
+T(BOOL, BOOL, varint, bool, bool)
+T(ENUM, INT32, varint, int32, int32_t)
+T(DOUBLE, DOUBLE, fixed64, double, upb_asdouble)
+T(FLOAT, FLOAT, fixed32, float, upb_asfloat)
+T(SINT32, INT32, varint, int32, upb_zzdec_32)
+T(SINT64, INT64, varint, int64, upb_zzdec_64)
#undef T
-static void upb_decode_GROUP(upb_decoder *d, const upb_fielddef *f) {
- upb_push_msg(d, f, UPB_NONDELIMITED);
-}
-
-static void upb_decode_MESSAGE(upb_decoder *d, const upb_fielddef *f) {
- uint32_t len = upb_decode_varint32(d);
- upb_push_msg(d, f, upb_decoder_offset(d) + len);
-}
-
-static void upb_decode_STRING(upb_decoder *d, const upb_fielddef *f) {
- uint32_t strlen = upb_decode_varint32(d);
- uint64_t offset = upb_decoder_offset(d);
- uint64_t end = offset + strlen;
- if (end > upb_byteregion_endofs(d->input))
- upb_decoder_abortjmp(d, "Unexpected EOF");
- upb_sink_startstr(&d->sink, f, strlen);
- while (strlen > 0) {
- if (upb_byteregion_available(d->input, offset) == 0)
- upb_pullbuf(d);
- size_t len;
- const char *ptr = upb_byteregion_getptr(d->input, offset, &len);
- len = UPB_MIN(len, strlen);
- len = upb_sink_putstring(&d->sink, f, ptr, len);
- if (len > strlen)
- upb_decoder_abortjmp(d, "Skipped too many bytes.");
- offset += len;
- strlen -= len;
- upb_decoder_discardto(d, offset);
- }
- upb_sink_endstr(&d->sink, f);
+static void decode_GROUP(upb_pbdecoder *d, const upb_fielddef *f) {  // Groups have no length prefix.
+ push_msg(d, f, UPB_NONDELIMITED);
}
+// Starts a length-delimited submessage: reads the length prefix, then pushes
+// a frame that ends that many bytes past the current stream offset.
+static void decode_MESSAGE(upb_pbdecoder *d, const upb_fielddef *f) {
+  const uint32_t len = decode_v32(d);
+  const uint64_t end_ofs = offset(d) + len;
+  push_msg(d, f, end_ofs);
+}
-/* The main decoding loop *****************************************************/
-
-static void upb_decoder_checkdelim(upb_decoder *d) {
- // TODO: This doesn't work for the case that no buffer is currently loaded
- // (ie. d->buf == NULL) because delim_end is NULL even if we are at
- // end-of-delim. Need to add a test that exercises this by putting a buffer
- // seam in the middle of the final delimited value in a proto that we skip
- // for some reason (like because it's unknown and we have no unknown field
- // handler).
- while (d->delim_end != NULL && d->ptr >= d->delim_end) {
- if (d->ptr > d->delim_end) upb_decoder_abortjmp(d, "Bad submessage end");
- if (d->top->is_sequence) {
- upb_pop_seq(d);
- } else {
- upb_pop_submsg(d);
+// Decodes a string/bytes value.  When the whole value is in the current
+// buffer it is delivered as start/put/end in one go; otherwise a string frame
+// is pushed, the available prefix is delivered, and the decoder suspends.
+static void decode_STRING(upb_pbdecoder *d, const upb_fielddef *f) {
+  const uint32_t len = decode_v32(d);  // Not named strlen, to avoid shadowing the libc function.
+  if (len <= bufleft(d)) {
+    upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), len);
+    if (len != 0)
+      upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING),
+                         d->ptr, len);
+    upb_sink_endstr(d->sink, getselector(f, UPB_HANDLER_ENDSTR));
+    advance(d, len);
+  } else {
+    // Buffer ends in the middle of the string; need to push a decoder frame
+    // for it.
+    push_str(d, f, len, offset(d) + len);
+    if (bufleft(d) != 0) {
+      upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING),
+                         d->ptr, bufleft(d));
+      advance(d, bufleft(d));
}
+    d->bufstart_ofs = offset(d);
+    d->residual_end = d->residual;
+    suspendjmp(d);
}
}
-INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) {
+
+/* The main decoding loop *****************************************************/
+
+static const upb_fielddef *decode_tag(upb_pbdecoder *d) {
while (1) {
- uint32_t tag;
- if (!upb_trydecode_varint32(d, &tag)) return NULL;
+ uint32_t tag = decode_v32(d);
uint8_t wire_type = tag & 0x7;
uint32_t fieldnum = tag >> 3; const upb_fielddef *f = NULL;
- const upb_handlers *h = upb_sink_tophandlers(&d->sink);
+ const upb_handlers *h = upb_sinkframe_handlers(upb_sink_top(d->sink));
f = upb_msgdef_itof(upb_handlers_msgdef(h), fieldnum);
bool packed = false;
if (f) {
// Wire type check.
- upb_fieldtype_t type = upb_fielddef_type(f);
+ upb_descriptortype_t type = upb_fielddef_descriptortype(f);
if (wire_type == upb_decoder_types[type].native_wire_type) {
// Wire type is ok.
} else if ((wire_type == UPB_WIRE_TYPE_DELIMITED &&
@@ -477,18 +604,19 @@ INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) {
// There are no explicit "startseq" or "endseq" markers in protobuf
// streams, so we have to infer them by noticing when a repeated field
// starts or ends.
- upb_decoder_frame *fr = d->top;
+ frame *fr = d->top;
if (fr->is_sequence && fr->f != f) {
- upb_pop_seq(d);
+ pop_seq(d);
fr = d->top;
}
if (f && upb_fielddef_isseq(f) && !fr->is_sequence) {
if (packed) {
- uint32_t len = upb_decode_varint32(d);
- upb_push_seq(d, f, true, upb_decoder_offset(d) + len);
+ uint32_t len = decode_v32(d);
+ push_seq(d, f, true, offset(d) + len);
+ checkpoint(d);
} else {
- upb_push_seq(d, f, false, fr->end_ofs);
+ push_seq(d, f, false, fr->end_ofs);
}
}
@@ -496,118 +624,202 @@ INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) {
// Unknown field or ENDGROUP.
if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER)
- upb_decoder_abortjmp(d, "Invalid field number");
+ abortjmp(d, "Invalid field number");
switch (wire_type) {
- case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
- case UPB_WIRE_TYPE_32BIT: upb_decoder_discard(d, 4); break;
- case UPB_WIRE_TYPE_64BIT: upb_decoder_discard(d, 8); break;
- case UPB_WIRE_TYPE_DELIMITED:
- upb_decoder_discard(d, upb_decode_varint32(d)); break;
+ case UPB_WIRE_TYPE_VARINT: decode_varint(d); break;
+ case UPB_WIRE_TYPE_32BIT: skip(d, 4); break;
+ case UPB_WIRE_TYPE_64BIT: skip(d, 8); break;
+ case UPB_WIRE_TYPE_DELIMITED: skip(d, decode_v32(d)); break;
case UPB_WIRE_TYPE_START_GROUP:
- upb_decoder_abortjmp(d, "Can't handle unknown groups yet");
+ abortjmp(d, "Can't handle unknown groups yet");
case UPB_WIRE_TYPE_END_GROUP:
if (fieldnum != fr->group_fieldnum)
- upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag");
- upb_sink_endsubmsg(&d->sink, fr->f);
- d->top--;
- upb_decoder_setmsgend(d);
+ abortjmp(d, "Unmatched ENDGROUP tag");
+ pop_submsg(d);
break;
default:
- upb_decoder_abortjmp(d, "Invalid wire type");
+ abortjmp(d, "Invalid wire type");
}
// TODO: deliver to unknown field callback.
- upb_decoder_checkpoint(d);
- upb_decoder_checkdelim(d);
+ checkpoint(d);
+ checkdelim(d);
}
}
-upb_success_t upb_decoder_decode(upb_decoder *d) {
- assert(d->input);
+// Start handler for the decoder: begins the top-level message on the
+// decoder's output sink.  Returns the decoder taken from the frame's user
+// data; `size_hint` is ignored.
+void *start(const upb_sinkframe *fr, size_t size_hint) {
+  upb_pbdecoder *decoder = upb_sinkframe_userdata(fr);
+  UPB_UNUSED(size_hint);
+  assert(decoder);
+  assert(decoder->sink);
+  upb_sink_startmsg(decoder->sink);
+  return decoder;
+}
+
+// End handler for the decoder: called when the input is finished.  Fails with
+// an error status if unparsed residual bytes remain or if the input stopped
+// inside a delimited field; otherwise ends the top-level message on the sink
+// and returns true.
+bool end(const upb_sinkframe *fr) {
+  upb_pbdecoder *d = upb_sinkframe_userdata(fr);
+
+  const bool have_residual = d->residual_end > d->residual;
+  if (have_residual) {
+    // We have preserved bytes.
+    upb_status_seterrliteral(decoder_status(d), "Unexpected EOF");
+    return false;
+  }
+
+  // We may need to dispatch a top-level implicit frame.
+  const bool implicit_seq = d->top == d->stack + 1 &&
+                            d->top->is_sequence &&
+                            !d->top->is_packed;
+  if (implicit_seq) {
+    assert(upb_sinkframe_depth(upb_sink_top(d->sink)) == 1);
+    pop_seq(d);
+  }
+
+  if (d->top == d->stack) {
+    upb_sink_endmsg(d->sink);
+    return true;
+  }
+
+  upb_status_seterrliteral(
+      decoder_status(d), "Ended inside delimited field.");
+  return false;
+}
+
+// String handler for the decoder's input bytes: parses the `size` bytes at
+// `buf`, continuing from whatever state the previous call left in the
+// upb_pbdecoder attached to `fr`.
+//
+// Returns 0 for an empty buffer. Otherwise returns d->ret, which is preset to
+// `size` (whole buffer consumed) and may be overwritten before a longjmp()
+// lands back on the _setjmp() below. The only direct return is the
+// partial-string path, which returns `size`.
+size_t decode(const upb_sinkframe *fr, const char *buf, size_t size) {
+ upb_pbdecoder *d = upb_sinkframe_userdata(fr);
+ decoderplan *plan = upb_sinkframe_handlerdata(fr);
+ UPB_UNUSED(plan);
+ assert(upb_sinkframe_handlers(upb_sink_top(d->sink)) == plan->dest_handlers);
+
+ if (size == 0) return 0;
+ // Assume we'll consume the whole buffer unless this is overwritten.
+ d->ret = size;
+
if (_setjmp(d->exitjmp)) {
- assert(!upb_ok(&d->status));
- return UPB_ERROR;
+ // Hit end-of-buffer or error.
+ return d->ret;
+ }
+
+ // Remember the caller's buffer for the rest of this call.
+ d->buf_param = buf;
+ d->size_param = size;
+ if (d->residual_end > d->residual) {
+ // We have residual bytes from the last buffer.
+ d->userbuf_remaining = size;
+ } else {
+ d->userbuf_remaining = 0;
+ advancetobuf(d, buf, size);
+
+ if (d->top != d->stack &&
+ upb_fielddef_isstring(d->top->f) &&
+ !d->top->is_sequence) {
+ // Last buffer ended in the middle of a string; deliver more of it.
+ // len is what the string frame is still owed: the distance from the
+ // current stream offset to the frame's end offset.
+ size_t len = d->top->end_ofs - offset(d);
+ if (size >= len) {
+ // The rest of the string is in this buffer: deliver it and close the
+ // string frame, then fall through to normal parsing.
+ upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING),
+ d->ptr, len);
+ advance(d, len);
+ pop_string(d);
+ } else {
+ // The whole buffer is string data: deliver all of it, point the decoder
+ // back at the (emptied) residual buffer, and report full consumption.
+ upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING),
+ d->ptr, size);
+ advance(d, size);
+ d->residual_end = d->residual;
+ advancetobuf(d, d->residual, 0);
+ return size;
+ }
+ }
}
- upb_sink_startmsg(&d->sink);
- // Prime the buf so we can hit the JIT immediately.
- upb_trypullbuf(d);
+ checkpoint(d);
+
+ // f starts out as the top frame's field and is only replaced when a tag is
+ // decoded in the loop below.
const upb_fielddef *f = d->top->f;
+ // There is no break or return inside this loop: the function leaves it only
+ // through a longjmp() back to the _setjmp() above (presumably raised by the
+ // helpers on end-of-buffer or error -- see the comment there).
while(1) {
#ifdef UPB_USE_JIT_X64
- upb_decoder_enterjit(d);
- upb_decoder_checkpoint(d);
- upb_decoder_setmsgend(d);
+ upb_decoder_enterjit(d, plan);
+ checkpoint(d);
+ set_delim_end(d); // JIT doesn't keep this current.
#endif
- upb_decoder_checkdelim(d);
- if (!d->top_is_packed) f = upb_decode_tag(d);
- if (!f) {
- // Sucessful EOF. We may need to dispatch a top-level implicit frame.
- if (d->top->is_sequence) {
- assert(d->sink.top == d->sink.stack + 1);
- upb_pop_seq(d);
- }
- assert(d->top == d->stack);
- upb_sink_endmsg(&d->sink, &d->status);
- return UPB_OK;
+ checkdelim(d);
+ // Inside a packed frame no tag is decoded; f keeps the value it already
+ // holds.
+ if (!d->top->is_packed) {
+ f = decode_tag(d);
}
- switch (upb_fielddef_type(f)) {
- case UPB_TYPE(DOUBLE): upb_decode_DOUBLE(d, f); break;
- case UPB_TYPE(FLOAT): upb_decode_FLOAT(d, f); break;
- case UPB_TYPE(INT64): upb_decode_INT64(d, f); break;
- case UPB_TYPE(UINT64): upb_decode_UINT64(d, f); break;
- case UPB_TYPE(INT32): upb_decode_INT32(d, f); break;
- case UPB_TYPE(FIXED64): upb_decode_FIXED64(d, f); break;
- case UPB_TYPE(FIXED32): upb_decode_FIXED32(d, f); break;
- case UPB_TYPE(BOOL): upb_decode_BOOL(d, f); break;
- case UPB_TYPE(STRING):
- case UPB_TYPE(BYTES): upb_decode_STRING(d, f); break;
- case UPB_TYPE(GROUP): upb_decode_GROUP(d, f); break;
- case UPB_TYPE(MESSAGE): upb_decode_MESSAGE(d, f); break;
- case UPB_TYPE(UINT32): upb_decode_UINT32(d, f); break;
- case UPB_TYPE(ENUM): upb_decode_ENUM(d, f); break;
- case UPB_TYPE(SFIXED32): upb_decode_SFIXED32(d, f); break;
- case UPB_TYPE(SFIXED64): upb_decode_SFIXED64(d, f); break;
- case UPB_TYPE(SINT32): upb_decode_SINT32(d, f); break;
- case UPB_TYPE(SINT64): upb_decode_SINT64(d, f); break;
- case UPB_TYPE_NONE: assert(false); break;
+ // Dispatch on the field's descriptor type; STRING and BYTES share
+ // decode_STRING. There is no default case.
+ switch (upb_fielddef_descriptortype(f)) {
+ case UPB_DESCRIPTOR_TYPE_DOUBLE: decode_DOUBLE(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_FLOAT: decode_FLOAT(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_INT64: decode_INT64(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_UINT64: decode_UINT64(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_INT32: decode_INT32(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_FIXED64: decode_FIXED64(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_FIXED32: decode_FIXED32(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_BOOL: decode_BOOL(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_STRING: UPB_FALLTHROUGH_INTENDED;
+ case UPB_DESCRIPTOR_TYPE_BYTES: decode_STRING(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_GROUP: decode_GROUP(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_MESSAGE: decode_MESSAGE(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_UINT32: decode_UINT32(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_ENUM: decode_ENUM(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_SFIXED32: decode_SFIXED32(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_SFIXED64: decode_SFIXED64(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_SINT32: decode_SINT32(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_SINT64: decode_SINT64(d, f); break;
}
- upb_decoder_checkpoint(d);
+ checkpoint(d);
}
}
-void upb_decoder_init(upb_decoder *d) {
- upb_status_init(&d->status);
- d->plan = NULL;
- d->input = NULL;
+// Init hook listed in upb_pbdecoder_frametype. Treats _d as a upb_pbdecoder,
+// sets its stack limit to UPB_MAX_NESTING frames past the start of the stack,
+// and clears the sink pointer. It does not set up any parse state.
+void init(void *_d) {
+ upb_pbdecoder *d = _d;
d->limit = &d->stack[UPB_MAX_NESTING];
+ d->sink = NULL;
+ // reset() must be called before decoding; this is guaranteed by assert() in
+ // start().
}
-void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p) {
- d->plan = p;
- d->input = NULL;
- upb_sink_init(&d->sink, p->handlers);
-}
-
-void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input,
- void *c) {
- assert(d->plan);
- upb_status_clear(&d->status);
- upb_sink_reset(&d->sink, c);
- d->input = input;
-
+// Reset hook listed in upb_pbdecoder_frametype. Returns the upb_pbdecoder at
+// _d to its starting parse state: the frame stack holds only the top-level
+// frame (not a sequence, not packed, not a group, not delimited), the stream
+// offset is 0, and every buffer pointer aims at the start of the residual
+// buffer, which is marked empty.
+void reset(void *_d) {
+ upb_pbdecoder *d = _d;
d->top = d->stack;
d->top->is_sequence = false;
d->top->is_packed = false;
d->top->group_fieldnum = UINT32_MAX;
d->top->end_ofs = UPB_NONDELIMITED;
-
- // Protect against assert in skiptonewbuf().
d->bufstart_ofs = 0;
- d->ptr = NULL;
- d->buf = NULL;
- upb_decoder_skiptonewbuf(d, upb_byteregion_startofs(input));
+ // ptr == buf == end == residual_end: no input and no residual bytes yet.
+ d->ptr = d->residual;
+ d->buf = d->residual;
+ d->end = d->residual;
+ d->residual_end = d->residual;
}
-void upb_decoder_uninit(upb_decoder *d) {
- upb_status_uninit(&d->status);
+// Points the decoder at `sink` (stored by pointer, not copied) and returns the
+// decoder to its starting parse state via reset(). Currently always returns
+// true; the checks described in the TODO below are not implemented.
+bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink* sink) {
+ // TODO(haberman): typecheck the sink, and test whether the decoder is in the
+ // middle of decoding. Return false if either assumption is violated.
+ d->sink = sink;
+ reset(d);
+ return true;
+}
+
+// Frame type describing upb_pbdecoder instances. The initializer is
+// positional: the size of upb_pbdecoder, then init, NULL, and reset.
+// NOTE(review): the NULL slot is presumably an optional uninit/cleanup hook --
+// confirm against the upb_frametype declaration.
+const upb_frametype upb_pbdecoder_frametype = {
+ sizeof(upb_pbdecoder),
+ init,
+ NULL,
+ reset,
+};
+
+// Returns a pointer to upb_pbdecoder_frametype, the frame type describing
+// upb_pbdecoder instances. The parameter list is spelled (void) so the
+// definition is a real prototype and stray arguments are rejected by the
+// compiler.
+const upb_frametype *upb_pbdecoder_getframetype(void) {
+ return &upb_pbdecoder_frametype;
+}
+
+// Builds the bytestream handlers for a protobuf decoder whose output sink is
+// expected to use `dest`.
+//
+// dest: destination handlers; must already be frozen (asserted). A ref on
+// them is taken on behalf of the newly allocated decoder plan.
+// allowjit: when built with UPB_USE_JIT_X64, requests JIT code for the plan;
+// ignored otherwise.
+// owner: passed through to upb_handlers_new() as the owner of the result.
+//
+// Returns the new handlers, or NULL if the decoder plan cannot be allocated.
+const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest,
+ bool allowjit,
+ const void *owner) {
+ UPB_UNUSED(allowjit);
+ assert(upb_handlers_isfrozen(dest));
+ decoderplan *p = malloc(sizeof(*p));
+ if (!p) {
+ // Out of memory. Nothing has been ref'd or created yet, so bail out
+ // instead of dereferencing NULL below.
+ return NULL;
+ }
+ p->dest_handlers = dest;
+ upb_handlers_ref(dest, p);
+#ifdef UPB_USE_JIT_X64
+ p->jit_code = NULL;
+ if (allowjit) upb_decoderplan_makejit(p);
+#endif
+
+ // The plan is attached as the string handler's data, with freeplan
+ // registered alongside it (presumably the cleanup callback that releases the
+ // plan -- confirm against upb_handlers_setstring()).
+ upb_handlers *h = upb_handlers_new(
+ UPB_BYTESTREAM, &upb_pbdecoder_frametype, owner);
+ upb_handlers_setstartstr(h, UPB_BYTESTREAM_BYTES, start, NULL, NULL);
+ upb_handlers_setstring(h, UPB_BYTESTREAM_BYTES, decode, p, freeplan);
+ upb_handlers_setendstr(h, UPB_BYTESTREAM_BYTES, end, NULL, NULL);
+ return h;
}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback