summary refs log tree commit diff
path: root/src/upb_decoder.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/upb_decoder.c')
-rw-r--r-- src/upb_decoder.c 71
1 files changed, 37 insertions, 34 deletions
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 5bb148e..68fb7a5 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -12,6 +12,10 @@
#include "upb_decoder.h"
#include "upb_varint.h"
+// Used for frames that have no specific end offset: groups, repeated primitive
+// fields inside groups, and the top-level message.
+#define UPB_NONDELIMITED UINT32_MAX
+
#ifdef UPB_USE_JIT_X64
#define Dst_DECL upb_decoder *d
#define Dst_REF (d->dynasm)
@@ -20,11 +24,6 @@
#include "upb_decoder_x86.h"
#endif
-// A group continues until an END_GROUP tag is seen.
-#define UPB_GROUPEND UINT32_MAX
-// A non-packed repeated field ends when a diff. field is seen (or submsg end).
-#define UPB_REPEATEDEND (UINT32_MAX-1)
-
// It's unfortunate that we have to micro-manage the compiler this way,
// especially since this tuning is necessarily specific to one hardware
// configuration. But empirically on a Core i7, performance increases 30-50%
@@ -54,7 +53,7 @@ size_t upb_decoder_offset(upb_decoder *d) {
static void upb_decoder_setmsgend(upb_decoder *d) {
uint32_t end = d->dispatcher.top->end_offset;
- d->submsg_end = (end == UINT32_MAX) ? (void*)UINTPTR_MAX : d->buf + end;
+ d->submsg_end = (end == UPB_NONDELIMITED) ? (void*)UINTPTR_MAX : d->buf + end;
}
// Pulls the next buffer from the bytesrc. Should be called only when the
@@ -72,7 +71,7 @@ static void upb_pullbuf(upb_decoder *d, bool need) {
if (last_buf_len != -1) {
d->buf_stream_offset += last_buf_len;
for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f)
- if (f->end_offset != UINT32_MAX)
+ if (f->end_offset != UPB_NONDELIMITED)
f->end_offset -= last_buf_len;
}
d->buf = upb_string_getrobuf(d->bufstr);
@@ -186,14 +185,6 @@ INLINE upb_string *upb_decode_string(upb_decoder *d) {
return d->tmp;
}
-INLINE void upb_pop(upb_decoder *d) {
- //if (d->dispatcher.top->end_offset == UPB_REPEATEDEND)
- // upb_dispatch_endseq(&d->dispatcher);
- d->f = d->dispatcher.top->f;
- upb_dispatch_endsubmsg(&d->dispatcher);
- upb_decoder_setmsgend(d);
-}
-
INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) {
upb_dispatch_startsubmsg(&d->dispatcher, f)->end_offset = end;
upb_decoder_setmsgend(d);
@@ -235,11 +226,12 @@ T(SINT64, varint, int64, upb_zzdec_64)
T(STRING, string, str, upb_string*)
static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
- upb_push(d, f, UPB_GROUPEND);
+ upb_push(d, f, UPB_NONDELIMITED);
}
static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
(void)f;
- upb_pop(d);
+ upb_dispatch_endsubmsg(&d->dispatcher);
+ upb_decoder_setmsgend(d);
}
static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf));
@@ -257,7 +249,13 @@ static void upb_delimend(upb_decoder *d) {
upb_seterr(d->status, UPB_ERROR, "Bad submessage end.");
upb_decoder_exit(d);
}
- upb_pop(d);
+
+ if (d->dispatcher.top->is_sequence) {
+ upb_dispatch_endseq(&d->dispatcher);
+ } else {
+ upb_dispatch_endsubmsg(&d->dispatcher);
+ }
+ upb_decoder_setmsgend(d);
}
static void upb_decoder_enterjit(upb_decoder *d) {
@@ -276,10 +274,25 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
while (1) {
uint32_t tag = upb_decode_varint32(d, false);
upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
- if (f) {
- d->f = f;
- return f;
+
+ // There are no explicit "startseq" or "endseq" markers in protobuf
+ // streams, so we have to infer them by noticing when a repeated field
+ // starts or ends.
+ if (d->dispatcher.top->is_sequence && d->dispatcher.top->f != f) {
+ upb_dispatch_endseq(&d->dispatcher);
+ upb_decoder_setmsgend(d);
+ }
+ if (f && f->repeated && d->dispatcher.top->f != f) {
+ // TODO: support packed.
+ assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) ||
+ (tag & 0x7) != UPB_WIRE_TYPE_DELIMITED);
+ uint32_t end = d->dispatcher.top->end_offset;
+ upb_dispatch_startseq(&d->dispatcher, f)->end_offset = end;
+ upb_decoder_setmsgend(d);
}
+ if (f) return f;
+
+ // Unknown field.
switch (tag & 0x7) {
case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
@@ -291,20 +304,10 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
// TODO: deliver to unknown field callback.
while (d->ptr >= d->submsg_end) upb_delimend(d);
}
-
- // Have to handle both packed and non-packed sequences of primitives.
- //if (d->dispatcher.top->end_offset == UPB_REPEATEDEND && d->f != f) {
- // upb_dispatch_endseq(&d->dispatcher);
- //} else if (f->is_repeated_primitive) {
- // if ((tag & 0x7) == UPB_WIRE_TYPE_DELIMITED) {
- // upb_pushseq(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf));
- // } else if (d->f != f) {
- // upb_dispatch_startseq(d, f, UPB_REPEATEDEND);
- // }
- //}
}
void upb_decoder_onexit(upb_decoder *d) {
+ if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher);
if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) {
// Normal end-of-file.
upb_clearerr(d->status);
@@ -336,7 +339,7 @@ static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
upb_dispatcher_frame *bottom) {
(void)top;
upb_decoder *d = _d;
- if (bottom->end_offset == UINT32_MAX) {
+ if (bottom->end_offset == UPB_NONDELIMITED) {
// TODO: support skipping groups.
abort();
}
@@ -386,7 +389,7 @@ void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
- upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UINT32_MAX;
+ upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED;
d->bytesrc = bytesrc;
d->buf = NULL;
d->ptr = NULL;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback