summaryrefslogtreecommitdiff
path: root/src/upb_decoder.c
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2011-05-20 11:26:27 -0700
committerJoshua Haberman <joshua@reverberate.org>2011-05-20 11:26:27 -0700
commit0941664215ed7fa4a8d53b6387d50c56df6757d0 (patch)
tree9125c22f6892015e05fa709426a6cc8b082972ad /src/upb_decoder.c
parent74102e836d285bcfcb4c22cbe72a3a36828d30cb (diff)
Add startseq/endseq handlers.
Startseq/endseq handlers are called at the beginning and end of a sequence of repeated values. Protobuf does not really have direct support for this (repeated primitive fields do not delimit "begin" and "end" of the sequence) but we can infer them from the bytestream. The benefit of supporting them explicitly is that they get their own stack frame and closure, so we can avoid having to find the array's address over and over and deciding if we need to initialize it. This will also pave the way for better support of JSON, which does have explicit "startseq/endseq" markers: [].
Diffstat (limited to 'src/upb_decoder.c')
-rw-r--r--src/upb_decoder.c71
1 files changed, 37 insertions, 34 deletions
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 5bb148e..68fb7a5 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -12,6 +12,10 @@
#include "upb_decoder.h"
#include "upb_varint.h"
+// Used for frames that have no specific end offset: groups, repeated primitive
+// fields inside groups, and the top-level message.
+#define UPB_NONDELIMITED UINT32_MAX
+
#ifdef UPB_USE_JIT_X64
#define Dst_DECL upb_decoder *d
#define Dst_REF (d->dynasm)
@@ -20,11 +24,6 @@
#include "upb_decoder_x86.h"
#endif
-// A group continues until an END_GROUP tag is seen.
-#define UPB_GROUPEND UINT32_MAX
-// A non-packed repeated field ends when a diff. field is seen (or submsg end).
-#define UPB_REPEATEDEND (UINT32_MAX-1)
-
// It's unfortunate that we have to micro-manage the compiler this way,
// especially since this tuning is necessarily specific to one hardware
// configuration. But emperically on a Core i7, performance increases 30-50%
@@ -54,7 +53,7 @@ size_t upb_decoder_offset(upb_decoder *d) {
static void upb_decoder_setmsgend(upb_decoder *d) {
uint32_t end = d->dispatcher.top->end_offset;
- d->submsg_end = (end == UINT32_MAX) ? (void*)UINTPTR_MAX : d->buf + end;
+ d->submsg_end = (end == UPB_NONDELIMITED) ? (void*)UINTPTR_MAX : d->buf + end;
}
// Pulls the next buffer from the bytesrc. Should be called only when the
@@ -72,7 +71,7 @@ static void upb_pullbuf(upb_decoder *d, bool need) {
if (last_buf_len != -1) {
d->buf_stream_offset += last_buf_len;
for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f)
- if (f->end_offset != UINT32_MAX)
+ if (f->end_offset != UPB_NONDELIMITED)
f->end_offset -= last_buf_len;
}
d->buf = upb_string_getrobuf(d->bufstr);
@@ -186,14 +185,6 @@ INLINE upb_string *upb_decode_string(upb_decoder *d) {
return d->tmp;
}
-INLINE void upb_pop(upb_decoder *d) {
- //if (d->dispatcher.top->end_offset == UPB_REPEATEDEND)
- // upb_dispatch_endseq(&d->dispatcher);
- d->f = d->dispatcher.top->f;
- upb_dispatch_endsubmsg(&d->dispatcher);
- upb_decoder_setmsgend(d);
-}
-
INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) {
upb_dispatch_startsubmsg(&d->dispatcher, f)->end_offset = end;
upb_decoder_setmsgend(d);
@@ -235,11 +226,12 @@ T(SINT64, varint, int64, upb_zzdec_64)
T(STRING, string, str, upb_string*)
static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
- upb_push(d, f, UPB_GROUPEND);
+ upb_push(d, f, UPB_NONDELIMITED);
}
static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
(void)f;
- upb_pop(d);
+ upb_dispatch_endsubmsg(&d->dispatcher);
+ upb_decoder_setmsgend(d);
}
static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf));
@@ -257,7 +249,13 @@ static void upb_delimend(upb_decoder *d) {
upb_seterr(d->status, UPB_ERROR, "Bad submessage end.");
upb_decoder_exit(d);
}
- upb_pop(d);
+
+ if (d->dispatcher.top->is_sequence) {
+ upb_dispatch_endseq(&d->dispatcher);
+ } else {
+ upb_dispatch_endsubmsg(&d->dispatcher);
+ }
+ upb_decoder_setmsgend(d);
}
static void upb_decoder_enterjit(upb_decoder *d) {
@@ -276,10 +274,25 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
while (1) {
uint32_t tag = upb_decode_varint32(d, false);
upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
- if (f) {
- d->f = f;
- return f;
+
+ // There are no explicit "startseq" or "endseq" markers in protobuf
+ // streams, so we have to infer them by noticing when a repeated field
+ // starts or ends.
+ if (d->dispatcher.top->is_sequence && d->dispatcher.top->f != f) {
+ upb_dispatch_endseq(&d->dispatcher);
+ upb_decoder_setmsgend(d);
+ }
+ if (f && f->repeated && d->dispatcher.top->f != f) {
+ // TODO: support packed.
+ assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) ||
+ (tag & 0x7) != UPB_WIRE_TYPE_DELIMITED);
+ uint32_t end = d->dispatcher.top->end_offset;
+ upb_dispatch_startseq(&d->dispatcher, f)->end_offset = end;
+ upb_decoder_setmsgend(d);
}
+ if (f) return f;
+
+ // Unknown field.
switch (tag & 0x7) {
case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
@@ -291,20 +304,10 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
// TODO: deliver to unknown field callback.
while (d->ptr >= d->submsg_end) upb_delimend(d);
}
-
- // Have to handle both packed and non-packed sequences of primitives.
- //if (d->dispatcher.top->end_offset == UPB_REPEATEDEND && d->f != f) {
- // upb_dispatch_endseq(&d->dispatcher);
- //} else if (f->is_repeated_primitive) {
- // if ((tag & 0x7) == UPB_WIRE_TYPE_DELIMITED) {
- // upb_pushseq(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf));
- // } else if (d->f != f) {
- // upb_dispatch_startseq(d, f, UPB_REPEATEDEND);
- // }
- //}
}
void upb_decoder_onexit(upb_decoder *d) {
+ if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher);
if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) {
// Normal end-of-file.
upb_clearerr(d->status);
@@ -336,7 +339,7 @@ static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
upb_dispatcher_frame *bottom) {
(void)top;
upb_decoder *d = _d;
- if (bottom->end_offset == UINT32_MAX) {
+ if (bottom->end_offset == UPB_NONDELIMITED) {
// TODO: support skipping groups.
abort();
}
@@ -386,7 +389,7 @@ void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
- upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UINT32_MAX;
+ upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED;
d->bytesrc = bytesrc;
d->buf = NULL;
d->ptr = NULL;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback