diff options
author | Joshua Haberman <joshua@reverberate.org> | 2011-05-20 11:26:27 -0700 |
---|---|---|
committer | Joshua Haberman <joshua@reverberate.org> | 2011-05-20 11:26:27 -0700 |
commit | 0941664215ed7fa4a8d53b6387d50c56df6757d0 (patch) | |
tree | 9125c22f6892015e05fa709426a6cc8b082972ad /src/upb_decoder.c | |
parent | 74102e836d285bcfcb4c22cbe72a3a36828d30cb (diff) |
Add startseq/endseq handlers.
Startseq/endseq handlers are called at the beginning
and end of a sequence of repeated values. Protobuf
does not really have direct support for these markers (repeated
primitive fields do not delimit the "begin" and "end" of
the sequence), but we can infer them from the bytestream.
The benefit of supporting them explicitly is that they
get their own stack frame and closure, so we can avoid
repeatedly looking up the array's address and
deciding whether we need to initialize it.
This will also pave the way for better support of JSON,
which does have explicit "startseq/endseq" markers: [].
Diffstat (limited to 'src/upb_decoder.c')
-rw-r--r-- | src/upb_decoder.c | 71 |
1 files changed, 37 insertions, 34 deletions
diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 5bb148e..68fb7a5 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -12,6 +12,10 @@ #include "upb_decoder.h" #include "upb_varint.h" +// Used for frames that have no specific end offset: groups, repeated primitive +// fields inside groups, and the top-level message. +#define UPB_NONDELIMITED UINT32_MAX + #ifdef UPB_USE_JIT_X64 #define Dst_DECL upb_decoder *d #define Dst_REF (d->dynasm) @@ -20,11 +24,6 @@ #include "upb_decoder_x86.h" #endif -// A group continues until an END_GROUP tag is seen. -#define UPB_GROUPEND UINT32_MAX -// A non-packed repeated field ends when a diff. field is seen (or submsg end). -#define UPB_REPEATEDEND (UINT32_MAX-1) - // It's unfortunate that we have to micro-manage the compiler this way, // especially since this tuning is necessarily specific to one hardware // configuration. But emperically on a Core i7, performance increases 30-50% @@ -54,7 +53,7 @@ size_t upb_decoder_offset(upb_decoder *d) { static void upb_decoder_setmsgend(upb_decoder *d) { uint32_t end = d->dispatcher.top->end_offset; - d->submsg_end = (end == UINT32_MAX) ? (void*)UINTPTR_MAX : d->buf + end; + d->submsg_end = (end == UPB_NONDELIMITED) ? (void*)UINTPTR_MAX : d->buf + end; } // Pulls the next buffer from the bytesrc. 
Should be called only when the @@ -72,7 +71,7 @@ static void upb_pullbuf(upb_decoder *d, bool need) { if (last_buf_len != -1) { d->buf_stream_offset += last_buf_len; for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f) - if (f->end_offset != UINT32_MAX) + if (f->end_offset != UPB_NONDELIMITED) f->end_offset -= last_buf_len; } d->buf = upb_string_getrobuf(d->bufstr); @@ -186,14 +185,6 @@ INLINE upb_string *upb_decode_string(upb_decoder *d) { return d->tmp; } -INLINE void upb_pop(upb_decoder *d) { - //if (d->dispatcher.top->end_offset == UPB_REPEATEDEND) - // upb_dispatch_endseq(&d->dispatcher); - d->f = d->dispatcher.top->f; - upb_dispatch_endsubmsg(&d->dispatcher); - upb_decoder_setmsgend(d); -} - INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) { upb_dispatch_startsubmsg(&d->dispatcher, f)->end_offset = end; upb_decoder_setmsgend(d); @@ -235,11 +226,12 @@ T(SINT64, varint, int64, upb_zzdec_64) T(STRING, string, str, upb_string*) static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) { - upb_push(d, f, UPB_GROUPEND); + upb_push(d, f, UPB_NONDELIMITED); } static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) { (void)f; - upb_pop(d); + upb_dispatch_endsubmsg(&d->dispatcher); + upb_decoder_setmsgend(d); } static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) { upb_push(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf)); @@ -257,7 +249,13 @@ static void upb_delimend(upb_decoder *d) { upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); upb_decoder_exit(d); } - upb_pop(d); + + if (d->dispatcher.top->is_sequence) { + upb_dispatch_endseq(&d->dispatcher); + } else { + upb_dispatch_endsubmsg(&d->dispatcher); + } + upb_decoder_setmsgend(d); } static void upb_decoder_enterjit(upb_decoder *d) { @@ -276,10 +274,25 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { while (1) { uint32_t tag = upb_decode_varint32(d, false); upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag); - if (f) 
{ - d->f = f; - return f; + + // There are no explicit "startseq" or "endseq" markers in protobuf + // streams, so we have to infer them by noticing when a repeated field + // starts or ends. + if (d->dispatcher.top->is_sequence && d->dispatcher.top->f != f) { + upb_dispatch_endseq(&d->dispatcher); + upb_decoder_setmsgend(d); + } + if (f && f->repeated && d->dispatcher.top->f != f) { + // TODO: support packed. + assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) || + (tag & 0x7) != UPB_WIRE_TYPE_DELIMITED); + uint32_t end = d->dispatcher.top->end_offset; + upb_dispatch_startseq(&d->dispatcher, f)->end_offset = end; + upb_decoder_setmsgend(d); } + if (f) return f; + + // Unknown field. switch (tag & 0x7) { case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break; case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break; @@ -291,20 +304,10 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { // TODO: deliver to unknown field callback. while (d->ptr >= d->submsg_end) upb_delimend(d); } - - // Have to handle both packed and non-packed sequences of primitives. - //if (d->dispatcher.top->end_offset == UPB_REPEATEDEND && d->f != f) { - // upb_dispatch_endseq(&d->dispatcher); - //} else if (f->is_repeated_primitive) { - // if ((tag & 0x7) == UPB_WIRE_TYPE_DELIMITED) { - // upb_pushseq(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf)); - // } else if (d->f != f) { - // upb_dispatch_startseq(d, f, UPB_REPEATEDEND); - // } - //} } void upb_decoder_onexit(upb_decoder *d) { + if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher); if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) { // Normal end-of-file. upb_clearerr(d->status); @@ -336,7 +339,7 @@ static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top, upb_dispatcher_frame *bottom) { (void)top; upb_decoder *d = _d; - if (bottom->end_offset == UINT32_MAX) { + if (bottom->end_offset == UPB_NONDELIMITED) { // TODO: support skipping groups. 
abort(); } @@ -386,7 +389,7 @@ void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) { } void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) { - upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UINT32_MAX; + upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED; d->bytesrc = bytesrc; d->buf = NULL; d->ptr = NULL; |