From 0941664215ed7fa4a8d53b6387d50c56df6757d0 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 20 May 2011 11:26:27 -0700 Subject: Add startseq/endseq handlers. Startseq/endseq handlers are called at the beginning and end of a sequence of repeated values. Protobuf does not really have direct support for this (repeated primitive fields do not delimit "begin" and "end" of the sequence) but we can infer them from the bytestream. The benefit of supporting them explicitly is that they get their own stack frame and closure, so we can avoid having to find the array's address over and over and deciding if we need to initialize it. This will also pave the way for better support of JSON, which does have explicit "startseq/endseq" markers: []. --- src/upb_decoder.c | 71 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 34 deletions(-) (limited to 'src/upb_decoder.c') diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 5bb148e..68fb7a5 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -12,6 +12,10 @@ #include "upb_decoder.h" #include "upb_varint.h" +// Used for frames that have no specific end offset: groups, repeated primitive +// fields inside groups, and the top-level message. +#define UPB_NONDELIMITED UINT32_MAX + #ifdef UPB_USE_JIT_X64 #define Dst_DECL upb_decoder *d #define Dst_REF (d->dynasm) @@ -20,11 +24,6 @@ #include "upb_decoder_x86.h" #endif -// A group continues until an END_GROUP tag is seen. -#define UPB_GROUPEND UINT32_MAX -// A non-packed repeated field ends when a diff. field is seen (or submsg end). -#define UPB_REPEATEDEND (UINT32_MAX-1) - // It's unfortunate that we have to micro-manage the compiler this way, // especially since this tuning is necessarily specific to one hardware // configuration. 
But empirically on a Core i7, performance increases 30-50% @@ -54,7 +53,7 @@ size_t upb_decoder_offset(upb_decoder *d) { static void upb_decoder_setmsgend(upb_decoder *d) { uint32_t end = d->dispatcher.top->end_offset; - d->submsg_end = (end == UINT32_MAX) ? (void*)UINTPTR_MAX : d->buf + end; + d->submsg_end = (end == UPB_NONDELIMITED) ? (void*)UINTPTR_MAX : d->buf + end; } // Pulls the next buffer from the bytesrc. Should be called only when the @@ -72,7 +71,7 @@ static void upb_pullbuf(upb_decoder *d, bool need) { if (last_buf_len != -1) { d->buf_stream_offset += last_buf_len; for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f) - if (f->end_offset != UINT32_MAX) + if (f->end_offset != UPB_NONDELIMITED) f->end_offset -= last_buf_len; } d->buf = upb_string_getrobuf(d->bufstr); @@ -186,14 +185,6 @@ INLINE upb_string *upb_decode_string(upb_decoder *d) { return d->tmp; } -INLINE void upb_pop(upb_decoder *d) { - //if (d->dispatcher.top->end_offset == UPB_REPEATEDEND) - // upb_dispatch_endseq(&d->dispatcher); - d->f = d->dispatcher.top->f; - upb_dispatch_endsubmsg(&d->dispatcher); - upb_decoder_setmsgend(d); -} - INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) { upb_dispatch_startsubmsg(&d->dispatcher, f)->end_offset = end; upb_decoder_setmsgend(d); @@ -235,11 +226,12 @@ T(SINT64, varint, int64, upb_zzdec_64) T(STRING, string, str, upb_string*) static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) { - upb_push(d, f, UPB_GROUPEND); + upb_push(d, f, UPB_NONDELIMITED); } static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) { (void)f; - upb_pop(d); + upb_dispatch_endsubmsg(&d->dispatcher); + upb_decoder_setmsgend(d); } static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) { upb_push(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf)); @@ -257,7 +249,13 @@ static void upb_delimend(upb_decoder *d) { upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); upb_decoder_exit(d); } - upb_pop(d); + + 
if (d->dispatcher.top->is_sequence) { + upb_dispatch_endseq(&d->dispatcher); + } else { + upb_dispatch_endsubmsg(&d->dispatcher); + } + upb_decoder_setmsgend(d); } static void upb_decoder_enterjit(upb_decoder *d) { @@ -276,10 +274,25 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { while (1) { uint32_t tag = upb_decode_varint32(d, false); upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag); - if (f) { - d->f = f; - return f; + + // There are no explicit "startseq" or "endseq" markers in protobuf + // streams, so we have to infer them by noticing when a repeated field + // starts or ends. + if (d->dispatcher.top->is_sequence && d->dispatcher.top->f != f) { + upb_dispatch_endseq(&d->dispatcher); + upb_decoder_setmsgend(d); + } + if (f && f->repeated && d->dispatcher.top->f != f) { + // TODO: support packed. + assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) || + (tag & 0x7) != UPB_WIRE_TYPE_DELIMITED); + uint32_t end = d->dispatcher.top->end_offset; + upb_dispatch_startseq(&d->dispatcher, f)->end_offset = end; + upb_decoder_setmsgend(d); } + if (f) return f; + + // Unknown field. switch (tag & 0x7) { case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break; case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break; @@ -291,20 +304,10 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { // TODO: deliver to unknown field callback. while (d->ptr >= d->submsg_end) upb_delimend(d); } - - // Have to handle both packed and non-packed sequences of primitives. 
- //if (d->dispatcher.top->end_offset == UPB_REPEATEDEND && d->f != f) { - // upb_dispatch_endseq(&d->dispatcher); - //} else if (f->is_repeated_primitive) { - // if ((tag & 0x7) == UPB_WIRE_TYPE_DELIMITED) { - // upb_pushseq(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf)); - // } else if (d->f != f) { - // upb_dispatch_startseq(d, f, UPB_REPEATEDEND); - // } - //} } void upb_decoder_onexit(upb_decoder *d) { + if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher); if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) { // Normal end-of-file. upb_clearerr(d->status); @@ -336,7 +339,7 @@ static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top, upb_dispatcher_frame *bottom) { (void)top; upb_decoder *d = _d; - if (bottom->end_offset == UINT32_MAX) { + if (bottom->end_offset == UPB_NONDELIMITED) { // TODO: support skipping groups. abort(); } @@ -386,7 +389,7 @@ void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) { } void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) { - upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UINT32_MAX; + upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED; d->bytesrc = bytesrc; d->buf = NULL; d->ptr = NULL; -- cgit v1.2.3