From 8ef6873e0e14309a1715a252a650bab0ae1a33ef Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Sun, 20 Mar 2011 13:13:51 -0700 Subject: upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37) --- src/upb_decoder.c | 156 +++++++++++++++++++++++------------------------------- 1 file changed, 65 insertions(+), 91 deletions(-) (limited to 'src/upb_decoder.c') diff --git a/src/upb_decoder.c b/src/upb_decoder.c index dc5dafd..6fa37b4 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -4,13 +4,11 @@ * Copyright (c) 2008-2011 Joshua Haberman. See LICENSE for details. */ -#include "upb_decoder.h" -#include "upb_varint_decoder.h" - #include #include #include -#include "upb_def.h" +#include "upb_decoder.h" +#include "upb_varint_decoder.h" // If the return value is other than UPB_CONTINUE, that is what the last // callback returned. @@ -37,7 +35,12 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } INLINE void upb_decoder_advance(upb_decoder *d, size_t len) { d->ptr += len; - //d->bytes_parsed_slow += len; +} + +INLINE size_t upb_decoder_offset(upb_decoder *d) { + size_t offset = d->buf_stream_offset; + if (d->buf) offset += (d->ptr - upb_string_getrobuf(d->buf)); + return offset; } INLINE size_t upb_decoder_bufleft(upb_decoder *d) { @@ -45,13 +48,11 @@ INLINE size_t upb_decoder_bufleft(upb_decoder *d) { } INLINE void upb_dstate_setmsgend(upb_decoder *d) { - d->submsg_end = (d->top->end_offset == UPB_GROUP_END_OFFSET) ? - (void*)UINTPTR_MAX : - upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); + size_t end_offset = d->dispatcher.top->end_offset; + d->submsg_end = (end_offset == UPB_GROUP_END_OFFSET) ? (void*)UINTPTR_MAX : + d->ptr + (end_offset - upb_decoder_offset(d)); } -static upb_flow_t upb_pop(upb_decoder *d); - // Called only from the slow path, this function copies the next "len" bytes // from the stream to "data", adjusting the dstate appropriately. static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) { @@ -186,54 +187,44 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { return upb_types[ft].native_wire_type == wt; } -static upb_flow_t upb_push(upb_decoder *d, upb_fielddef *f, - upb_value submsg_len, upb_fieldtype_t type) { - ++d->top; - if(d->top >= d->limit) { - upb_seterr(d->status, UPB_ERROR, "Nesting too deep."); - return UPB_ERROR; - } - d->top->end_offset = (type == UPB_TYPE(GROUP)) ? - UPB_GROUP_END_OFFSET : - d->buf_stream_offset + (d->ptr - upb_string_getrobuf(d->buf)) + - upb_value_getint32(submsg_len); - d->top->f = f; - d->msgdef = upb_downcast_msgdef(f->def); +static upb_flow_t upb_pop(upb_decoder *d) { + upb_flow_t ret = upb_dispatch_endsubmsg(&d->dispatcher); upb_dstate_setmsgend(d); - upb_flow_t ret = upb_dispatch_startsubmsg(&d->dispatcher, f); - if (ret == UPB_SKIPSUBMSG) { - if (type == UPB_TYPE(GROUP)) { - fprintf(stderr, "upb_decoder: Can't skip groups yet.\n"); - abort(); - } - upb_decoder_advance(d, upb_value_getint32(submsg_len)); - --d->top; - upb_dstate_setmsgend(d); - ret = UPB_CONTINUE; - } return ret; } -static upb_flow_t upb_pop(upb_decoder *d) { - --d->top; - d->msgdef = upb_downcast_msgdef(d->top->f->def); +static upb_flow_t upb_decoder_skipsubmsg(upb_decoder *d) { + if (d->dispatcher.top->f->type == UPB_TYPE(GROUP)) { + fprintf(stderr, "upb_decoder: Can't skip groups yet.\n"); + abort(); + } + upb_decoder_advance(d, d->dispatcher.top->end_offset - d->buf_stream_offset - + (d->ptr - upb_string_getrobuf(d->buf))); + upb_pop(d); + return UPB_CONTINUE; +} + +static upb_flow_t upb_push(upb_decoder *d, upb_handlers_fieldent *f, + upb_value submsg_len) { + upb_flow_t flow = upb_dispatch_startsubmsg(&d->dispatcher, f, + (f->type == UPB_TYPE(GROUP)) ? UPB_GROUP_END_OFFSET : + upb_decoder_offset(d) + upb_value_getint32(submsg_len)); upb_dstate_setmsgend(d); - return upb_dispatch_endsubmsg(&d->dispatcher, d->top->f); + return flow; } -void upb_decoder_run(upb_src *src, upb_status *status) { - upb_decoder *d = (upb_decoder*)src; +void upb_decoder_decode(upb_decoder *d, upb_status *status) { d->status = status; - d->ptr = NULL; - d->end = NULL; // Force a buffer pull. - d->submsg_end = (void*)0x1; // But don't let end-of-message get triggered. - d->msgdef = upb_downcast_msgdef(d->top->f->def); -// TODO: handle UPB_SKIPSUBMSG -#define CHECK_FLOW(expr) if ((expr) == UPB_BREAK) { /*assert(!upb_ok(status));*/ goto callback_err; } +#define CHECK_FLOW(expr) \ + switch (expr) { \ + case UPB_BREAK: goto callback_err; \ + case UPB_SKIPSUBMSG: upb_decoder_skipsubmsg(d); continue; \ + default: break; /* continue normally. */ \ + } #define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; } - CHECK_FLOW(upb_dispatch_startmsg(&d->dispatcher)); + if (upb_dispatch_startmsg(&d->dispatcher, d->closure) != UPB_CONTINUE) goto err; // Main loop: executed once per tag/field pair. while(1) { @@ -261,10 +252,10 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // Parse/handle tag. upb_tag tag; if (!upb_decode_tag(d, &tag)) { - if (status->code == UPB_EOF && d->top == d->stack) { + if (status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) { // Normal end-of-file. upb_clearerr(status); - CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); + upb_dispatch_endmsg(&d->dispatcher, status); return; } else { if (status->code == UPB_EOF) { @@ -282,7 +273,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { case UPB_WIRE_TYPE_START_GROUP: break; // Nothing to do now, below we will push appropriately. case UPB_WIRE_TYPE_END_GROUP: - if(d->top->end_offset != UPB_GROUP_END_OFFSET) { + if(d->dispatcher.top->end_offset != UPB_GROUP_END_OFFSET) { upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } @@ -302,41 +293,38 @@ void upb_decoder_run(upb_src *src, upb_status *status) { } // Look up field by tag number. - upb_itof_ent *e = upb_msgdef_itofent(d->msgdef, tag.field_number); + upb_dispatcher_field *f = + upb_dispatcher_lookup(&d->dispatcher, tag.field_number); - if (!e) { + if (!f) { if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) CHECK(upb_decode_string(d, &val, &d->tmp)); CHECK_FLOW(upb_dispatch_unknownval(&d->dispatcher, tag.field_number, val)); continue; } - upb_fielddef *f = e->f; - assert(e->field_type == f->type); - assert(e->native_wire_type == upb_types[f->type].native_wire_type); - - if (tag.wire_type != e->native_wire_type) { + if (tag.wire_type != f->native_wire_type) { // TODO: Support packed fields. - upb_seterr(status, UPB_ERROR, "Field had incorrect type, name: " UPB_STRFMT - ", field type: %d, expected wire type %d, actual wire type: %d", - UPB_STRARG(f->name), f->type, upb_types[f->type].native_wire_type, - tag.wire_type); - upb_printerr(status); + upb_seterr(status, UPB_ERROR, "Field had incorrect type, field number: %d" + ", field type: %d, expected wire type: %d, " + "actual wire type: %d, offset: %d", + tag.field_number, f->type, upb_types[f->type].native_wire_type, + tag.wire_type, upb_decoder_offset(d)); goto err; } - // Perform any further massaging of the data now that we have the fielddef. - // Now we can distinguish strings from submessages, and we know about - // zig-zag-encoded types. + // Perform any further massaging of the data now that we have the field's + // type. Now we can distinguish strings from submessages, and we know + // about zig-zag-encoded types. // TODO: handle packed encoding. // TODO: if we were being paranoid, we could check for 32-bit-varint types // that the top 32 bits all match the highest bit of the low 32 bits. // If this is not true we are losing data. But the main protobuf library // doesn't check this, and it would slow us down, so pass for now. - switch (e->field_type) { + switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK_FLOW(upb_push(d, f, val, e->field_type)); + CHECK_FLOW(upb_push(d, f, val)); continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): @@ -358,7 +346,6 @@ void upb_decoder_run(upb_src *src, upb_status *status) { } callback_err: - upb_copyerr(status, d->dispatcher.top->handlers.status); if (upb_ok(status)) { upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); } @@ -366,37 +353,24 @@ err: assert(!upb_ok(status)); } -void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { - upb_decoder *d = (upb_decoder*)src; - upb_dispatcher_reset(&d->dispatcher, handlers, true); - d->top = d->stack; - d->buf_stream_offset = 0; -} - -void upb_decoder_init(upb_decoder *d, upb_msgdef *msgdef) { - static upb_src_vtbl vtbl = { - &upb_decoder_sethandlers, - &upb_decoder_run, - }; - upb_src_init(&d->src, &vtbl); - upb_dispatcher_init(&d->dispatcher); - d->f.def = UPB_UPCAST(msgdef); - d->stack[0].f = &d->f; - // Never want to end top-level message, so treat it like a group. - d->stack[0].end_offset = UPB_GROUP_END_OFFSET; - d->limit = &d->stack[UPB_MAX_NESTING]; +void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) { + upb_dispatcher_init(&d->dispatcher, handlers, UPB_GROUP_END_OFFSET); d->buf = NULL; d->tmp = NULL; } -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) { d->bytesrc = bytesrc; - d->top = &d->stack[0]; + d->closure = closure; + upb_dispatcher_reset(&d->dispatcher); + d->ptr = NULL; + d->end = NULL; // Force a buffer pull. + d->submsg_end = (void*)0x1; // But don't let end-of-message get triggered. + d->buf_stream_offset = 0; } void upb_decoder_uninit(upb_decoder *d) { + upb_dispatcher_uninit(&d->dispatcher); upb_string_unref(d->buf); upb_string_unref(d->tmp); } - -upb_src *upb_decoder_src(upb_decoder *d) { return &d->src; } -- cgit v1.2.3