From 8ef6873e0e14309a1715a252a650bab0ae1a33ef Mon Sep 17 00:00:00 2001
From: Josh Haberman
Date: Sun, 20 Mar 2011 13:13:51 -0700
Subject: upb_stream: all callbacks registered ahead-of-time.

This is a significant change to the upb_stream protocol, and should
hopefully be the last significant change.

All callbacks are now registered ahead-of-time instead of having
delegated callbacks registered at runtime, which makes it much easier
to aggressively optimize ahead-of-time (like with a JIT).

Other impacts of this change:

- You no longer need to have loaded descriptor.proto as a upb_def to
  load other descriptors! This means the special-case code we used for
  bootstrapping is no longer necessary, and we no longer need to link
  the descriptor for descriptor.proto into upb.

- A client can now register any upb_value as what will be delivered to
  their value callback, not just a upb_fielddef*. This should allow for
  other clients to get more bang out of the streaming decoder.

This change unfortunately causes a bit of a performance regression -- I
think largely due to highly suboptimal code that GCC generates when
structs are returned by value. See:
  http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/

On the other hand, once we have a JIT this should no longer matter.

Performance numbers:

plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88)
plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11)
plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37)
plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12)
plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47)
plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42)
omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07)
omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87)
omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74)
omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10)
omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40)
omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
---
 src/upb_stream.c | 328 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 328 insertions(+)
 create mode 100644 src/upb_stream.c

(limited to 'src/upb_stream.c')

diff --git a/src/upb_stream.c b/src/upb_stream.c
new file mode 100644
index 0000000..3634d5d
--- /dev/null
+++ b/src/upb_stream.c
@@ -0,0 +1,328 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Joshua Haberman. See LICENSE for details.
+ */ + +#include +#include "upb_stream.h" + + +/* upb_handlers ***************************************************************/ + +static upb_flow_t upb_startmsg_nop(void *closure) { + (void)closure; + return UPB_CONTINUE; +} + +static void upb_endmsg_nop(void *closure, upb_status *status) { + (void)closure; + (void)status; +} + +static upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val) { + (void)closure; + (void)fval; + (void)val; + return UPB_CONTINUE; +} + +static upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval) { + (void)fval; + return UPB_CONTINUE_WITH(closure); +} + +static upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval) { + (void)closure; + (void)fval; + return UPB_CONTINUE; +} + +static upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, + upb_value val) { + (void)closure; + (void)fieldnum; + (void)val; + return UPB_CONTINUE; +} + +static void upb_msgent_init(upb_handlers_msgent *e) { + upb_inttable_init(&e->fieldtab, 8, sizeof(upb_handlers_fieldent)); + e->startmsg = &upb_startmsg_nop; + e->endmsg = &upb_endmsg_nop; + e->unknownval = &upb_unknownval_nop; +} + +void upb_handlers_init(upb_handlers *h, upb_msgdef *md) { + h->msgs_len = 1; + h->msgs_size = 4; + h->msgs = malloc(h->msgs_size * sizeof(*h->msgs)); + h->top = &h->stack[0]; + h->limit = &h->stack[UPB_MAX_TYPE_DEPTH]; + h->toplevel_msgdef = md; + if (md) upb_msgdef_ref(md); + + h->top->msgent_index = 0; + h->top->msgdef = md; + h->msgent = &h->msgs[0]; + upb_msgent_init(h->msgent); +} + +void upb_handlers_uninit(upb_handlers *h) { + for (int i = 0; i < h->msgs_len; i++) upb_inttable_free(&h->msgs[i].fieldtab); + free(h->msgs); + upb_msgdef_unref(h->toplevel_msgdef); +} + +static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval( + upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type) { + upb_handlers_fieldent *f = + upb_inttable_lookup(&h->msgent->fieldtab, fieldnum); + if (!f) { + upb_wire_type_t 
native_wire_type = upb_types[type].native_wire_type; + upb_handlers_fieldent new_f = { + false, type, native_wire_type, -1, UPB_NO_VALUE, + {&upb_value_nop}, &upb_endsubmsg_nop}; + if (upb_issubmsgtype(type)) new_f.cb.startsubmsg = &upb_startsubmsg_nop; + upb_inttable_insert(&h->msgent->fieldtab, fieldnum, &new_f); + + f = upb_inttable_lookup(&h->msgent->fieldtab, fieldnum); + assert(f); + } + assert(f->type == type); + return f; +} + +static upb_handlers_fieldent *upb_handlers_getorcreate( + upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, upb_value fval) { + upb_handlers_fieldent *f = + upb_handlers_getorcreate_without_fval(h, fieldnum, type); + f->fval = fval; + return f; +} + +void upb_register_startend(upb_handlers *h, upb_startmsg_handler_t startmsg, + upb_endmsg_handler_t endmsg) { + h->msgent->startmsg = startmsg ? startmsg : &upb_startmsg_nop; + h->msgent->endmsg = endmsg ? endmsg : &upb_endmsg_nop; +} + +// TODO: +// void upb_register_unknownval(upb_handlers *h, +// upb_unknownval_handler_t unknown); +// bool upb_handlers_link(upb_handlers *h, upb_fielddef *f); +// void upb_register_path_value(upb_handlers *h, const char *path, +// upb_value_handler_t value, upb_value fval); + +void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start, + upb_endmsg_handler_t end, + upb_value_handler_t value, + upb_startsubmsg_handler_t startsubmsg, + upb_endsubmsg_handler_t endsubmsg, + upb_unknownval_handler_t unknown) { + upb_register_startend(h, start, end); + //upb_register_unknownval(h, unknown); + upb_msgdef *m = h->top->msgdef; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + upb_value fval; + upb_value_setfielddef(&fval, f); + if (upb_issubmsg(f)) { + upb_handlers_push(h, f, startsubmsg, endsubmsg, fval, false); + upb_register_all(h, start, end, value, startsubmsg, endsubmsg, unknown); + upb_handlers_pop(h, f); + } else { + upb_register_value(h, f, 
value, fval); + } + } +} + +void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, upb_value_handler_t value, + upb_value fval) { + upb_handlers_getorcreate(h, fieldnum, type, fval)->cb.value = + value ? value : &upb_value_nop; +} + +void upb_register_value(upb_handlers *h, upb_fielddef *f, + upb_value_handler_t value, upb_value fval) { + assert(f->msgdef == h->top->msgdef); + upb_register_typed_value(h, f->number, f->type, value, fval); +} + +void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, + upb_startsubmsg_handler_t start, + upb_endsubmsg_handler_t end, + upb_value fval) { + upb_handlers_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, fval); + f->cb.startsubmsg = start ? start : &upb_startsubmsg_nop; + f->endsubmsg = end ? end : &upb_endsubmsg_nop; +} + +void upb_handlers_typed_link(upb_handlers *h, + upb_field_number_t fieldnum, + upb_fieldtype_t type, + int frames) { + assert(frames <= (h->top - h->stack)); + upb_handlers_fieldent *f = + upb_handlers_getorcreate_without_fval(h, fieldnum, type); + f->msgent_index = (h->top - frames)->msgent_index; +} + +void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type) { + upb_handlers_fieldent *f = + upb_handlers_getorcreate_without_fval(h, fieldnum, type); + if (h->top == h->limit) abort(); // TODO: make growable. + ++h->top; + if (f->msgent_index == -1) { + // Need to push a new msgent. 
+ if (h->msgs_size == h->msgs_len) { + h->msgs_size *= 2; + h->msgs = realloc(h->msgs, h->msgs_size * sizeof(*h->msgs)); + } + f->msgent_index = h->msgs_len++; + h->msgent = &h->msgs[f->msgent_index]; + upb_msgent_init(h->msgent); + } else { + h->msgent = &h->msgs[f->msgent_index]; + } + h->top->msgent_index = f->msgent_index; + if (h->toplevel_msgdef) { + upb_fielddef *f = upb_msgdef_itof((h->top - 1)->msgdef, fieldnum); + assert(f); + h->top->msgdef = upb_downcast_msgdef(f->def); + } +} + +void upb_handlers_push(upb_handlers *h, upb_fielddef *f, + upb_startsubmsg_handler_t start, + upb_endsubmsg_handler_t end, upb_value fval, + bool delegate) { + assert(f->msgdef == h->top->msgdef); + (void)delegate; // TODO + upb_register_typed_submsg(h, f->number, f->type, start, end, fval); + upb_handlers_typed_push(h, f->number, f->type); +} + +void upb_handlers_typed_pop(upb_handlers *h) { + assert(h->top > h->stack); + --h->top; + h->msgent = &h->msgs[h->top->msgent_index]; +} + +void upb_handlers_pop(upb_handlers *h, upb_fielddef *f) { + (void)f; // TODO: Check that this matches the corresponding push. 
+ upb_handlers_typed_pop(h); +} + +/* upb_dispatcher *************************************************************/ + +static upb_handlers_fieldent toplevel_f = { + false, 0, 0, 0, // The one value that is actually read +#ifdef NDEBUG + {{0}}, +#else + {{0}, UPB_VALUETYPE_RAW}, +#endif + {NULL}, NULL}; + +void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h, + size_t top_end_offset) { + d->handlers = h; + for (int i = 0; i < h->msgs_len; i++) + upb_inttable_compact(&h->msgs[i].fieldtab); + d->stack[0].end_offset = top_end_offset; + d->stack[0].f = &toplevel_f; + upb_status_init(&d->status); +} + +void upb_dispatcher_reset(upb_dispatcher *d) { + d->msgent = &d->handlers->msgs[0]; + d->dispatch_table = &d->msgent->fieldtab; + d->current_depth = 0; + d->skip_depth = INT_MAX; + d->noframe_depth = INT_MAX; + d->delegated_depth = 0; + d->top = d->stack; + d->limit = &d->stack[UPB_MAX_NESTING]; +} + +void upb_dispatcher_uninit(upb_dispatcher *d) { + upb_status_uninit(&d->status); +} + +void upb_dispatcher_break(upb_dispatcher *d) { + assert(d->skip_depth == INT_MAX); + assert(d->noframe_depth == INT_MAX); + d->noframe_depth = d->current_depth; +} + +upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d, void *closure) { + d->top->closure = closure; + upb_flow_t flow = d->msgent->startmsg(closure); + if (flow != UPB_CONTINUE) { + d->noframe_depth = d->current_depth + 1; + d->skip_depth = (flow == UPB_BREAK) ? d->delegated_depth : d->current_depth; + return UPB_SKIPSUBMSG; + } + return UPB_CONTINUE; +} + +void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) { + assert(d->top == d->stack); + d->msgent->endmsg(d->top->closure, &d->status); + // TODO: should we avoid this copy by passing client's status obj to cbs? 
+ upb_copyerr(status, &d->status); +} + +upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, + upb_dispatcher_field *f, + size_t userval) { + ++d->current_depth; + if (upb_dispatcher_skipping(d)) return UPB_SKIPSUBMSG; + upb_sflow_t sflow = f->cb.startsubmsg(d->top->closure, f->fval); + if (sflow.flow != UPB_CONTINUE) { + d->noframe_depth = d->current_depth; + d->skip_depth = (sflow.flow == UPB_BREAK) ? + d->delegated_depth : d->current_depth; + return UPB_SKIPSUBMSG; + } + + ++d->top; + if(d->top >= d->limit) { + upb_seterr(&d->status, UPB_ERROR, "Nesting too deep."); + d->noframe_depth = d->current_depth; + d->skip_depth = d->delegated_depth; + return UPB_SKIPSUBMSG; + } + d->top->f = f; + d->top->end_offset = userval; + d->top->closure = sflow.closure; + d->msgent = upb_handlers_getmsgent(d->handlers, f); + d->dispatch_table = &d->msgent->fieldtab; + return upb_dispatch_startmsg(d, d->top->closure); +} + +upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { + upb_flow_t flow; + if (upb_dispatcher_noframe(d)) { + flow = UPB_SKIPSUBMSG; + } else { + assert(d->top > d->stack); + upb_dispatcher_field *old_f = d->top->f; + d->msgent->endmsg(d->top->closure, &d->status); + --d->top; + d->msgent = upb_handlers_getmsgent(d->handlers, d->top->f); + d->dispatch_table = &d->msgent->fieldtab; + d->noframe_depth = INT_MAX; + if (!upb_dispatcher_skipping(d)) d->skip_depth = INT_MAX; + // Deliver like a regular value. + flow = old_f->endsubmsg(d->top->closure, old_f->fval); + } + --d->current_depth; + return flow; +} -- cgit v1.2.3