summaryrefslogtreecommitdiff
path: root/src/upb_stream.c
diff options
context:
space:
mode:
author Josh Haberman <jhaberman@gmail.com> 2011-03-20 13:13:51 -0700
committer Josh Haberman <jhaberman@gmail.com> 2011-03-20 13:13:51 -0700
commit 8ef6873e0e14309a1715a252a650bab0ae1a33ef (patch)
tree a9f81f9fa3ee24b923310cef964c1cbe1bf47a19 /src/upb_stream.c
parent 37e1c3102be15f1e57805e828993156e3492d764 (diff)
upb_stream: all callbacks registered ahead-of-time.
This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
Diffstat (limited to 'src/upb_stream.c')
-rw-r--r--src/upb_stream.c328
1 files changed, 328 insertions, 0 deletions
diff --git a/src/upb_stream.c b/src/upb_stream.c
new file mode 100644
index 0000000..3634d5d
--- /dev/null
+++ b/src/upb_stream.c
@@ -0,0 +1,328 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Joshua Haberman. See LICENSE for details.
+ */
+
+#include <stdlib.h>
+#include "upb_stream.h"
+
+
+/* upb_handlers ***************************************************************/
+
+// Default start-of-message handler, used when the client registers none.
+// Ignores its closure and tells the caller to keep going.
+static upb_flow_t upb_startmsg_nop(void *closure) {
+  (void)closure;  // Intentionally unused.
+  return UPB_CONTINUE;
+}
+
+// Default end-of-message handler, used when the client registers none.
+// Touches neither the closure nor the status object.
+static void upb_endmsg_nop(void *closure, upb_status *status) {
+  (void)status;
+  (void)closure;
+}
+
+// Default value handler installed for fields without a client callback.
+// Discards the field value and the registered fval, then continues.
+static upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val) {
+  (void)val;
+  (void)fval;
+  (void)closure;
+  return UPB_CONTINUE;
+}
+
+// Default start-of-submessage handler.  Continues parsing and hands back the
+// closure it was given, so the submessage receives the same closure value.
+static upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval) {
+  (void)fval;  // Intentionally unused.
+  return UPB_CONTINUE_WITH(closure);
+}
+
+// Default end-of-submessage handler: ignores both arguments and continues.
+static upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval) {
+  (void)fval;
+  (void)closure;
+  return UPB_CONTINUE;
+}
+
+// Default handler for values whose field number has no registered entry:
+// drops the value and continues.
+static upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
+                                     upb_value val) {
+  (void)val;
+  (void)fieldnum;
+  (void)closure;
+  return UPB_CONTINUE;
+}
+
+// Puts a message entry into its initial state: all three message-level
+// handlers point at the no-op defaults, and the field table is initialized
+// (passing 8 and the size of a upb_handlers_fieldent to upb_inttable_init).
+static void upb_msgent_init(upb_handlers_msgent *e) {
+  e->unknownval = &upb_unknownval_nop;
+  e->endmsg = &upb_endmsg_nop;
+  e->startmsg = &upb_startmsg_nop;
+  upb_inttable_init(&e->fieldtab, 8, sizeof(upb_handlers_fieldent));
+}
+
+// Initializes "h" with a single (top-level) message entry and an empty
+// registration stack.
+//
+// md may be NULL; when it is non-NULL a reference is taken on it and it is
+// recorded both as the top-level msgdef and as the msgdef of the bottom stack
+// frame.  Aborts if the initial message-entry array cannot be allocated, since
+// this function has no way to report failure.
+void upb_handlers_init(upb_handlers *h, upb_msgdef *md) {
+  h->msgs_len = 1;
+  h->msgs_size = 4;
+  h->msgs = malloc(h->msgs_size * sizeof(*h->msgs));
+  // msgs[0] is dereferenced below, so fail loudly instead of crashing on NULL.
+  if (!h->msgs) abort();
+  h->top = &h->stack[0];
+  h->limit = &h->stack[UPB_MAX_TYPE_DEPTH];
+  h->toplevel_msgdef = md;
+  if (md) upb_msgdef_ref(md);
+
+  h->top->msgent_index = 0;
+  h->top->msgdef = md;
+  h->msgent = &h->msgs[0];
+  upb_msgent_init(h->msgent);
+}
+
+// Releases everything owned by "h": the field table of every message entry,
+// the message-entry array itself, and the reference on the top-level msgdef.
+void upb_handlers_uninit(upb_handlers *h) {
+  for (int i = 0; i < h->msgs_len; i++) {
+    upb_inttable_free(&h->msgs[i].fieldtab);
+  }
+  free(h->msgs);
+  // upb_handlers_init() only takes a reference when a msgdef was supplied, so
+  // only drop one in that case.
+  if (h->toplevel_msgdef) upb_msgdef_unref(h->toplevel_msgdef);
+}
+
+// Returns the handler entry for "fieldnum" in the current message entry's
+// field table.  If no entry exists yet, one is inserted first with no-op
+// callbacks, msgent_index -1 and UPB_NO_VALUE as its fval.  Asserts that the
+// entry's type equals "type".
+static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval(
+    upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type) {
+  upb_handlers_fieldent *ent =
+      upb_inttable_lookup(&h->msgent->fieldtab, fieldnum);
+  if (ent == NULL) {
+    upb_handlers_fieldent defaults = {
+        false, type, upb_types[type].native_wire_type, -1, UPB_NO_VALUE,
+        {&upb_value_nop}, &upb_endsubmsg_nop};
+    // Submessage fields use the startsubmsg member of the callback union.
+    if (upb_issubmsgtype(type)) {
+      defaults.cb.startsubmsg = &upb_startsubmsg_nop;
+    }
+    upb_inttable_insert(&h->msgent->fieldtab, fieldnum, &defaults);
+
+    // "defaults" is a local; look the entry up again to get the table's own
+    // pointer for it.
+    ent = upb_inttable_lookup(&h->msgent->fieldtab, fieldnum);
+    assert(ent);
+  }
+  assert(ent->type == type);
+  return ent;
+}
+
+// Like upb_handlers_getorcreate_without_fval(), but also overwrites the
+// entry's fval with the one supplied.
+static upb_handlers_fieldent *upb_handlers_getorcreate(
+    upb_handlers *h, upb_field_number_t fieldnum,
+    upb_fieldtype_t type, upb_value fval) {
+  upb_handlers_fieldent *ent =
+      upb_handlers_getorcreate_without_fval(h, fieldnum, type);
+  ent->fval = fval;
+  return ent;
+}
+
+// Sets the start/end-of-message handlers for the message entry currently being
+// registered.  A NULL handler selects the corresponding no-op default.
+void upb_register_startend(upb_handlers *h, upb_startmsg_handler_t startmsg,
+                           upb_endmsg_handler_t endmsg) {
+  if (startmsg) {
+    h->msgent->startmsg = startmsg;
+  } else {
+    h->msgent->startmsg = &upb_startmsg_nop;
+  }
+
+  if (endmsg) {
+    h->msgent->endmsg = endmsg;
+  } else {
+    h->msgent->endmsg = &upb_endmsg_nop;
+  }
+}
+
+// TODO:
+// void upb_register_unknownval(upb_handlers *h,
+// upb_unknownval_handler_t unknown);
+// bool upb_handlers_link(upb_handlers *h, upb_fielddef *f);
+// void upb_register_path_value(upb_handlers *h, const char *path,
+// upb_value_handler_t value, upb_value fval);
+
+// Registers the same set of handlers for every field of the msgdef at the top
+// of the registration stack, recursing into each submessage field.  Each
+// field's fval is set to its own upb_fielddef.
+//
+// "unknown" is only forwarded to the recursive calls for now; registering it
+// is still a TODO (see upb_register_unknownval above).
+void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start,
+                      upb_endmsg_handler_t end,
+                      upb_value_handler_t value,
+                      upb_startsubmsg_handler_t startsubmsg,
+                      upb_endsubmsg_handler_t endsubmsg,
+                      upb_unknownval_handler_t unknown) {
+  upb_register_startend(h, start, end);
+  // TODO: upb_register_unknownval(h, unknown);
+  upb_msgdef *m = h->top->msgdef;
+  upb_msg_iter it = upb_msg_begin(m);
+  while (!upb_msg_done(it)) {
+    upb_fielddef *field = upb_msg_iter_field(it);
+    upb_value fval;
+    upb_value_setfielddef(&fval, field);
+    if (!upb_issubmsg(field)) {
+      upb_register_value(h, field, value, fval);
+    } else {
+      // Descend into the submessage, register its fields, then come back up.
+      upb_handlers_push(h, field, startsubmsg, endsubmsg, fval, false);
+      upb_register_all(h, start, end, value, startsubmsg, endsubmsg, unknown);
+      upb_handlers_pop(h, field);
+    }
+    it = upb_msg_next(m, it);
+  }
+}
+
+// Registers a value handler (and the fval passed to it) for the field with the
+// given number and type in the current message entry.  A NULL handler selects
+// the no-op default.
+void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum,
+                              upb_fieldtype_t type, upb_value_handler_t value,
+                              upb_value fval) {
+  upb_handlers_fieldent *ent = upb_handlers_getorcreate(h, fieldnum, type, fval);
+  if (value) {
+    ent->cb.value = value;
+  } else {
+    ent->cb.value = &upb_value_nop;
+  }
+}
+
+// Fielddef-based convenience wrapper around upb_register_typed_value().
+// Asserts that "f" belongs to the msgdef at the top of the registration stack.
+void upb_register_value(upb_handlers *h, upb_fielddef *f,
+                        upb_value_handler_t value, upb_value fval) {
+  assert(h->top->msgdef == f->msgdef);
+  upb_register_typed_value(h, f->number, f->type, value, fval);
+}
+
+// Registers start/end-of-submessage handlers (and their fval) for the field
+// with the given number and type in the current message entry.  NULL handlers
+// select the no-op defaults.
+void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum,
+                               upb_fieldtype_t type,
+                               upb_startsubmsg_handler_t start,
+                               upb_endsubmsg_handler_t end,
+                               upb_value fval) {
+  upb_handlers_fieldent *ent = upb_handlers_getorcreate(h, fieldnum, type, fval);
+
+  if (start) {
+    ent->cb.startsubmsg = start;
+  } else {
+    ent->cb.startsubmsg = &upb_startsubmsg_nop;
+  }
+
+  if (end) {
+    ent->endsubmsg = end;
+  } else {
+    ent->endsubmsg = &upb_endsubmsg_nop;
+  }
+}
+
+// Makes the given field refer to a message entry that is already on the
+// registration stack: the entry of the frame "frames" levels below the top.
+// Asserts that the stack is at least that deep.
+void upb_handlers_typed_link(upb_handlers *h,
+                             upb_field_number_t fieldnum,
+                             upb_fieldtype_t type,
+                             int frames) {
+  assert(frames <= (h->top - h->stack));
+  upb_handlers_fieldent *ent =
+      upb_handlers_getorcreate_without_fval(h, fieldnum, type);
+  ent->msgent_index = h->top[-frames].msgent_index;
+}
+
+// Descends into the submessage field (fieldnum, type): pushes a new frame on
+// the registration stack and makes that field's message entry the current one.
+//
+// If the field has no message entry yet (msgent_index == -1) a new one is
+// appended to h->msgs, doubling the array when it is full.  When a top-level
+// msgdef was supplied, the new frame's msgdef is looked up from the parent
+// frame's msgdef by field number.
+//
+// Aborts if the registration stack is full or the array cannot be grown.
+void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
+                             upb_fieldtype_t type) {
+  upb_handlers_fieldent *f =
+      upb_handlers_getorcreate_without_fval(h, fieldnum, type);
+  // h->limit is &h->stack[UPB_MAX_TYPE_DEPTH]; assuming the stack holds
+  // UPB_MAX_TYPE_DEPTH frames, that is one past the last usable frame, so the
+  // *new* top must stay strictly below it.
+  if (h->top + 1 >= h->limit) abort();  // TODO: make growable.
+  ++h->top;
+  if (f->msgent_index == -1) {
+    // Need to push a new msgent.
+    if (h->msgs_size == h->msgs_len) {
+      // Keep the old pointer until realloc() is known to have succeeded.
+      void *grown = realloc(h->msgs, 2 * h->msgs_size * sizeof(*h->msgs));
+      if (!grown) abort();
+      h->msgs = grown;
+      h->msgs_size *= 2;
+    }
+    f->msgent_index = h->msgs_len++;
+    h->msgent = &h->msgs[f->msgent_index];
+    upb_msgent_init(h->msgent);
+  } else {
+    h->msgent = &h->msgs[f->msgent_index];
+  }
+  h->top->msgent_index = f->msgent_index;
+  if (h->toplevel_msgdef) {
+    upb_fielddef *subf = upb_msgdef_itof((h->top - 1)->msgdef, fieldnum);
+    assert(subf);
+    h->top->msgdef = upb_downcast_msgdef(subf->def);
+  }
+}
+
+// Fielddef-based variant of the typed push: registers the submessage handlers
+// for "f" and then descends into it.  Asserts that "f" belongs to the msgdef
+// at the top of the registration stack.  "delegate" is not used yet.
+void upb_handlers_push(upb_handlers *h, upb_fielddef *f,
+                       upb_startsubmsg_handler_t start,
+                       upb_endsubmsg_handler_t end, upb_value fval,
+                       bool delegate) {
+  (void)delegate;  // TODO
+  assert(h->top->msgdef == f->msgdef);
+  upb_register_typed_submsg(h, f->number, f->type, start, end, fval);
+  upb_handlers_typed_push(h, f->number, f->type);
+}
+
+// Leaves the current submessage: pops one frame off the registration stack and
+// makes the parent frame's message entry current again.  Asserts that there is
+// a frame to pop.
+void upb_handlers_typed_pop(upb_handlers *h) {
+  assert(h->top > h->stack);
+  h->top -= 1;
+  h->msgent = h->msgs + h->top->msgent_index;
+}
+
+// Fielddef-based counterpart of upb_handlers_push().  "f" is currently ignored.
+void upb_handlers_pop(upb_handlers *h, upb_fielddef *f) {
+  // TODO: Check that this matches the corresponding push.
+  (void)f;
+  upb_handlers_typed_pop(h);
+}
+
+/* upb_dispatcher *************************************************************/
+
+// Placeholder field entry installed in the bottom dispatcher frame
+// (d->stack[0].f) by upb_dispatcher_init(), so that frame has a non-NULL
+// field like every other frame.
+//
+// Going by the field order used where entries are created above, the fourth
+// initializer is msgent_index; 0 selects the top-level message entry.  The
+// #ifdef covers the two layouts of the upb_value member (the non-NDEBUG build
+// carries an extra type tag).
+static upb_handlers_fieldent toplevel_f = {
+ false, 0, 0, 0, // The one value that is actually read
+#ifdef NDEBUG
+ {{0}},
+#else
+ {{0}, UPB_VALUETYPE_RAW},
+#endif
+ {NULL}, NULL};
+
+// Binds dispatcher "d" to the handlers "h" and prepares its bottom stack
+// frame.  Every field table in "h" is compacted first.  The bottom frame gets
+// top_end_offset as its end offset and the toplevel_f placeholder as its
+// field.  The remaining per-parse state is set by upb_dispatcher_reset().
+void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
+                         size_t top_end_offset) {
+  d->handlers = h;
+  for (int i = 0; i < h->msgs_len; i++) {
+    upb_inttable_compact(&h->msgs[i].fieldtab);
+  }
+  d->stack[0].f = &toplevel_f;
+  d->stack[0].end_offset = top_end_offset;
+  upb_status_init(&d->status);
+}
+
+// Returns the dispatcher to its starting state: empty frame stack, depth 0,
+// no skipping in effect, and the top-level message entry selected.
+void upb_dispatcher_reset(upb_dispatcher *d) {
+  // Frame stack.
+  d->top = d->stack;
+  d->limit = &d->stack[UPB_MAX_NESTING];
+
+  // Depth bookkeeping; INT_MAX means "not in effect".
+  d->current_depth = 0;
+  d->delegated_depth = 0;
+  d->skip_depth = INT_MAX;
+  d->noframe_depth = INT_MAX;
+
+  // Dispatch through the top-level message entry.
+  d->msgent = &d->handlers->msgs[0];
+  d->dispatch_table = &d->msgent->fieldtab;
+}
+
+// Releases the dispatcher's status object.  The handlers it was bound to are
+// not touched.
+void upb_dispatcher_uninit(upb_dispatcher *d) {
+  upb_status *owned = &d->status;
+  upb_status_uninit(owned);
+}
+
+// Sets noframe_depth to the current depth.  Must only be called while neither
+// skip_depth nor noframe_depth is in effect (both still INT_MAX).
+void upb_dispatcher_break(upb_dispatcher *d) {
+  assert(d->noframe_depth == INT_MAX);
+  assert(d->skip_depth == INT_MAX);
+  d->noframe_depth = d->current_depth;
+}
+
+// Stores "closure" in the current frame and invokes the current message
+// entry's start-of-message handler with it.
+//
+// Returns UPB_CONTINUE if the handler did.  Otherwise sets noframe_depth to
+// one below the current depth, sets skip_depth (to delegated_depth for
+// UPB_BREAK, else to the current depth), and returns UPB_SKIPSUBMSG.
+upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d, void *closure) {
+  d->top->closure = closure;
+  upb_flow_t result = d->msgent->startmsg(closure);
+  if (result == UPB_CONTINUE) return UPB_CONTINUE;
+
+  d->noframe_depth = d->current_depth + 1;
+  if (result == UPB_BREAK) {
+    d->skip_depth = d->delegated_depth;
+  } else {
+    d->skip_depth = d->current_depth;
+  }
+  return UPB_SKIPSUBMSG;
+}
+
+// Ends the top-level message: invokes the current message entry's
+// end-of-message handler with the bottom frame's closure and the dispatcher's
+// own status, then copies that status into "status".  Asserts that no
+// submessage frames remain on the stack.
+void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
+  assert(d->stack == d->top);
+  void *closure = d->top->closure;
+  d->msgent->endmsg(closure, &d->status);
+  // TODO: should we avoid this copy by passing client's status obj to cbs?
+  upb_copyerr(status, &d->status);
+}
+
+// Begins the submessage for field "f".
+//
+// Always increments current_depth.  If the dispatcher is already skipping,
+// returns UPB_SKIPSUBMSG without calling any handler.  Otherwise calls the
+// field's startsubmsg handler with the parent closure; if that handler does
+// not return UPB_CONTINUE, or the frame stack is full, no frame is pushed,
+// noframe_depth/skip_depth are set and UPB_SKIPSUBMSG is returned (with the
+// "Nesting too deep." error recorded in d->status in the latter case).
+//
+// On success a frame is pushed holding "f", "userval" as its end offset and
+// the closure returned by the handler; the field's message entry becomes
+// current and its start-of-message handler is dispatched.
+upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
+                                    upb_dispatcher_field *f,
+                                    size_t userval) {
+  ++d->current_depth;
+  if (upb_dispatcher_skipping(d)) return UPB_SKIPSUBMSG;
+  upb_sflow_t sflow = f->cb.startsubmsg(d->top->closure, f->fval);
+  if (sflow.flow != UPB_CONTINUE) {
+    d->noframe_depth = d->current_depth;
+    d->skip_depth = (sflow.flow == UPB_BREAK) ?
+        d->delegated_depth : d->current_depth;
+    return UPB_SKIPSUBMSG;
+  }
+
+  // Check for room *before* moving top.  This path pushes no frame
+  // (noframe_depth is set), so top must keep pointing at the parent's frame;
+  // otherwise it would be left at d->limit and later frame accesses would be
+  // out of bounds.
+  if (d->top + 1 >= d->limit) {
+    upb_seterr(&d->status, UPB_ERROR, "Nesting too deep.");
+    d->noframe_depth = d->current_depth;
+    d->skip_depth = d->delegated_depth;
+    return UPB_SKIPSUBMSG;
+  }
+  ++d->top;
+  d->top->f = f;
+  d->top->end_offset = userval;
+  d->top->closure = sflow.closure;
+  d->msgent = upb_handlers_getmsgent(d->handlers, f);
+  d->dispatch_table = &d->msgent->fieldtab;
+  return upb_dispatch_startmsg(d, d->top->closure);
+}
+
+// Ends the current submessage and decrements current_depth.
+//
+// If no frame was pushed for this submessage (upb_dispatcher_noframe()),
+// nothing is popped and UPB_SKIPSUBMSG is returned.  Otherwise the
+// submessage's end-of-message handler runs, its frame is popped, the parent's
+// message entry and dispatch table are restored, and the return value is that
+// of the field's endsubmsg handler.
+upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) {
+ upb_flow_t flow;
+ if (upb_dispatcher_noframe(d)) {
+ flow = UPB_SKIPSUBMSG;
+ } else {
+ assert(d->top > d->stack);
+ // Remember the submessage's field; it is needed after the frame is popped.
+ upb_dispatcher_field *old_f = d->top->f;
+ // End-of-message runs with the submessage's own closure, before the pop.
+ d->msgent->endmsg(d->top->closure, &d->status);
+ --d->top;
+ // Switch dispatching back to the parent frame's message entry.
+ d->msgent = upb_handlers_getmsgent(d->handlers, d->top->f);
+ d->dispatch_table = &d->msgent->fieldtab;
+ d->noframe_depth = INT_MAX;
+ if (!upb_dispatcher_skipping(d)) d->skip_depth = INT_MAX;
+ // Deliver like a regular value.
+ // (The endsubmsg handler receives the parent's closure, since top was popped.)
+ flow = old_f->endsubmsg(d->top->closure, old_f->fval);
+ }
+ --d->current_depth;
+ return flow;
+}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback