From eb622c0531f44b9521606ba8a4ec2462a1018d1a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 8 May 2011 17:13:09 -0700 Subject: Split upb_stream -> upb_bytestream/upb_handlers. --- src/upb_bytestream.h | 123 +++++++++++++ src/upb_decoder.c | 1 + src/upb_decoder.h | 2 +- src/upb_handlers.c | 343 ++++++++++++++++++++++++++++++++++ src/upb_handlers.h | 397 ++++++++++++++++++++++++++++++++++++++++ src/upb_msg.h | 2 +- src/upb_stdio.h | 6 +- src/upb_stream.c | 343 ---------------------------------- src/upb_stream.h | 498 -------------------------------------------------- src/upb_strstream.c | 1 + src/upb_strstream.h | 2 +- src/upb_textprinter.h | 3 +- 12 files changed, 873 insertions(+), 848 deletions(-) create mode 100644 src/upb_bytestream.h create mode 100644 src/upb_handlers.c create mode 100644 src/upb_handlers.h delete mode 100644 src/upb_stream.c delete mode 100644 src/upb_stream.h (limited to 'src') diff --git a/src/upb_bytestream.h b/src/upb_bytestream.h new file mode 100644 index 0000000..e4b51fd --- /dev/null +++ b/src/upb_bytestream.h @@ -0,0 +1,123 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * Defines the interfaces upb_bytesrc and upb_bytesink, which are abstractions + * of read()/write() with useful buffering/sharing semantics. + */ + +#ifndef UPB_BYTESTREAM_H +#define UPB_BYTESTREAM_H + +#include +#include "upb.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_bytesrc ****************************************************************/ + +// upb_bytesrc is a pull interface for streams of bytes, basically an +// abstraction of read()/fread(), but it avoids copies where possible. 
+ +typedef upb_strlen_t (*upb_bytesrc_read_fptr)( + upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); +typedef bool (*upb_bytesrc_getstr_fptr)( + upb_bytesrc *src, upb_string *str, upb_status *status); + +typedef struct { + upb_bytesrc_read_fptr read; + upb_bytesrc_getstr_fptr getstr; +} upb_bytesrc_vtbl; + +struct _upb_bytesrc { + upb_bytesrc_vtbl *vtbl; +}; + +INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { + s->vtbl = vtbl; +} + +// Reads up to "count" bytes into "buf", returning the total number of bytes +// read. If 0, indicates error and puts details in "status". +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count, upb_status *status) { + return src->vtbl->read(src, buf, count, status); +} + +// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure +// that "str" is created or just recycled. Returns "false" if no data was +// returned, either due to error or EOF (check status for details). +// +// In comparison to upb_bytesrc_read(), this call can possibly alias existing +// string data (which avoids a copy). On the other hand, if the data was *not* +// already in an existing string, this copies it into a upb_string, and if the +// data needs to be put in a specific range of memory (because eg. you need to +// put it into a different kind of string object) then upb_bytesrc_read() could +// save you a copy. 
+INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_status *status) { + return src->vtbl->getstr(src, str, status); +} + + +/* upb_bytesink ***************************************************************/ + +struct _upb_bytesink; +typedef struct _upb_bytesink upb_bytesink; +typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( + upb_bytesink *bytesink, upb_string *str, upb_status *status); +typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( + upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args); + +typedef struct { + upb_bytesink_putstr_fptr putstr; + upb_bytesink_vprintf_fptr vprintf; +} upb_bytesink_vtbl; + +struct _upb_bytesink { + upb_bytesink_vtbl *vtbl; +}; + +INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { + s->vtbl = vtbl; +} + + +// TODO: Figure out how buffering should be handled. Should the caller buffer +// data and only call these functions when a buffer is full? Seems most +// efficient, but then buffering has to be configured in the caller, which +// could be anything, which makes it hard to have a standard interface for +// controlling buffering. +// +// The downside of having the bytesink buffer is efficiency: the caller is +// making more (virtual) function calls, and the caller can't arrange to have +// a big contiguous buffer. The bytesink can do this, but will have to copy +// to make the data contiguous. + +// Returns the number of bytes written. +INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, + const char *fmt, ...) { + va_list args; + va_start(args, fmt); + upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args); + va_end(args); + return ret; +} + +// Puts the given string. Note the return type is upb_strlen_t, not bool +// (presumably the number of bytes written, as for upb_bytesink_printf() -- +// TODO confirm); check "status" for details on failure. Ownership of the +// string is *not* passed; if the callee wants a reference it must call +// upb_string_getref() on it. 
+INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, + upb_status *status) { + return sink->vtbl->putstr(sink, str, status); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_decoder.c b/src/upb_decoder.c index a10c0ba..9383474 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -8,6 +8,7 @@ #include #include #include +#include "upb_bytestream.h" #include "upb_decoder.h" #include "upb_varint.h" diff --git a/src/upb_decoder.h b/src/upb_decoder.h index 954b33c..3d1b06b 100644 --- a/src/upb_decoder.h +++ b/src/upb_decoder.h @@ -20,7 +20,7 @@ #include #include #include -#include "upb_stream.h" +#include "upb_handlers.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_handlers.c b/src/upb_handlers.c new file mode 100644 index 0000000..d80d59f --- /dev/null +++ b/src/upb_handlers.c @@ -0,0 +1,343 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2011 Google Inc. See LICENSE for details. 
+ * Author: Josh Haberman + */ + +#include +#include "upb_handlers.h" + + +/* upb_handlers ***************************************************************/ + +upb_flow_t upb_startmsg_nop(void *closure) { + (void)closure; + return UPB_CONTINUE; +} + +void upb_endmsg_nop(void *closure, upb_status *status) { + (void)closure; + (void)status; +} + +upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val) { + (void)closure; + (void)fval; + (void)val; + return UPB_CONTINUE; +} + +upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval) { + (void)fval; + return UPB_CONTINUE_WITH(closure); +} + +upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval) { + (void)closure; + (void)fval; + return UPB_CONTINUE; +} + +upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, + upb_value val) { + (void)closure; + (void)fieldnum; + (void)val; + return UPB_CONTINUE; +} + +static void upb_msgent_init(upb_msgent *e) { + upb_inttable_init(&e->fieldtab, 8, sizeof(upb_fieldent)); + e->startmsg = &upb_startmsg_nop; + e->endmsg = &upb_endmsg_nop; + e->unknownval = &upb_unknownval_nop; + e->tablearray = NULL; + e->is_group = false; +} + +void upb_handlers_init(upb_handlers *h, upb_msgdef *md) { + h->msgs_len = 1; + h->msgs_size = 4; + h->msgs = malloc(h->msgs_size * sizeof(*h->msgs)); + h->top = &h->stack[0]; + h->limit = &h->stack[UPB_MAX_TYPE_DEPTH]; + h->toplevel_msgdef = md; + h->should_jit = true; + if (md) upb_msgdef_ref(md); + + h->top->msgent_index = 0; + h->top->msgdef = md; + h->msgent = &h->msgs[0]; + upb_msgent_init(h->msgent); +} + +void upb_handlers_uninit(upb_handlers *h) { + for (int i = 0; i < h->msgs_len; i++) { + upb_inttable_free(&h->msgs[i].fieldtab); + free(h->msgs[i].tablearray); + } + free(h->msgs); + upb_msgdef_unref(h->toplevel_msgdef); +} + +static upb_fieldent *upb_handlers_getorcreate_without_fval( + upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type, bool repeated) { + uint32_t tag = fieldnum << 3 | 
upb_types[type].native_wire_type; + upb_fieldent *f = upb_inttable_lookup(&h->msgent->fieldtab, tag); + if (!f) { + upb_fieldent new_f = {false, type, repeated, + repeated && upb_isprimitivetype(type), fieldnum, -1, UPB_NO_VALUE, + {&upb_value_nop}, &upb_endsubmsg_nop, 0, 0, 0, NULL}; + if (upb_issubmsgtype(type)) new_f.cb.startsubmsg = &upb_startsubmsg_nop; + upb_inttable_insert(&h->msgent->fieldtab, tag, &new_f); + + f = upb_inttable_lookup(&h->msgent->fieldtab, tag); + assert(f); + } + assert(f->type == type); + return f; +} + +static upb_fieldent *upb_handlers_getorcreate( + upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, bool repeated, upb_value fval) { + upb_fieldent *f = + upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated); + f->fval = fval; + return f; +} + +void upb_register_startend(upb_handlers *h, upb_startmsg_handler_t startmsg, + upb_endmsg_handler_t endmsg) { + h->msgent->startmsg = startmsg ? startmsg : &upb_startmsg_nop; + h->msgent->endmsg = endmsg ? 
endmsg : &upb_endmsg_nop; +} + +// TODO: +// void upb_register_unknownval(upb_handlers *h, +// upb_unknownval_handler_t unknown); +// bool upb_handlers_link(upb_handlers *h, upb_fielddef *f); +// void upb_register_path_value(upb_handlers *h, const char *path, +// upb_value_handler_t value, upb_value fval); + +void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start, + upb_endmsg_handler_t end, + upb_value_handler_t value, + upb_startsubmsg_handler_t startsubmsg, + upb_endsubmsg_handler_t endsubmsg, + upb_unknownval_handler_t unknown) { + upb_register_startend(h, start, end); + //upb_register_unknownval(h, unknown); + upb_msgdef *m = h->top->msgdef; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + upb_value fval; + upb_value_setfielddef(&fval, f); + if (upb_issubmsg(f)) { + upb_handlers_push(h, f, startsubmsg, endsubmsg, fval, false); + upb_register_all(h, start, end, value, startsubmsg, endsubmsg, unknown); + upb_handlers_pop(h, f); + } else { + upb_register_value(h, f, value, fval); + } + } +} + +void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, bool repeated, + upb_value_handler_t value, upb_value fval) { + upb_handlers_getorcreate(h, fieldnum, type, repeated, fval)->cb.value = + value ? value : &upb_value_nop; +} + +void upb_register_value(upb_handlers *h, upb_fielddef *f, + upb_value_handler_t value, upb_value fval) { + assert(f->msgdef == h->top->msgdef); + upb_register_typed_value(h, f->number, f->type, upb_isarray(f), value, fval); +} + +void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, bool repeated, + upb_startsubmsg_handler_t start, + upb_endsubmsg_handler_t end, + upb_value fval) { + upb_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, repeated, fval); + f->cb.startsubmsg = start ? start : &upb_startsubmsg_nop; + f->endsubmsg = end ? 
end : &upb_endsubmsg_nop; +} + +void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, bool repeated, int frames) { + assert(frames <= (h->top - h->stack)); + upb_fieldent *f = + upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated); + f->msgent_index = (h->top - frames)->msgent_index; +} + +void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, bool repeated) { + upb_fieldent *f = + upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated); + if (h->top == h->limit) abort(); // TODO: make growable. + ++h->top; + if (f->msgent_index == -1) { + // Need to push a new msgent. + if (h->msgs_size == h->msgs_len) { + h->msgs_size *= 2; + h->msgs = realloc(h->msgs, h->msgs_size * sizeof(*h->msgs)); + } + f->msgent_index = h->msgs_len++; + h->msgent = &h->msgs[f->msgent_index]; + upb_msgent_init(h->msgent); + } else { + h->msgent = &h->msgs[f->msgent_index]; + } + h->top->msgent_index = f->msgent_index; + if (h->toplevel_msgdef) { + upb_fielddef *f = upb_msgdef_itof((h->top - 1)->msgdef, fieldnum); + assert(f); + h->top->msgdef = upb_downcast_msgdef(f->def); + } + if (type == UPB_TYPE(GROUP)) { + // Insert a fieldent for ENDGROUP so we can easily dispatch endgroup when + // we see it in the submessage. + // TODO: assert that no other fields in the group are registered with the + // same name or number. 
+ upb_register_typed_submsg(h, fieldnum, UPB_TYPE_ENDGROUP, false, NULL, NULL, + UPB_NO_VALUE); + h->msgent->is_group = true; + } +} + +void upb_handlers_push(upb_handlers *h, upb_fielddef *f, + upb_startsubmsg_handler_t start, + upb_endsubmsg_handler_t end, upb_value fval, + bool delegate) { + assert(f->msgdef == h->top->msgdef); + (void)delegate; // TODO + upb_register_typed_submsg(h, f->number, f->type, upb_isarray(f), start, end, fval); + upb_handlers_typed_push(h, f->number, f->type, upb_isarray(f)); +} + +void upb_handlers_typed_pop(upb_handlers *h) { + assert(h->top > h->stack); + --h->top; + h->msgent = &h->msgs[h->top->msgent_index]; +} + +void upb_handlers_pop(upb_handlers *h, upb_fielddef *f) { + (void)f; // TODO: Check that this matches the corresponding push. + upb_handlers_typed_pop(h); +} + +/* upb_dispatcher *************************************************************/ + +static upb_fieldent toplevel_f = { + false, UPB_TYPE(GROUP), false, false, 0, + 0, // msgent_index +#ifdef NDEBUG + {{0}}, +#else + {{0}, UPB_VALUETYPE_RAW}, +#endif + {NULL}, NULL, 0, 0, 0, NULL}; + +void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h) { + d->handlers = h; + for (int i = 0; i < h->msgs_len; i++) + upb_inttable_compact(&h->msgs[i].fieldtab); + d->stack[0].f = &toplevel_f; + d->limit = &d->stack[UPB_MAX_NESTING]; + upb_status_init(&d->status); +} + +void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end_offset) { + d->msgent = &d->handlers->msgs[0]; + d->dispatch_table = &d->msgent->fieldtab; + d->current_depth = 0; + d->skip_depth = INT_MAX; + d->noframe_depth = INT_MAX; + d->delegated_depth = 0; + d->top = d->stack; + d->top->closure = top_closure; + d->top->end_offset = top_end_offset; + d->top->is_packed = false; +} + +void upb_dispatcher_uninit(upb_dispatcher *d) { + upb_handlers_uninit(d->handlers); + upb_status_uninit(&d->status); +} + +void upb_dispatcher_break(upb_dispatcher *d) { + assert(d->skip_depth == INT_MAX); + 
assert(d->noframe_depth == INT_MAX); + d->noframe_depth = d->current_depth; +} + +upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) { + upb_flow_t flow = d->msgent->startmsg(d->top->closure); + if (flow != UPB_CONTINUE) { + d->noframe_depth = d->current_depth + 1; + d->skip_depth = (flow == UPB_BREAK) ? d->delegated_depth : d->current_depth; + return UPB_SKIPSUBMSG; + } + return UPB_CONTINUE; +} + +void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) { + assert(d->top == d->stack); + d->msgent->endmsg(d->top->closure, &d->status); + // TODO: should we avoid this copy by passing client's status obj to cbs? + upb_copyerr(status, &d->status); +} + +upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fieldent *f, + size_t userval) { + ++d->current_depth; + if (upb_dispatcher_skipping(d)) return UPB_SKIPSUBMSG; + upb_sflow_t sflow = f->cb.startsubmsg(d->top->closure, f->fval); + if (sflow.flow != UPB_CONTINUE) { + d->noframe_depth = d->current_depth; + d->skip_depth = (sflow.flow == UPB_BREAK) ? 
+ d->delegated_depth : d->current_depth; + return UPB_SKIPSUBMSG; + } + + ++d->top; + if(d->top >= d->limit) { + upb_seterr(&d->status, UPB_ERROR, "Nesting too deep."); + d->noframe_depth = d->current_depth; + d->skip_depth = d->delegated_depth; + return UPB_SKIPSUBMSG; + } + d->top->f = f; + d->top->end_offset = userval; + d->top->closure = sflow.closure; + d->top->is_packed = false; + d->msgent = upb_handlers_getmsgent(d->handlers, f); + d->dispatch_table = &d->msgent->fieldtab; + return upb_dispatch_startmsg(d); +} + +upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { + upb_flow_t flow; + if (upb_dispatcher_noframe(d)) { + flow = UPB_SKIPSUBMSG; + } else { + assert(d->top > d->stack); + upb_fieldent *old_f = d->top->f; + d->msgent->endmsg(d->top->closure, &d->status); + --d->top; + d->msgent = upb_handlers_getmsgent(d->handlers, d->top->f); + d->dispatch_table = &d->msgent->fieldtab; + d->noframe_depth = INT_MAX; + if (!upb_dispatcher_skipping(d)) d->skip_depth = INT_MAX; + // Deliver like a regular value. + flow = old_f->endsubmsg(d->top->closure, old_f->fval); + } + --d->current_depth; + return flow; +} diff --git a/src/upb_handlers.h b/src/upb_handlers.h new file mode 100644 index 0000000..d0ef1a4 --- /dev/null +++ b/src/upb_handlers.h @@ -0,0 +1,397 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * This file defines general-purpose streaming data interfaces: + * + * - upb_handlers: represents a set of callbacks, very much like in XML's SAX + * API, that a client can register to do a streaming tree traversal over a + * stream of structured protobuf data, without knowing where that data is + * coming from. + * + * - upb_bytesrc: a pull interface for streams of bytes, basically an + * abstraction of read()/fread(), but it avoids copies where possible. 
+ * + * - upb_bytesink: push interface for streams of bytes, basically an + * abstraction of write()/fwrite(), but it avoids copies where possible. + * + * All of the encoders and decoders are based on these generic interfaces, + * which lets you write streaming algorithms that do not depend on a specific + * serialization format; for example, you can write a pretty printer that works + * with input that came from protobuf binary format, protobuf text format, or + * even an in-memory upb_msg -- the pretty printer will not know the + * difference. + */ + +#ifndef UPB_STREAM_H +#define UPB_STREAM_H + +#include +#include "upb.h" +#include "upb_def.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_handlers ***************************************************************/ + +// A upb_handlers object is a table of callbacks that are bound to specific +// messages and fields. A consumer of data registers callbacks and then +// passes the upb_handlers object to the producer, which calls them at the +// appropriate times. + +// All handlers except the endmsg handler return a value from this enum, to +// control whether parsing will continue or not. +typedef enum { + // Data source should continue calling callbacks. + UPB_CONTINUE = 0, + + // Halt processing permanently (in a non-resumable way). The endmsg handlers + // for any currently open messages will be called which can supply a more + // specific status message. If UPB_BREAK is returned from inside a delegated + // message, processing will continue normally in the containing message (though + // the containing message can inspect the returned status and choose to also + // return UPB_BREAK if it is not ok). + UPB_BREAK, + + // Skips to the end of the current submessage (or if we are at the top + // level, skips to the end of the entire message). In other words, it is + // like a UPB_BREAK that applies only to the current level. 
+ // + // If you UPB_SKIPSUBMSG from a startmsg handler, the endmsg handler will + // be called to perform cleanup and return a status. Returning + // UPB_SKIPSUBMSG from a startsubmsg handler will *not* call the startmsg, + // endmsg, or endsubmsg handlers. + UPB_SKIPSUBMSG, + + // TODO: Add UPB_SUSPEND, for resumable producers/consumers. +} upb_flow_t; + +typedef struct _upb_sflow upb_sflow_t; +typedef upb_flow_t (*upb_startmsg_handler_t)(void *closure); +typedef void (*upb_endmsg_handler_t)(void *closure, upb_status *status); +typedef upb_flow_t (*upb_value_handler_t)( + void *closure, upb_value fval, upb_value val); +typedef upb_sflow_t (*upb_startsubmsg_handler_t)( + void *closure, upb_value fval); +typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure, upb_value fval); +typedef upb_flow_t (*upb_unknownval_handler_t)( + void *closure, upb_field_number_t fieldnum, upb_value val); + +upb_flow_t upb_startmsg_nop(void *closure); +void upb_endmsg_nop(void *closure, upb_status *status); +upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val); +upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval); +upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval); +upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, + upb_value val); +struct _upb_decoder; +typedef struct _upb_fieldent { + bool junk; + upb_fieldtype_t type; + bool repeated; + bool is_repeated_primitive; + uint32_t number; + // For upb_issubmsg(f) only, the index into the msgdef array of the submsg. + // -1 if unset (indicates that submsg should be skipped). 
+ int32_t msgent_index; + upb_value fval; + union { + upb_value_handler_t value; + upb_startsubmsg_handler_t startsubmsg; + } cb; + upb_endsubmsg_handler_t endsubmsg; + uint32_t jit_pclabel; + uint32_t jit_pclabel_notypecheck; + uint32_t jit_submsg_done_pclabel; + void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f); +} upb_fieldent; + +typedef struct _upb_msgent { + upb_startmsg_handler_t startmsg; + upb_endmsg_handler_t endmsg; + upb_unknownval_handler_t unknownval; + // Maps tag (field number << 3 | native wire type) -> upb_fieldent. + upb_inttable fieldtab; + uint32_t jit_startmsg_pclabel; + uint32_t jit_endofbuf_pclabel; + uint32_t jit_endofmsg_pclabel; + uint32_t jit_unknownfield_pclabel; + bool is_group; + int32_t jit_parent_field_done_pclabel; + uint32_t max_field_number; + // Currently keyed on field number. Could also try keying it + // on encoded or decoded tag, or on encoded field number. + void **tablearray; +} upb_msgent; + +typedef struct { + upb_msgdef *msgdef; + int msgent_index; +} upb_handlers_frame; + +struct _upb_handlers { + // Array of msgents, [0]=toplevel. + upb_msgent *msgs; + int msgs_len, msgs_size; + upb_msgdef *toplevel_msgdef; // We own a ref. + upb_msgent *msgent; + upb_handlers_frame stack[UPB_MAX_TYPE_DEPTH], *top, *limit; + bool should_jit; +}; +typedef struct _upb_handlers upb_handlers; + +// The handlers object takes a ref on md. md can be NULL iff the client calls +// only upb_*_typed_*() (only upb_symtab should do this). +void upb_handlers_init(upb_handlers *h, upb_msgdef *md); +void upb_handlers_uninit(upb_handlers *h); + +// The startsubmsg handler needs to also pass a closure to the submsg. +struct _upb_sflow { + upb_flow_t flow; + void *closure; +}; +INLINE upb_sflow_t UPB_SFLOW(upb_flow_t flow, void *closure) { + upb_sflow_t ret = {flow, closure}; + return ret; +} +#define UPB_CONTINUE_WITH(c) UPB_SFLOW(UPB_CONTINUE, c) +#define UPB_S_BREAK UPB_SFLOW(UPB_BREAK, NULL) + +// Each message can have its own set of handlers. 
Here are empty definitions +// of the handlers for convenient copy/paste. +// TODO: Should endsubmsg get a copy of the upb_status*, so it can decide what +// to do in the case of a delegated failure? +// +// static upb_flow_t startmsg(void *closure) { +// // Called when the message begins. "closure" was supplied by our caller. +// // "mval" is whatever was bound to this message at registration time (for +// // upb_register_all() it will be its upb_msgdef*). +// return UPB_CONTINUE; +// } +// +// static void endmsg(void *closure, upb_status *status) { +// // Called when processing of this top-level message ends, whether in +// // success or failure. "status" indicates the final status of processing, +// // and can also be modified in-place to update the final status. +// // +// // Since this callback is guaranteed to always be called eventually, it +// // can be used to free any resources that were allocated during processing. +// } +// +// static upb_flow_t value(void *closure, upb_value fval, upb_value val) { +// // Called for every non-submessage value in the stream. "fval" contains +// // whatever value was bound to this field at registration time +// // (for upb_register_all(), this will be the field's upb_fielddef*). +// return UPB_CONTINUE; +// } +// +// static upb_sflow_t startsubmsg(void *closure, upb_value fval) { +// // Called when a submessage begins. The second element of the return +// // value is the closure for the submessage. +// return UPB_CONTINUE_WITH(closure); +// } +// +// static upb_flow_t endsubmsg(void *closure, upb_value fval) { +// // Called when a submessage ends. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, +// upb_value val) { +// // Called when an unknown value is encountered. +// return UPB_CONTINUE; +// } + +// Functions to register the above handlers. 
+// TODO: as an optimization, we could special-case handlers that don't +// need fval, to avoid even generating the code that sets the argument. +// If a value does not have a handler registered and there is no unknownval +// handler, the value will be skipped. +void upb_register_startend(upb_handlers *h, upb_startmsg_handler_t startmsg, + upb_endmsg_handler_t endmsg); +void upb_register_value(upb_handlers *h, upb_fielddef *f, + upb_value_handler_t value, upb_value fval); +void upb_register_unknownval(upb_handlers *h, upb_unknownval_handler_t unknown); + +// To register handlers for a submessage, push the fielddef and pop it +// when you're done. This can be used to delegate a submessage to a +// different processing component which does not need to be aware whether +// it is at the top level or not. +void upb_handlers_push(upb_handlers *h, upb_fielddef *f, + upb_startsubmsg_handler_t start, + upb_endsubmsg_handler_t end, upb_value fval, + bool delegate); +void upb_handlers_pop(upb_handlers *h, upb_fielddef *f); + +// In the case where types are self-recursive or mutually recursive, you can +// use this function which will link a set of handlers to a set that is +// already on our stack. This allows us to handle a tree of arbitrary +// depth without having to register an arbitrary number of levels of handlers. +// Returns "true" if the given type is indeed on the stack already and was +// linked. +// +// If more than one message of this type is on the stack, it chooses the +// one that is deepest in the tree (if necessary, we could give the caller +// more control over this). +bool upb_handlers_link(upb_handlers *h, upb_fielddef *f); + +// Convenience function for registering the given handler for the given +// field path. This will overwrite any startsubmsg handlers that were +// previously registered along the path. These can be overwritten again +// later if desired. +// TODO: upb_register_path_submsg()? 
+void upb_register_path_value(upb_handlers *h, const char *path, + upb_value_handler_t value, upb_value fval); + +// Convenience function for registering a single set of handlers on every +// message in our hierarchy. mvals are bound to upb_msgdef* and fvals are +// bound to upb_fielddef*. Any of the handlers can be NULL. +void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start, + upb_endmsg_handler_t end, + upb_value_handler_t value, + upb_startsubmsg_handler_t startsubmsg, + upb_endsubmsg_handler_t endsubmsg, + upb_unknownval_handler_t unknown); + +// TODO: for clients that want to increase efficiency by preventing bytesrcs +// from automatically being converted to strings in the value callback. +// INLINE void upb_handlers_use_bytesrcs(upb_handlers *h, bool use_bytesrcs); + +// Low-level functions -- internal-only. +void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, bool repeated, + upb_value_handler_t value, upb_value fval); +void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, bool repeated, + upb_startsubmsg_handler_t start, + upb_endsubmsg_handler_t end, + upb_value fval); +void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, bool repeated, int frames); +void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, bool repeated); +void upb_handlers_typed_pop(upb_handlers *h); + +INLINE upb_msgent *upb_handlers_getmsgent(upb_handlers *h, upb_fieldent *f) { + assert(f->msgent_index != -1); + return &h->msgs[f->msgent_index]; +} +upb_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_field_number_t fieldnum); + + +/* upb_dispatcher *************************************************************/ + +// upb_dispatcher can be used by sources of data to invoke the appropriate +// handlers. 
It takes care of details such as: +// - ensuring all endmsg callbacks (cleanup handlers) are called. +// - propagating status all the way back to the top-level message. +// - handling UPB_BREAK properly (clients only need to handle UPB_SKIPSUBMSG). +// - handling UPB_SKIPSUBMSG if the client doesn't (but this is less +// efficient, because then you can't skip the actual work). +// - tracking the stack of closures. +// +// TODO: it might be best to actually surface UPB_BREAK to clients in the case +// that they can't efficiently skip the submsg; eg. with groups. Then the client +// would know to just unwind the stack without bothering to consume the rest of +// the input. On the other hand, it might be important for all the input to be +// consumed, like if this is a submessage of a larger stream. + +typedef struct { + upb_fieldent *f; + void *closure; + // Relative to the beginning of this buffer. + // For groups and the top-level: UINT32_MAX. + uint32_t end_offset; + bool is_packed; // == !upb_issubmsg(f) && end_offset != UPB_REPATEDEND +} upb_dispatcher_frame; + +typedef struct { + upb_dispatcher_frame *top, *limit; + + upb_handlers *handlers; + + // Msg and dispatch table for the current level. + upb_msgent *msgent; + upb_inttable *dispatch_table; + + // The number of startsubmsg calls without a corresponding endsubmsg call. + int current_depth; + + // For all frames >= skip_depth, we are skipping all values in the submsg. + // For all frames >= noframe_depth, we did not even push a frame. + // These are INT_MAX when nothing is being skipped. + // Invariant: noframe_depth >= skip_depth + int skip_depth; + int noframe_depth; + + // Depth of stack entries we'll skip if a callback returns UPB_BREAK. + int delegated_depth; + + // Stack. 
+ upb_status status; + upb_dispatcher_frame stack[UPB_MAX_NESTING]; +} upb_dispatcher; + +INLINE bool upb_dispatcher_skipping(upb_dispatcher *d) { + return d->current_depth >= d->skip_depth; +} + +// If true, upb_dispatcher_skipping(d) must also be true. +INLINE bool upb_dispatcher_noframe(upb_dispatcher *d) { + return d->current_depth >= d->noframe_depth; +} + + +void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h); +void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end_offset); +void upb_dispatcher_uninit(upb_dispatcher *d); + +upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); +void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status); + +// Looks up a field by number for the current message. +INLINE upb_fieldent *upb_dispatcher_lookup(upb_dispatcher *d, + upb_field_number_t n) { + return (upb_fieldent*)upb_inttable_fastlookup( + d->dispatch_table, n, sizeof(upb_fieldent)); +} + +// Dispatches values or submessages -- the client is responsible for having +// previously looked up the field. +upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, + upb_fieldent *f, + size_t userval); +upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); + +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, upb_fieldent *f, + upb_value val) { + if (upb_dispatcher_skipping(d)) return UPB_SKIPSUBMSG; + upb_flow_t flow = f->cb.value(d->top->closure, f->fval, val); + if (flow != UPB_CONTINUE) { + d->noframe_depth = d->current_depth + 1; + d->skip_depth = (flow == UPB_BREAK) ? d->delegated_depth : d->current_depth; + return UPB_SKIPSUBMSG; + } + return UPB_CONTINUE; +} +INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, upb_field_number_t n, + upb_value val) { + // TODO. 
+ (void)d; + (void)n; + (void)val; + return UPB_CONTINUE; +} +INLINE bool upb_dispatcher_stackempty(upb_dispatcher *d) { + return d->top == d->stack; +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_msg.h b/src/upb_msg.h index 180f918..68ba13a 100644 --- a/src/upb_msg.h +++ b/src/upb_msg.h @@ -18,8 +18,8 @@ #ifndef UPB_MSG_H #define UPB_MSG_H -#include "upb_stream.h" #include +#include "upb_handlers.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_stdio.h b/src/upb_stdio.h index ba5fe1a..a164821 100644 --- a/src/upb_stdio.h +++ b/src/upb_stdio.h @@ -9,7 +9,7 @@ */ #include -#include "upb_stream.h" +#include "upb_bytestream.h" #ifndef UPB_STDIO_H_ #define UPB_STDIO_H_ @@ -33,8 +33,8 @@ void upb_stdio_reset(upb_stdio *stdio, FILE* file); // invalidated by upb_stdio_reset above. It is perfectly valid to get both // a bytesrc and a bytesink for the same stdio if the FILE* is open for reading // and writing. -upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio); -upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio); +upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio); +upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb_stream.c b/src/upb_stream.c deleted file mode 100644 index fe3a552..0000000 --- a/src/upb_stream.c +++ /dev/null @@ -1,343 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2011 Google Inc. See LICENSE for details. 
- * Author: Josh Haberman - */ - -#include -#include "upb_stream.h" - - -/* upb_handlers ***************************************************************/ - -upb_flow_t upb_startmsg_nop(void *closure) { - (void)closure; - return UPB_CONTINUE; -} - -void upb_endmsg_nop(void *closure, upb_status *status) { - (void)closure; - (void)status; -} - -upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val) { - (void)closure; - (void)fval; - (void)val; - return UPB_CONTINUE; -} - -upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval) { - (void)fval; - return UPB_CONTINUE_WITH(closure); -} - -upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval) { - (void)closure; - (void)fval; - return UPB_CONTINUE; -} - -upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, - upb_value val) { - (void)closure; - (void)fieldnum; - (void)val; - return UPB_CONTINUE; -} - -static void upb_msgent_init(upb_msgent *e) { - upb_inttable_init(&e->fieldtab, 8, sizeof(upb_fieldent)); - e->startmsg = &upb_startmsg_nop; - e->endmsg = &upb_endmsg_nop; - e->unknownval = &upb_unknownval_nop; - e->tablearray = NULL; - e->is_group = false; -} - -void upb_handlers_init(upb_handlers *h, upb_msgdef *md) { - h->msgs_len = 1; - h->msgs_size = 4; - h->msgs = malloc(h->msgs_size * sizeof(*h->msgs)); - h->top = &h->stack[0]; - h->limit = &h->stack[UPB_MAX_TYPE_DEPTH]; - h->toplevel_msgdef = md; - h->should_jit = true; - if (md) upb_msgdef_ref(md); - - h->top->msgent_index = 0; - h->top->msgdef = md; - h->msgent = &h->msgs[0]; - upb_msgent_init(h->msgent); -} - -void upb_handlers_uninit(upb_handlers *h) { - for (int i = 0; i < h->msgs_len; i++) { - upb_inttable_free(&h->msgs[i].fieldtab); - free(h->msgs[i].tablearray); - } - free(h->msgs); - upb_msgdef_unref(h->toplevel_msgdef); -} - -static upb_fieldent *upb_handlers_getorcreate_without_fval( - upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type, bool repeated) { - uint32_t tag = fieldnum << 3 | 
upb_types[type].native_wire_type; - upb_fieldent *f = upb_inttable_lookup(&h->msgent->fieldtab, tag); - if (!f) { - upb_fieldent new_f = {false, type, repeated, - repeated && upb_isprimitivetype(type), fieldnum, -1, UPB_NO_VALUE, - {&upb_value_nop}, &upb_endsubmsg_nop, 0, 0, 0, NULL}; - if (upb_issubmsgtype(type)) new_f.cb.startsubmsg = &upb_startsubmsg_nop; - upb_inttable_insert(&h->msgent->fieldtab, tag, &new_f); - - f = upb_inttable_lookup(&h->msgent->fieldtab, tag); - assert(f); - } - assert(f->type == type); - return f; -} - -static upb_fieldent *upb_handlers_getorcreate( - upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, bool repeated, upb_value fval) { - upb_fieldent *f = - upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated); - f->fval = fval; - return f; -} - -void upb_register_startend(upb_handlers *h, upb_startmsg_handler_t startmsg, - upb_endmsg_handler_t endmsg) { - h->msgent->startmsg = startmsg ? startmsg : &upb_startmsg_nop; - h->msgent->endmsg = endmsg ? 
endmsg : &upb_endmsg_nop; -} - -// TODO: -// void upb_register_unknownval(upb_handlers *h, -// upb_unknownval_handler_t unknown); -// bool upb_handlers_link(upb_handlers *h, upb_fielddef *f); -// void upb_register_path_value(upb_handlers *h, const char *path, -// upb_value_handler_t value, upb_value fval); - -void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start, - upb_endmsg_handler_t end, - upb_value_handler_t value, - upb_startsubmsg_handler_t startsubmsg, - upb_endsubmsg_handler_t endsubmsg, - upb_unknownval_handler_t unknown) { - upb_register_startend(h, start, end); - //upb_register_unknownval(h, unknown); - upb_msgdef *m = h->top->msgdef; - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - upb_value fval; - upb_value_setfielddef(&fval, f); - if (upb_issubmsg(f)) { - upb_handlers_push(h, f, startsubmsg, endsubmsg, fval, false); - upb_register_all(h, start, end, value, startsubmsg, endsubmsg, unknown); - upb_handlers_pop(h, f); - } else { - upb_register_value(h, f, value, fval); - } - } -} - -void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, bool repeated, - upb_value_handler_t value, upb_value fval) { - upb_handlers_getorcreate(h, fieldnum, type, repeated, fval)->cb.value = - value ? value : &upb_value_nop; -} - -void upb_register_value(upb_handlers *h, upb_fielddef *f, - upb_value_handler_t value, upb_value fval) { - assert(f->msgdef == h->top->msgdef); - upb_register_typed_value(h, f->number, f->type, upb_isarray(f), value, fval); -} - -void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, bool repeated, - upb_startsubmsg_handler_t start, - upb_endsubmsg_handler_t end, - upb_value fval) { - upb_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, repeated, fval); - f->cb.startsubmsg = start ? start : &upb_startsubmsg_nop; - f->endsubmsg = end ? 
end : &upb_endsubmsg_nop; -} - -void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, bool repeated, int frames) { - assert(frames <= (h->top - h->stack)); - upb_fieldent *f = - upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated); - f->msgent_index = (h->top - frames)->msgent_index; -} - -void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, bool repeated) { - upb_fieldent *f = - upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated); - if (h->top == h->limit) abort(); // TODO: make growable. - ++h->top; - if (f->msgent_index == -1) { - // Need to push a new msgent. - if (h->msgs_size == h->msgs_len) { - h->msgs_size *= 2; - h->msgs = realloc(h->msgs, h->msgs_size * sizeof(*h->msgs)); - } - f->msgent_index = h->msgs_len++; - h->msgent = &h->msgs[f->msgent_index]; - upb_msgent_init(h->msgent); - } else { - h->msgent = &h->msgs[f->msgent_index]; - } - h->top->msgent_index = f->msgent_index; - if (h->toplevel_msgdef) { - upb_fielddef *f = upb_msgdef_itof((h->top - 1)->msgdef, fieldnum); - assert(f); - h->top->msgdef = upb_downcast_msgdef(f->def); - } - if (type == UPB_TYPE(GROUP)) { - // Insert a fieldent for ENDGROUP so we can easily dispatch endgroup when - // we see it in the submessage. - // TODO: assert that no other fields in the group are registered with the - // same name or number. 
- upb_register_typed_submsg(h, fieldnum, UPB_TYPE_ENDGROUP, false, NULL, NULL, - UPB_NO_VALUE); - h->msgent->is_group = true; - } -} - -void upb_handlers_push(upb_handlers *h, upb_fielddef *f, - upb_startsubmsg_handler_t start, - upb_endsubmsg_handler_t end, upb_value fval, - bool delegate) { - assert(f->msgdef == h->top->msgdef); - (void)delegate; // TODO - upb_register_typed_submsg(h, f->number, f->type, upb_isarray(f), start, end, fval); - upb_handlers_typed_push(h, f->number, f->type, upb_isarray(f)); -} - -void upb_handlers_typed_pop(upb_handlers *h) { - assert(h->top > h->stack); - --h->top; - h->msgent = &h->msgs[h->top->msgent_index]; -} - -void upb_handlers_pop(upb_handlers *h, upb_fielddef *f) { - (void)f; // TODO: Check that this matches the corresponding push. - upb_handlers_typed_pop(h); -} - -/* upb_dispatcher *************************************************************/ - -static upb_fieldent toplevel_f = { - false, UPB_TYPE(GROUP), false, false, 0, - 0, // msgent_index -#ifdef NDEBUG - {{0}}, -#else - {{0}, UPB_VALUETYPE_RAW}, -#endif - {NULL}, NULL, 0, 0, 0, NULL}; - -void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h) { - d->handlers = h; - for (int i = 0; i < h->msgs_len; i++) - upb_inttable_compact(&h->msgs[i].fieldtab); - d->stack[0].f = &toplevel_f; - d->limit = &d->stack[UPB_MAX_NESTING]; - upb_status_init(&d->status); -} - -void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end_offset) { - d->msgent = &d->handlers->msgs[0]; - d->dispatch_table = &d->msgent->fieldtab; - d->current_depth = 0; - d->skip_depth = INT_MAX; - d->noframe_depth = INT_MAX; - d->delegated_depth = 0; - d->top = d->stack; - d->top->closure = top_closure; - d->top->end_offset = top_end_offset; - d->top->is_packed = false; -} - -void upb_dispatcher_uninit(upb_dispatcher *d) { - upb_handlers_uninit(d->handlers); - upb_status_uninit(&d->status); -} - -void upb_dispatcher_break(upb_dispatcher *d) { - assert(d->skip_depth == INT_MAX); - 
assert(d->noframe_depth == INT_MAX); - d->noframe_depth = d->current_depth; -} - -upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) { - upb_flow_t flow = d->msgent->startmsg(d->top->closure); - if (flow != UPB_CONTINUE) { - d->noframe_depth = d->current_depth + 1; - d->skip_depth = (flow == UPB_BREAK) ? d->delegated_depth : d->current_depth; - return UPB_SKIPSUBMSG; - } - return UPB_CONTINUE; -} - -void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) { - assert(d->top == d->stack); - d->msgent->endmsg(d->top->closure, &d->status); - // TODO: should we avoid this copy by passing client's status obj to cbs? - upb_copyerr(status, &d->status); -} - -upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fieldent *f, - size_t userval) { - ++d->current_depth; - if (upb_dispatcher_skipping(d)) return UPB_SKIPSUBMSG; - upb_sflow_t sflow = f->cb.startsubmsg(d->top->closure, f->fval); - if (sflow.flow != UPB_CONTINUE) { - d->noframe_depth = d->current_depth; - d->skip_depth = (sflow.flow == UPB_BREAK) ? 
- d->delegated_depth : d->current_depth; - return UPB_SKIPSUBMSG; - } - - ++d->top; - if(d->top >= d->limit) { - upb_seterr(&d->status, UPB_ERROR, "Nesting too deep."); - d->noframe_depth = d->current_depth; - d->skip_depth = d->delegated_depth; - return UPB_SKIPSUBMSG; - } - d->top->f = f; - d->top->end_offset = userval; - d->top->closure = sflow.closure; - d->top->is_packed = false; - d->msgent = upb_handlers_getmsgent(d->handlers, f); - d->dispatch_table = &d->msgent->fieldtab; - return upb_dispatch_startmsg(d); -} - -upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { - upb_flow_t flow; - if (upb_dispatcher_noframe(d)) { - flow = UPB_SKIPSUBMSG; - } else { - assert(d->top > d->stack); - upb_fieldent *old_f = d->top->f; - d->msgent->endmsg(d->top->closure, &d->status); - --d->top; - d->msgent = upb_handlers_getmsgent(d->handlers, d->top->f); - d->dispatch_table = &d->msgent->fieldtab; - d->noframe_depth = INT_MAX; - if (!upb_dispatcher_skipping(d)) d->skip_depth = INT_MAX; - // Deliver like a regular value. - flow = old_f->endsubmsg(d->top->closure, old_f->fval); - } - --d->current_depth; - return flow; -} diff --git a/src/upb_stream.h b/src/upb_stream.h deleted file mode 100644 index e749964..0000000 --- a/src/upb_stream.h +++ /dev/null @@ -1,498 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. - * Author: Josh Haberman - * - * This file defines general-purpose streaming data interfaces: - * - * - upb_handlers: represents a set of callbacks, very much like in XML's SAX - * API, that a client can register to do a streaming tree traversal over a - * stream of structured protobuf data, without knowing where that data is - * coming from. - * - * - upb_bytesrc: a pull interface for streams of bytes, basically an - * abstraction of read()/fread(), but it avoids copies where possible. 
- * - * - upb_bytesink: push interface for streams of bytes, basically an - * abstraction of write()/fwrite(), but it avoids copies where possible. - * - * All of the encoders and decoders are based on these generic interfaces, - * which lets you write streaming algorithms that do not depend on a specific - * serialization format; for example, you can write a pretty printer that works - * with input that came from protobuf binary format, protobuf text format, or - * even an in-memory upb_msg -- the pretty printer will not know the - * difference. - */ - -#ifndef UPB_STREAM_H -#define UPB_STREAM_H - -#include -#include "upb.h" -#include "upb_def.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_handlers ***************************************************************/ - -// A upb_handlers object is a table of callbacks that are bound to specific -// messages and fields. A consumer of data registers callbacks and then -// passes the upb_handlers object to the producer, which calls them at the -// appropriate times. - -// All handlers except the endmsg handler return a value from this enum, to -// control whether parsing will continue or not. -typedef enum { - // Data source should continue calling callbacks. - UPB_CONTINUE = 0, - - // Halt processing permanently (in a non-resumable way). The endmsg handlers - // for any currently open messages will be called which can supply a more - // specific status message. If UPB_BREAK is returned from inside a delegated - // message, processing will continue normally in the containing message (though - // the containing message can inspect the returned status and choose to also - // return UPB_BREAK if it is not ok). - UPB_BREAK, - - // Skips to the end of the current submessage (or if we are at the top - // level, skips to the end of the entire message). In other words, it is - // like a UPB_BREAK that applies only to the current level. 
- // - // If you UPB_SKIPSUBMSG from a startmsg handler, the endmsg handler will - // be called to perform cleanup and return a status. Returning - // UPB_SKIPSUBMSG from a startsubmsg handler will *not* call the startmsg, - // endmsg, or endsubmsg handlers. - UPB_SKIPSUBMSG, - - // TODO: Add UPB_SUSPEND, for resumable producers/consumers. -} upb_flow_t; - -typedef struct _upb_sflow upb_sflow_t; -typedef upb_flow_t (*upb_startmsg_handler_t)(void *closure); -typedef void (*upb_endmsg_handler_t)(void *closure, upb_status *status); -typedef upb_flow_t (*upb_value_handler_t)( - void *closure, upb_value fval, upb_value val); -typedef upb_sflow_t (*upb_startsubmsg_handler_t)( - void *closure, upb_value fval); -typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure, upb_value fval); -typedef upb_flow_t (*upb_unknownval_handler_t)( - void *closure, upb_field_number_t fieldnum, upb_value val); - -upb_flow_t upb_startmsg_nop(void *closure); -void upb_endmsg_nop(void *closure, upb_status *status); -upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val); -upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval); -upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval); -upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, - upb_value val); -struct _upb_decoder; -typedef struct _upb_fieldent { - bool junk; - upb_fieldtype_t type; - bool repeated; - bool is_repeated_primitive; - uint32_t number; - // For upb_issubmsg(f) only, the index into the msgdef array of the submsg. - // -1 if unset (indicates that submsg should be skipped). 
- int32_t msgent_index; - upb_value fval; - union { - upb_value_handler_t value; - upb_startsubmsg_handler_t startsubmsg; - } cb; - upb_endsubmsg_handler_t endsubmsg; - uint32_t jit_pclabel; - uint32_t jit_pclabel_notypecheck; - uint32_t jit_submsg_done_pclabel; - void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f); -} upb_fieldent; - -typedef struct _upb_msgent { - upb_startmsg_handler_t startmsg; - upb_endmsg_handler_t endmsg; - upb_unknownval_handler_t unknownval; - // Maps field number -> upb_fieldent. - upb_inttable fieldtab; - uint32_t jit_startmsg_pclabel; - uint32_t jit_endofbuf_pclabel; - uint32_t jit_endofmsg_pclabel; - uint32_t jit_unknownfield_pclabel; - bool is_group; - int32_t jit_parent_field_done_pclabel; - uint32_t max_field_number; - // Currently keyed on field number. Could also try keying it - // on encoded or decoded tag, or on encoded field number. - void **tablearray; -} upb_msgent; - -typedef struct { - upb_msgdef *msgdef; - int msgent_index; -} upb_handlers_frame; - -struct _upb_handlers { - // Array of msgdefs, [0]=toplevel. - upb_msgent *msgs; - int msgs_len, msgs_size; - upb_msgdef *toplevel_msgdef; // We own a ref. - upb_msgent *msgent; - upb_handlers_frame stack[UPB_MAX_TYPE_DEPTH], *top, *limit; - bool should_jit; -}; -typedef struct _upb_handlers upb_handlers; - -// The handlers object takes a ref on md. md can be NULL iff the client calls -// only upb_*_typed_*() (only upb_symtab should do this). -void upb_handlers_init(upb_handlers *h, upb_msgdef *md); -void upb_handlers_uninit(upb_handlers *h); - -// The startsubmsg handler needs to also pass a closure to the submsg. -struct _upb_sflow { - upb_flow_t flow; - void *closure; -}; -INLINE upb_sflow_t UPB_SFLOW(upb_flow_t flow, void *closure) { - upb_sflow_t ret = {flow, closure}; - return ret; -} -#define UPB_CONTINUE_WITH(c) UPB_SFLOW(UPB_CONTINUE, c) -#define UPB_S_BREAK UPB_SFLOW(UPB_BREAK, NULL) - -// Each message can have its own set of handlers. 
Here are empty definitions -// of the handlers for convenient copy/paste. -// TODO: Should endsubmsg get a copy of the upb_status*, so it can decide what -// to do in the case of a delegated failure? -// -// static upb_flow_t startmsg(void *closure) { -// // Called when the message begins. "closure" was supplied by our caller. -// // "mval" is whatever was bound to this message at registration time (for -// // upb_register_all() it will be its upb_msgdef*). -// return UPB_CONTINUE; -// } -// -// static void endmsg(void *closure, upb_status *status) { -// // Called when processing of this top-level message ends, whether in -// // success or failure. "status" indicates the final status of processing, -// // and can also be modified in-place to update the final status. -// // -// // Since this callback is guaranteed to always be called eventually, it -// // can be used to free any resources that were allocated during processing. -// } -// -// static upb_flow_t value(void *closure, upb_value fval, upb_value val) { -// // Called for every non-submessage value in the stream. "fval" contains -// // whatever value was bound to this field at registration type -// // (for upb_register_all(), this will be the field's upb_fielddef*). -// return UPB_CONTINUE; -// } -// -// static upb_sflow_t startsubmsg(void *closure, upb_value fval) { -// // Called when a submessage begins. The second element of the return -// // value is the closure for the submessage. -// return UPB_CONTINUE_WITH(closure); -// } -// -// static upb_flow_t endsubmsg(void *closure, upb_value fval) { -// // Called when a submessage ends. -// return UPB_CONTINUE; -// } -// -// static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, -// upb_value val) { -// // Called with an unknown value is encountered. -// return UPB_CONTINUE; -// } - -// Functions to register the above handlers. 
-// TODO: as an optimization, we could special-case handlers that don't -// need fval, to avoid even generating the code that sets the argument. -// If a value does not have a handler registered and there is no unknownval -// handler, the value will be skipped. -void upb_register_startend(upb_handlers *h, upb_startmsg_handler_t startmsg, - upb_endmsg_handler_t endmsg); -void upb_register_value(upb_handlers *h, upb_fielddef *f, - upb_value_handler_t value, upb_value fval); -void upb_register_unknownval(upb_handlers *h, upb_unknownval_handler_t unknown); - -// To register handlers for a submessage, push the fielddef and pop it -// when you're done. This can be used to delegate a submessage to a -// different processing component which does not need to be aware whether -// it is at the top level or not. -void upb_handlers_push(upb_handlers *h, upb_fielddef *f, - upb_startsubmsg_handler_t start, - upb_endsubmsg_handler_t end, upb_value fval, - bool delegate); -void upb_handlers_pop(upb_handlers *h, upb_fielddef *f); - -// In the case where types are self-recursive or mutually recursive, you can -// use this function which will link a set of handlers to a set that is -// already on our stack. This allows us to handle a tree of arbitrary -// depth without having to register an arbitrary number of levels of handlers. -// Returns "true" if the given type is indeed on the stack already and was -// linked. -// -// If more than one message of this type is on the stack, it chooses the -// one that is deepest in the tree (if necessary, we could give the caller -// more control over this). -bool upb_handlers_link(upb_handlers *h, upb_fielddef *f); - -// Convenience function for registering the given handler for the given -// field path. This will overwrite any startsubmsg handlers that were -// previously registered along the path. These can be overwritten again -// later if desired. -// TODO: upb_register_path_submsg()? 
-void upb_register_path_value(upb_handlers *h, const char *path, - upb_value_handler_t value, upb_value fval); - -// Convenience function for registering a single set of handlers on every -// message in our hierarchy. mvals are bound to upb_msgdef* and fvals are -// bound to upb_fielddef*. Any of the handlers can be NULL. -void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start, - upb_endmsg_handler_t end, - upb_value_handler_t value, - upb_startsubmsg_handler_t startsubmsg, - upb_endsubmsg_handler_t endsubmsg, - upb_unknownval_handler_t unknown); - -// TODO: for clients that want to increase efficiency by preventing bytesrcs -// from automatically being converted to strings in the value callback. -// INLINE void upb_handlers_use_bytesrcs(upb_handlers *h, bool use_bytesrcs); - -// Low-level functions -- internal-only. -void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, bool repeated, - upb_value_handler_t value, upb_value fval); -void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, bool repeated, - upb_startsubmsg_handler_t start, - upb_endsubmsg_handler_t end, - upb_value fval); -void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, bool repeated, int frames); -void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, bool repeated); -void upb_handlers_typed_pop(upb_handlers *h); - -INLINE upb_msgent *upb_handlers_getmsgent(upb_handlers *h, upb_fieldent *f) { - assert(f->msgent_index != -1); - return &h->msgs[f->msgent_index]; -} -upb_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_field_number_t fieldnum); - - -/* upb_dispatcher *************************************************************/ - -// upb_dispatcher can be used by sources of data to invoke the appropriate -// handlers. 
It takes care of details such as: -// - ensuring all endmsg callbacks (cleanup handlers) are called. -// - propagating status all the way back to the top-level message. -// - handling UPB_BREAK properly (clients only need to handle UPB_SKIPSUBMSG). -// - handling UPB_SKIPSUBMSG if the client doesn't (but this is less -// efficient, because then you can't skip the actual work). -// - tracking the stack of closures. -// -// TODO: it might be best to actually surface UPB_BREAK to clients in the case -// that the can't efficiently skip the submsg; eg. with groups. Then the client -// would know to just unwind the stack without bothering to consume the rest of -// the input. On the other hand, it might be important for all the input to be -// consumed, like if this is a submessage of a larger stream. - -typedef struct { - upb_fieldent *f; - void *closure; - // Relative to the beginning of this buffer. - // For groups and the top-level: UINT32_MAX. - uint32_t end_offset; - bool is_packed; // == !upb_issubmsg(f) && end_offset != UPB_REPATEDEND -} upb_dispatcher_frame; - -typedef struct { - upb_dispatcher_frame *top, *limit; - - upb_handlers *handlers; - - // Msg and dispatch table for the current level. - upb_msgent *msgent; - upb_inttable *dispatch_table; - - // The number of startsubmsg calls without a corresponding endsubmsg call. - int current_depth; - - // For all frames >= skip_depth, we are skipping all values in the submsg. - // For all frames >= noframe_depth, we did not even push a frame. - // These are INT_MAX when nothing is being skipped. - // Invariant: noframe_depth >= skip_depth - int skip_depth; - int noframe_depth; - - // Depth of stack entries we'll skip if a callback returns UPB_BREAK. - int delegated_depth; - - // Stack. 
- upb_status status; - upb_dispatcher_frame stack[UPB_MAX_NESTING]; -} upb_dispatcher; - -INLINE bool upb_dispatcher_skipping(upb_dispatcher *d) { - return d->current_depth >= d->skip_depth; -} - -// If true, upb_dispatcher_skipping(d) must also be true. -INLINE bool upb_dispatcher_noframe(upb_dispatcher *d) { - return d->current_depth >= d->noframe_depth; -} - - -void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h); -void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end_offset); -void upb_dispatcher_uninit(upb_dispatcher *d); - -upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); -void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status); - -// Looks up a field by number for the current message. -INLINE upb_fieldent *upb_dispatcher_lookup(upb_dispatcher *d, - upb_field_number_t n) { - return (upb_fieldent*)upb_inttable_fastlookup( - d->dispatch_table, n, sizeof(upb_fieldent)); -} - -// Dispatches values or submessages -- the client is responsible for having -// previously looked up the field. -upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, - upb_fieldent *f, - size_t userval); -upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); - -INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, upb_fieldent *f, - upb_value val) { - if (upb_dispatcher_skipping(d)) return UPB_SKIPSUBMSG; - upb_flow_t flow = f->cb.value(d->top->closure, f->fval, val); - if (flow != UPB_CONTINUE) { - d->noframe_depth = d->current_depth + 1; - d->skip_depth = (flow == UPB_BREAK) ? d->delegated_depth : d->current_depth; - return UPB_SKIPSUBMSG; - } - return UPB_CONTINUE; -} -INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, upb_field_number_t n, - upb_value val) { - // TODO. 
- (void)d; - (void)n; - (void)val; - return UPB_CONTINUE; -} -INLINE bool upb_dispatcher_stackempty(upb_dispatcher *d) { - return d->top == d->stack; -} - -/* upb_bytesrc ****************************************************************/ - -// upb_bytesrc is a pull interface for streams of bytes, basically an -// abstraction of read()/fread(), but it avoids copies where possible. - -typedef upb_strlen_t (*upb_bytesrc_read_fptr)( - upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); -typedef bool (*upb_bytesrc_getstr_fptr)( - upb_bytesrc *src, upb_string *str, upb_status *status); - -typedef struct { - upb_bytesrc_read_fptr read; - upb_bytesrc_getstr_fptr getstr; -} upb_bytesrc_vtbl; - -struct _upb_bytesrc { - upb_bytesrc_vtbl *vtbl; -}; - -INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { - s->vtbl = vtbl; -} - -// Reads up to "count" bytes into "buf", returning the total number of bytes -// read. If 0, indicates error and puts details in "status". -INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count, upb_status *status) { - return src->vtbl->read(src, buf, count, status); -} - -// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure -// that "str" is created or just recycled. Returns "false" if no data was -// returned, either due to error or EOF (check status for details). -// -// In comparison to upb_bytesrc_read(), this call can possibly alias existing -// string data (which avoids a copy). On the other hand, if the data was *not* -// already in an existing string, this copies it into a upb_string, and if the -// data needs to be put in a specific range of memory (because eg. you need to -// put it into a different kind of string object) then upb_bytesrc_get() could -// save you a copy. 
-INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_status *status) { - return src->vtbl->getstr(src, str, status); -} - - -/* upb_bytesink ***************************************************************/ - -// upb_bytesink: push interface for streams of bytes, basically an abstraction -// of write()/fwrite(), but it avoids copies where possible. - -struct _upb_bytesink; -typedef struct _upb_bytesink upb_bytesink; -typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( - upb_bytesink *bytesink, upb_string *str, upb_status *status); -typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( - upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args); - -typedef struct { - upb_bytesink_putstr_fptr putstr; - upb_bytesink_vprintf_fptr vprintf; -} upb_bytesink_vtbl; - -struct _upb_bytesink { - upb_bytesink_vtbl *vtbl; -}; - -INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { - s->vtbl = vtbl; -} - - -// TODO: Figure out how buffering should be handled. Should the caller buffer -// data and only call these functions when a buffer is full? Seems most -// efficient, but then buffering has to be configured in the caller, which -// could be anything, which makes it hard to have a standard interface for -// controlling buffering. -// -// The downside of having the bytesink buffer is efficiency: the caller is -// making more (virtual) function calls, and the caller can't arrange to have -// a big contiguous buffer. The bytesink can do this, but will have to copy -// to make the data contiguous. - -// Returns the number of bytes written. -INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, - const char *fmt, ...) { - va_list args; - va_start(args, fmt); - upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args); - va_end(args); - return ret; -} - -// Puts the given string, returning true if the operation was successful, otherwise -// check "status" for details. 
Ownership of the string is *not* passed; if -// the callee wants a reference he must call upb_string_getref() on it. -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, - upb_status *status) { - return sink->vtbl->putstr(sink, str, status); -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_strstream.c b/src/upb_strstream.c index 37b5179..284a4d7 100644 --- a/src/upb_strstream.c +++ b/src/upb_strstream.c @@ -8,6 +8,7 @@ #include "upb_strstream.h" #include +#include "upb_string.h" /* upb_stringsrc **************************************************************/ diff --git a/src/upb_strstream.h b/src/upb_strstream.h index 8da5393..e092b55 100644 --- a/src/upb_strstream.h +++ b/src/upb_strstream.h @@ -11,7 +11,7 @@ #ifndef UPB_STRSTREAM_H #define UPB_STRSTREAM_H -#include "upb_stream.h" +#include "upb_bytestream.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_textprinter.h b/src/upb_textprinter.h index aa9febb..f8c7f78 100644 --- a/src/upb_textprinter.h +++ b/src/upb_textprinter.h @@ -8,7 +8,8 @@ #ifndef UPB_TEXT_H_ #define UPB_TEXT_H_ -#include "upb_stream.h" +#include "upb_bytestream.h" +#include "upb_handlers.h" #ifdef __cplusplus extern "C" { -- cgit v1.2.3