From 1782f28c866b3fc0534fba69e0802c24414bdaaa Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 8 May 2011 17:42:40 -0700 Subject: Documentation, some type renaming, nix unknown handler for now. --- src/upb_handlers.h | 222 +++++++++++++++++++++++------------------------------ 1 file changed, 95 insertions(+), 127 deletions(-) (limited to 'src/upb_handlers.h') diff --git a/src/upb_handlers.h b/src/upb_handlers.h index d0ef1a4..f36f1dd 100644 --- a/src/upb_handlers.h +++ b/src/upb_handlers.h @@ -4,29 +4,15 @@ * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * This file defines general-purpose streaming data interfaces: - * - * - upb_handlers: represents a set of callbacks, very much like in XML's SAX - * API, that a client can register to do a streaming tree traversal over a - * stream of structured protobuf data, without knowing where that data is - * coming from. - * - * - upb_bytesrc: a pull interface for streams of bytes, basically an - * abstraction of read()/fread(), but it avoids copies where possible. - * - * - upb_bytesink: push interface for streams of bytes, basically an - * abstraction of write()/fwrite(), but it avoids copies where possible. - * - * All of the encoders and decoders are based on these generic interfaces, - * which lets you write streaming algorithms that do not depend on a specific - * serialization format; for example, you can write a pretty printer that works - * with input that came from protobuf binary format, protobuf text format, or - * even an in-memory upb_msg -- the pretty printer will not know the - * difference. + * upb_handlers is a generic visitor-like interface for iterating over a stream + * of protobuf data. You can register function pointers that will be called + * for each message and/or field as the data is being parsed or iterated over, + * without having to know the source format that we are parsing from. This + * decouples the parsing logic from the processing logic. 
*/ -#ifndef UPB_STREAM_H -#define UPB_STREAM_H +#ifndef UPB_HANDLERS_H +#define UPB_HANDLERS_H #include #include "upb.h" @@ -38,11 +24,50 @@ extern "C" { /* upb_handlers ***************************************************************/ -// A upb_handlers object is a table of callbacks that are bound to specific -// messages and fields. A consumer of data registers callbacks and then -// passes the upb_handlers object to the producer, which calls them at the -// appropriate times. - +// A upb_handlers object represents a graph of handlers. Each message can have +// a set of handlers as well as a set of fields which themselves have handlers. +// Fields that represent submessages or groups are linked to other message +// handlers, so the overall set of handlers can form a graph structure (which +// may be cyclic). +// +// The upb_mhandlers (message handlers) object can have the following handlers: +// +// static upb_flow_t startmsg(void *closure) { +// // Called when the message begins. "closure" was supplied by our caller. +// return UPB_CONTINUE; +// } +// +// static void endmsg(void *closure, upb_status *status) { +// // Called when processing of this message ends, whether in success or +// // failure. "status" indicates the final status of processing, and can +// // also be modified in-place to update the final status. +// // +// // Since this callback is guaranteed to always be called eventually, it +// // can be used to free any resources that were allocated during processing. +// } +// +// TODO: unknown field handler. +// +// The upb_fhandlers (field handlers) object can have the following handlers: +// +// static upb_flow_t value(void *closure, upb_value fval, upb_value val) { +// // Called when the field's value is encountered. "fval" contains +// // whatever value was bound to this field at registration time +// // (for upb_register_all(), this will be the field's upb_fielddef*). 
+// return UPB_CONTINUE; +// } +// +// static upb_sflow_t startsubmsg(void *closure, upb_value fval) { +// // Called when a submessage begins. The second element of the return +// // value is the closure for the submessage. +// return UPB_CONTINUE_WITH(closure); +// } +// +// static upb_flow_t endsubmsg(void *closure, upb_value fval) { +// // Called when a submessage ends. +// return UPB_CONTINUE; +// } +// // All handlers except the endmsg handler return a value from this enum, to // control whether parsing will continue or not. typedef enum { @@ -51,10 +76,7 @@ typedef enum { // Halt processing permanently (in a non-resumable way). The endmsg handlers // for any currently open messages will be called which can supply a more - // specific status message. If UPB_BREAK is returned from inside a delegated - // message, processing will continue normally in the containing message (though - // the containing message can inspect the returned status and choose to also - // return UPB_BREAK if it is not ok). + // specific status message. UPB_BREAK, // Skips to the end of the current submessage (or if we are at the top @@ -70,24 +92,22 @@ typedef enum { // TODO: Add UPB_SUSPEND, for resumable producers/consumers. } upb_flow_t; +// Typedefs for all of the handler functions defined above. 
typedef struct _upb_sflow upb_sflow_t; -typedef upb_flow_t (*upb_startmsg_handler_t)(void *closure); -typedef void (*upb_endmsg_handler_t)(void *closure, upb_status *status); -typedef upb_flow_t (*upb_value_handler_t)( - void *closure, upb_value fval, upb_value val); -typedef upb_sflow_t (*upb_startsubmsg_handler_t)( - void *closure, upb_value fval); -typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure, upb_value fval); -typedef upb_flow_t (*upb_unknownval_handler_t)( - void *closure, upb_field_number_t fieldnum, upb_value val); - +typedef upb_flow_t (upb_startmsg_handler)(void *c); +typedef void (upb_endmsg_handler)(void *c, upb_status *status); +typedef upb_flow_t (upb_value_handler)(void *c, upb_value fval, upb_value val); +typedef upb_sflow_t (upb_startsubmsg_handler)(void *closure, upb_value fval); +typedef upb_flow_t (upb_endsubmsg_handler)(void *closure, upb_value fval); + +// No-op implementations of all of the above handlers. Use these instead of +// rolling your own -- the JIT can recognize these and optimize away the call. 
upb_flow_t upb_startmsg_nop(void *closure); void upb_endmsg_nop(void *closure, upb_status *status); upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val); upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval); upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval); -upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, - upb_value val); + struct _upb_decoder; typedef struct _upb_fieldent { bool junk; @@ -100,21 +120,20 @@ typedef struct _upb_fieldent { int32_t msgent_index; upb_value fval; union { - upb_value_handler_t value; - upb_startsubmsg_handler_t startsubmsg; + upb_value_handler *value; + upb_startsubmsg_handler *startsubmsg; } cb; - upb_endsubmsg_handler_t endsubmsg; + upb_endsubmsg_handler *endsubmsg; uint32_t jit_pclabel; uint32_t jit_pclabel_notypecheck; uint32_t jit_submsg_done_pclabel; void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f); -} upb_fieldent; +} upb_fhandlers; typedef struct _upb_msgent { - upb_startmsg_handler_t startmsg; - upb_endmsg_handler_t endmsg; - upb_unknownval_handler_t unknownval; - // Maps field number -> upb_fieldent. + upb_startmsg_handler *startmsg; + upb_endmsg_handler *endmsg; + // Maps field number -> upb_fhandlers. upb_inttable fieldtab; uint32_t jit_startmsg_pclabel; uint32_t jit_endofbuf_pclabel; @@ -126,7 +145,7 @@ typedef struct _upb_msgent { // Currently keyed on field number. Could also try keying it // on encoded or decoded tag, or on encoded field number. void **tablearray; -} upb_msgent; +} upb_mhandlers; typedef struct { upb_msgdef *msgdef; @@ -135,10 +154,10 @@ typedef struct { struct _upb_handlers { // Array of msgdefs, [0]=toplevel. - upb_msgent *msgs; + upb_mhandlers *msgs; int msgs_len, msgs_size; upb_msgdef *toplevel_msgdef; // We own a ref. 
- upb_msgent *msgent; + upb_mhandlers *msgent; upb_handlers_frame stack[UPB_MAX_TYPE_DEPTH], *top, *limit; bool should_jit; }; @@ -161,69 +180,19 @@ INLINE upb_sflow_t UPB_SFLOW(upb_flow_t flow, void *closure) { #define UPB_CONTINUE_WITH(c) UPB_SFLOW(UPB_CONTINUE, c) #define UPB_S_BREAK UPB_SFLOW(UPB_BREAK, NULL) -// Each message can have its own set of handlers. Here are empty definitions -// of the handlers for convenient copy/paste. -// TODO: Should endsubmsg get a copy of the upb_status*, so it can decide what -// to do in the case of a delegated failure? -// -// static upb_flow_t startmsg(void *closure) { -// // Called when the message begins. "closure" was supplied by our caller. -// // "mval" is whatever was bound to this message at registration time (for -// // upb_register_all() it will be its upb_msgdef*). -// return UPB_CONTINUE; -// } -// -// static void endmsg(void *closure, upb_status *status) { -// // Called when processing of this top-level message ends, whether in -// // success or failure. "status" indicates the final status of processing, -// // and can also be modified in-place to update the final status. -// // -// // Since this callback is guaranteed to always be called eventually, it -// // can be used to free any resources that were allocated during processing. -// } -// -// static upb_flow_t value(void *closure, upb_value fval, upb_value val) { -// // Called for every non-submessage value in the stream. "fval" contains -// // whatever value was bound to this field at registration type -// // (for upb_register_all(), this will be the field's upb_fielddef*). -// return UPB_CONTINUE; -// } -// -// static upb_sflow_t startsubmsg(void *closure, upb_value fval) { -// // Called when a submessage begins. The second element of the return -// // value is the closure for the submessage. -// return UPB_CONTINUE_WITH(closure); -// } -// -// static upb_flow_t endsubmsg(void *closure, upb_value fval) { -// // Called when a submessage ends. 
-// return UPB_CONTINUE; -// } -// -// static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, -// upb_value val) { -// // Called with an unknown value is encountered. -// return UPB_CONTINUE; -// } - // Functions to register the above handlers. -// TODO: as an optimization, we could special-case handlers that don't -// need fval, to avoid even generating the code that sets the argument. -// If a value does not have a handler registered and there is no unknownval -// handler, the value will be skipped. -void upb_register_startend(upb_handlers *h, upb_startmsg_handler_t startmsg, - upb_endmsg_handler_t endmsg); +void upb_register_startend(upb_handlers *h, upb_startmsg_handler *startmsg, + upb_endmsg_handler *endmsg); void upb_register_value(upb_handlers *h, upb_fielddef *f, - upb_value_handler_t value, upb_value fval); -void upb_register_unknownval(upb_handlers *h, upb_unknownval_handler_t unknown); + upb_value_handler *value, upb_value fval); // To register handlers for a submessage, push the fielddef and pop it // when you're done. This can be used to delegate a submessage to a // different processing component which does not need to be aware whether // it is at the top level or not. void upb_handlers_push(upb_handlers *h, upb_fielddef *f, - upb_startsubmsg_handler_t start, - upb_endsubmsg_handler_t end, upb_value fval, + upb_startsubmsg_handler *start, + upb_endsubmsg_handler *end, upb_value fval, bool delegate); void upb_handlers_pop(upb_handlers *h, upb_fielddef *f); @@ -245,17 +214,16 @@ bool upb_handlers_link(upb_handlers *h, upb_fielddef *f); // later if desired. // TODO: upb_register_path_submsg()? void upb_register_path_value(upb_handlers *h, const char *path, - upb_value_handler_t value, upb_value fval); + upb_value_handler *value, upb_value fval); // Convenience function for registering a single set of handlers on every // message in our hierarchy. mvals are bound to upb_msgdef* and fvals are // bound to upb_fielddef*. 
Any of the handlers can be NULL. -void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start, - upb_endmsg_handler_t end, - upb_value_handler_t value, - upb_startsubmsg_handler_t startsubmsg, - upb_endsubmsg_handler_t endsubmsg, - upb_unknownval_handler_t unknown); +void upb_register_all(upb_handlers *h, upb_startmsg_handler *start, + upb_endmsg_handler *end, + upb_value_handler *value, + upb_startsubmsg_handler *startsubmsg, + upb_endsubmsg_handler *endsubmsg); // TODO: for clients that want to increase efficiency by preventing bytesrcs // from automatically being converted to strings in the value callback. @@ -264,11 +232,11 @@ void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start, // Low-level functions -- internal-only. void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type, bool repeated, - upb_value_handler_t value, upb_value fval); + upb_value_handler *value, upb_value fval); void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type, bool repeated, - upb_startsubmsg_handler_t start, - upb_endsubmsg_handler_t end, + upb_startsubmsg_handler *start, + upb_endsubmsg_handler *end, upb_value fval); void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type, bool repeated, int frames); @@ -276,11 +244,11 @@ void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type, bool repeated); void upb_handlers_typed_pop(upb_handlers *h); -INLINE upb_msgent *upb_handlers_getmsgent(upb_handlers *h, upb_fieldent *f) { +INLINE upb_mhandlers *upb_handlers_getmsgent(upb_handlers *h, upb_fhandlers *f) { assert(f->msgent_index != -1); return &h->msgs[f->msgent_index]; } -upb_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_field_number_t fieldnum); +upb_fhandlers *upb_handlers_lookup(upb_inttable *dispatch_table, upb_field_number_t fieldnum); /* upb_dispatcher 
*************************************************************/ @@ -301,7 +269,7 @@ upb_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_field_number // consumed, like if this is a submessage of a larger stream. typedef struct { - upb_fieldent *f; + upb_fhandlers *f; void *closure; // Relative to the beginning of this buffer. // For groups and the top-level: UINT32_MAX. @@ -315,7 +283,7 @@ typedef struct { upb_handlers *handlers; // Msg and dispatch table for the current level. - upb_msgent *msgent; + upb_mhandlers *msgent; upb_inttable *dispatch_table; // The number of startsubmsg calls without a corresponding endsubmsg call. @@ -354,20 +322,20 @@ upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status); // Looks up a field by number for the current message. -INLINE upb_fieldent *upb_dispatcher_lookup(upb_dispatcher *d, +INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, upb_field_number_t n) { - return (upb_fieldent*)upb_inttable_fastlookup( - d->dispatch_table, n, sizeof(upb_fieldent)); + return (upb_fhandlers*)upb_inttable_fastlookup( + d->dispatch_table, n, sizeof(upb_fhandlers)); } // Dispatches values or submessages -- the client is responsible for having // previously looked up the field. upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, - upb_fieldent *f, + upb_fhandlers *f, size_t userval); upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, upb_fieldent *f, +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f, upb_value val) { if (upb_dispatcher_skipping(d)) return UPB_SKIPSUBMSG; upb_flow_t flow = f->cb.value(d->top->closure, f->fval, val); -- cgit v1.2.3