diff options
author | Josh Haberman <haberman@google.com> | 2013-02-15 16:27:18 -0800 |
---|---|---|
committer | Josh Haberman <haberman@google.com> | 2013-02-15 16:27:18 -0800 |
commit | 7d3e2bd2c4cfd1296d1d6f996d7548de26540d41 (patch) | |
tree | b4b35967b3322c65cfb1a32220e8718de09d85fc /upb/handlers.h | |
parent | ea198bdcf947ba4bd51474bdd4f7b82b5e4cf41d (diff) |
Sync with 8 months of Google-internal development.
Many things have changed and been simplified.
The memory-management story for upb_def and upb_handlers
is much more robust; upb_def and upb_handlers should be
fairly stable interfaces now. There is still much work
to do for the runtime component (upb_sink).
Diffstat (limited to 'upb/handlers.h')
-rw-r--r-- | upb/handlers.h | 1014 |
1 files changed, 652 insertions, 362 deletions
diff --git a/upb/handlers.h b/upb/handlers.h index 6d8f9f2..094702e 100644 --- a/upb/handlers.h +++ b/upb/handlers.h @@ -1,399 +1,689 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. + * Copyright (c) 2010-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> * - * upb_handlers is a generic visitor-like interface for iterating over a stream - * of protobuf data. You can register function pointers that will be called - * for each message and/or field as the data is being parsed or iterated over, - * without having to know the source format that we are parsing from. This - * decouples the parsing logic from the processing logic. + * A upb_handlers is like a virtual table for a upb_msgdef. Each field of the + * message can have associated functions that will be called when we are + * parsing or visiting a stream of data. This is similar to how handlers work + * in SAX (the Simple API for XML). * - * TODO: should we allow handlers to longjmp()? Would be necessary to eg. let - * a Lua handler "yield" from the current coroutine. I *think* everything - * would "just work" with our current decoder. + * The handlers have no idea where the data is coming from, so a single set of + * handlers could be used with two completely different data sources (for + * example, a parser and a visitor over in-memory objects). This decoupling is + * the most important feature of upb, because it allows parsers and serializers + * to be highly reusable. + * + * This is a mixed C/C++ interface that offers a full API to both languages. + * See the top-level README for more information. 
*/ #ifndef UPB_HANDLERS_H #define UPB_HANDLERS_H -#include "upb/upb.h" #include "upb/def.h" -#include "upb/bytestream.h" #ifdef __cplusplus -extern "C" { +namespace upb { class Handlers; } +typedef upb::Handlers upb_handlers; +#else +struct upb_handlers; +typedef struct upb_handlers upb_handlers; #endif -/* Handlers protocol definition ***********************************************/ - -// A upb_handlers object represents a graph of handlers. Each message can have -// a set of handlers as well as a set of fields which themselves have handlers. -// Fields that represent submessages or groups are linked to other message -// handlers, so the overall set of handlers can form a graph structure (which -// may be cyclic). -// -// The upb_mhandlers (message handlers) object can have the following handlers: -// -// static upb_flow_t startmsg(void *closure) { -// // Called when the message begins. "closure" was supplied by our caller. -// return UPB_CONTINUE; -// } -// -// static void endmsg(void *closure, upb_status *status) { -// // Called when processing of this message ends, whether in success or -// // failure. "status" indicates the final status of processing, and can -// / also be modified in-place to update the final status. -// // -// // Since this callback is guaranteed to always be called eventually, it -// // can be used to free any resources that were allocated during processing. -// } -// -// TODO: unknown field handler. -// -// The upb_fhandlers (field handlers) object can have the following handlers: -// -// static upb_flow_t value(void *closure, upb_value fval, upb_value val) { -// // Called when the field's value is encountered. "fval" contains -// // whatever value was bound to this field at registration type -// // (for upb_register_all(), this will be the field's upb_fielddef*). -// return UPB_CONTINUE; -// } -// -// static upb_sflow_t startsubmsg(void *closure, upb_value fval) { -// // Called when a submessage begins. 
The second element of the return -// // value is the closure for the submessage. -// return UPB_CONTINUE_WITH(closure); -// } -// -// static upb_flow_t endsubmsg(void *closure, upb_value fval) { -// // Called when a submessage ends. -// return UPB_CONTINUE; -// } -// -// static upb_sflow_t startseq(void *closure, upb_value fval) { -// // Called when a sequence (repeated field) begins. The second element -// // of the return value is the closure for the sequence. -// return UPB_CONTINUE_WITH(closure); -// } -// -// static upb_flow_t endseq(void *closure, upb_value fval) { -// // Called when a sequence ends. -// return UPB_CONTINUE; -// } -// -// All handlers except the endmsg handler return a value from this enum, to -// control whether parsing will continue or not. +// All the different types of handlers that can be registered. +// Only needed for the advanced functions in upb::Handlers. typedef enum { - // Data source should continue calling callbacks. - UPB_CONTINUE = 0, + UPB_HANDLER_INT32, + UPB_HANDLER_INT64, + UPB_HANDLER_UINT32, + UPB_HANDLER_UINT64, + UPB_HANDLER_FLOAT, + UPB_HANDLER_DOUBLE, + UPB_HANDLER_BOOL, + UPB_HANDLER_STARTSTR, + UPB_HANDLER_STRING, + UPB_HANDLER_ENDSTR, + UPB_HANDLER_STARTSUBMSG, + UPB_HANDLER_ENDSUBMSG, + UPB_HANDLER_STARTSEQ, + UPB_HANDLER_ENDSEQ, +} upb_handlertype_t; + +#define UPB_HANDLER_MAX (UPB_HANDLER_ENDSEQ+1) + +#define UPB_BREAK NULL + +// A convenient definition for when no closure is needed. +extern char _upb_noclosure; +#define UPB_NO_CLOSURE &_upb_noclosure + +// A selector refers to a specific field handler in the Handlers object +// (for example: the STARTSUBMSG handler for field "field15"). +typedef uint32_t upb_selector_t; - // Halt processing permanently (in a non-resumable way). The endmsg handlers - // for any currently open messages will be called which can supply a more - // specific status message. No further input data will be consumed. 
- UPB_BREAK = -1, +#ifdef __cplusplus - // Skips to the end of the current submessage (or if we are at the top - // level, skips to the end of the entire message). In other words, it is - // like a UPB_BREAK that applies only to the current level. +// A upb::Handlers object represents the set of handlers associated with a +// message in the graph of messages. You can think of it as a big virtual +// table with functions corresponding to all the events that can fire while +// parsing or visiting a message of a specific type. +// +// Any handlers that are not set behave as if they had successfully consumed +// the value. For start* handlers that return a void* closure, an unset handler +// will propagate the existing closure. +class upb::Handlers { + public: + typedef upb_selector_t Selector; + typedef upb_handlertype_t Type; + + typedef bool StartMessageHandler(void* closure); + typedef void EndMessageHandler(void* closure, Status* status); + typedef void* StartFieldHandler(void* closure, void* data); + typedef bool EndFieldHandler(void *closure, void *data); + typedef void* StartStringHandler(void *c, void *d, size_t size_hint); + typedef size_t StringHandler(void *c, void *d, const char *buf, size_t len); + + template <class T> struct Value { + typedef bool Handler(void* closure, void* data, T val); + }; + + typedef Value<int32_t>::Handler Int32Handler; + typedef Value<int64_t>::Handler Int64Handler; + typedef Value<uint32_t>::Handler Uint32Handler; + typedef Value<uint64_t>::Handler Uint64Handler; + typedef Value<float>::Handler FloatHandler; + typedef Value<double>::Handler DoubleHandler; + typedef Value<bool>::Handler BoolHandler; + + // Any function pointer can be converted to this and converted back to its + // correct type. + typedef void GenericFunction(); + + // For freeing handler data. + typedef void Free(void *data); + + typedef void HandlersCallback(void *closure, upb_handlers *h); + + // Returns a new handlers object for the given frozen msgdef. 
A single ref + // will belong to the given owner. + // Returns NULL if memory allocation failed. + static Handlers* New(const MessageDef* m, const void *owner); + + // Convenience function for registering a graph of handlers that mirrors the + // graph of msgdefs for some message. For "m" and all its children a new set + // of handlers will be created and the given callback will be invoked, + // allowing the client to register handlers for this message. Note that any + // subhandlers set by the callback will be overwritten. + static const Handlers* NewFrozen(const MessageDef *m, const void *owner, + HandlersCallback *callback, void *closure); + + // Functionality from upb::RefCounted. + bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + // Freezes the given set of handlers. You may not freeze a handler without + // also freezing any handlers they point to. In the future we may want to + // require that all fields of the submessage have had subhandlers set for + // them. + static bool Freeze(Handlers*const* handlers, int n, Status* s); + + // Returns the msgdef associated with this handlers object. + const MessageDef* message_def() const; + + // Sets the startmsg handler for the message, which is defined as follows: + // + // bool startmsg(void *closure) { + // // Called when the message begins. Returns true if processing should + // // continue. + // return true; + // } + void SetStartMessageHandler(StartMessageHandler *handler); + StartMessageHandler *GetStartMessageHandler() const; + + // Sets the endmsg handler for the message, which is defined as follows: + // + // void endmsg(void *closure, upb_status *status) { + // // Called when processing of this message ends, whether in success or + // // failure. 
"status" indicates the final status of processing, and + // // can also be modified in-place to update the final status. + // } + void SetEndMessageHandler(EndMessageHandler *handler); + EndMessageHandler *GetEndMessageHandler() const; + + // Sets the value handler for the given field, which is defined as follows + // (this is for an int32 field; other field types will pass their native + // C/C++ type for "val"): + // + // bool value(void *closure, void *d, int32_t val) { + // // Called when the field's value is encountered. "d" contains + // // whatever data was bound to this field when it was registered. + // // Returns true if processing should continue. + // return true; + // } + // + // The value type must exactly match f->type(). + // For example, SetInt32Handler() may only be used for fields of type + // UPB_TYPE_INT32, UPB_TYPE_SINT32, UPB_TYPE_SFIXED32, and UPB_TYPE_ENUM. + // + // "d" is the data that will be bound to this callback and passed to it. + // If "fr" is non-NULL it will be run when the data is no longer needed. + // + // Returns "false" if "f" does not belong to this message or has the wrong + // type for this handler. + bool SetInt32Handler (const FieldDef* f, Int32Handler* h, void* d, Free* fr); + bool SetInt64Handler (const FieldDef* f, Int64Handler* h, void* d, Free* fr); + bool SetUint32Handler(const FieldDef* f, Uint32Handler* h, void* d, Free* fr); + bool SetUint64Handler(const FieldDef* f, Uint64Handler* h, void* d, Free* fr); + bool SetFloatHandler (const FieldDef* f, FloatHandler* h, void* d, Free* fr); + bool SetDoubleHandler(const FieldDef* f, DoubleHandler* h, void* d, Free* fr); + bool SetBoolHandler (const FieldDef* f, BoolHandler* h, void* d, Free* fr); + + // Sets handlers for a string field, which are defined as follows: + // + // void* startstr(void *closure, void *data, size_t size_hint) { + // // Called when a string value begins. The return value indicates the + // // closure for the string. 
"size_hint" indicates the size of the + // // string if it is known, however if the string is length-delimited + // // and the end-of-string is not available size_hint will be zero. + // // This case is indistinguishable from the case where the size is + // // known to be zero. + // // + // // TODO(haberman): is it important to distinguish these cases? + // // If we had ssize_t as a type we could make -1 "unknown", but + // // ssize_t is POSIX (not ANSI) and therefore less portable. + // // In practice I suspect it won't be important to distinguish. + // return closure; + // } // - // If you UPB_SKIPSUBMSG from a startmsg handler, the endmsg handler will - // be called to perform cleanup and return a status. Returning - // UPB_SKIPSUBMSG from a startsubmsg handler will *not* call the startmsg, - // endmsg, or endsubmsg handlers. + // size_t str(void *closure, void *data, const char *str, size_t len) { + // // Called for each buffer of string data; the multiple physical buffers + // // are all part of the same logical string. The return value indicates + // // how many bytes were consumed. If this number is less than "len", + // // this will also indicate that processing should be halted for now, + // // like returning false or UPB_BREAK from any other callback. If + // // number is greater than "len", the excess bytes will be skipped over + // // and not passed to the callback. + // return len; + // } // - // If UPB_SKIPSUBMSG is called from the top-level message, no further input - // data will be consumed. - UPB_SKIPSUBMSG = -2, + // bool endstr(void *closure, void *data) { + // // Called when a string value ends. + // return true; + // } + bool SetStartStringHandler(const FieldDef* f, StartStringHandler* h, + void* d, Free* fr); + bool SetStringHandler(const FieldDef* f, StringHandler* h, void* d, Free* fr); + bool SetEndStringHandler(const FieldDef* f, EndFieldHandler* h, + void* d, Free* fr); + + // A setter that is templated on the type of the value. 
+ template<class T> bool SetValueHandler( + const FieldDef* f, typename Value<T>::Handler* h, void* d, Free* fr); + + // Sets the startseq handler, which is defined as follows: + // + // void *startseq(void *closure, void *data) { + // // Called when a sequence (repeated field) begins. The returned + // // pointer indicates the closure for the sequence (or UPB_BREAK + // // to interrupt processing). + // return closure; + // } + // + // Returns "false" if "f" does not belong to this message or is not a + // repeated field. + // + // "data" is the data that will be bound to this callback and passed to it. + // If "cleanup" is non-NULL it will be run when the data is no longer needed. + bool SetStartSequenceHandler(const FieldDef* f, StartFieldHandler *handler, + void* data, Free* cleanup); - // TODO: Add UPB_SUSPEND, for resumable producers/consumers. -} upb_flow_t; + // Sets the startsubmsg handler for the given field, which is defined as + // follows: + // + // void *startsubmsg(void *closure, void *data) { + // // Called when a submessage begins. The returned pointer indicates the + // // closure for the submessage (or UPB_BREAK to interrupt processing). + // return closure; + // } + // + // "data" is the data that will be bound to this callback and passed to it. + // If "cleanup" is non-NULL it will be run when the data is no longer needed. + // + // Returns "false" if "f" does not belong to this message or is not a + // submessage/group field. + bool SetStartSubMessageHandler(const FieldDef* f, StartFieldHandler *handler, + void* data, Free* cleanup); -// The startsubmsg handler needs to also pass a closure to the submsg. -typedef struct { - upb_flow_t flow; - void *closure; -} upb_sflow_t; + // Sets the endsubmsg handler for the given field, which is defined as + // follows: + // + // bool endsubmsg(void *closure, void *data) { + // // Called when a submessage ends. Returns true to continue processing.
+ // return true; + // } + // + // "data" is the data that will be bound to this callback and passed to it. + // If "cleanup" is non-NULL it will be run when the data is no longer needed. + // + // Returns "false" if "f" does not belong to this message or is not a + // submessage/group field. + bool SetEndSubMessageHandler(const FieldDef* f, EndFieldHandler *handler, + void* data, Free* cleanup); -INLINE upb_sflow_t UPB_SFLOW(upb_flow_t flow, void *closure) { - upb_sflow_t ret = {flow, closure}; - return ret; -} -#define UPB_CONTINUE_WITH(c) UPB_SFLOW(UPB_CONTINUE, c) -#define UPB_SBREAK UPB_SFLOW(UPB_BREAK, NULL) + // Sets the endseq handler for the given field, which is defined as + // follows: + // + // bool endseq(void *closure, void *data) { + // // Called when a sequence ends. Returns true to continue processing. + // return true; + // } + // + // "data" is the data that will be bound to this callback and passed to it. + // If "cleanup" is non-NULL it will be run when the data is no longer needed. + // + // Returns "false" if "f" does not belong to this message or is not a + // repeated field. + bool SetEndSequenceHandler(const FieldDef* f, EndFieldHandler *handler, + void* data, Free* cleanup); + + // Sets or gets the object that specifies handlers for the given field, which + // must be a submessage or group. Returns NULL if no handlers are set. + bool SetSubHandlers(const FieldDef* f, const Handlers* sub); + const Handlers* GetSubHandlers(const FieldDef* f) const; + + // NOTE: The remaining functions in this class are mostly of interest to + // byte-code/JIT compilers (or upb internals); most users will not need them. + // These functions also require more care, since passing a selector that + // does not match the type of these handlers yields undefined behavior. + + // A selector refers to a specific field handler in the Handlers object + // (for example: the STARTSUBMSG handler for field "field15").
+ // On success, returns true and stores the selector in "s". + // If the FieldDef or Type are invalid, returns false. + // The returned selector is ONLY valid for Handlers whose MessageDef + // contains this FieldDef. + static bool GetSelector(const FieldDef* f, Type type, Selector* s); + + // Returns the function pointer for this handler. It is the client's + // responsibility to cast to the correct function type before calling it. + GenericFunction* GetHandler(Selector selector); + + // Returns the handler data that was registered with this handler. + void* GetHandlerData(Selector selector); + + // Gets the byte offset from a Handlers* where the given handler can be found. + // Useful for JITs that want to read the pointer in their fast path. + static size_t GetHandlerOffset(Selector selector); + + // Could add any of the following functions as-needed, with some minor + // implementation changes: + // + // const FieldDef* GetFieldDef(Selector selector); + // static bool IsSequence(Selector selector); + // Selector GetEndSelector(Selector start_selector); -// Typedefs for all of the handler functions defined above. -typedef upb_flow_t (upb_startmsg_handler)(void *c); -typedef void (upb_endmsg_handler)(void *c, upb_status *status); -typedef upb_flow_t (upb_value_handler)(void *c, upb_value fval, upb_value val); -typedef upb_sflow_t (upb_startfield_handler)(void *closure, upb_value fval); -typedef upb_flow_t (upb_endfield_handler)(void *closure, upb_value fval); + private: + UPB_DISALLOW_POD_OPS(Handlers); +#else +struct upb_handlers { +#endif + upb_refcounted base; + const upb_msgdef *msg; + bool (*startmsg)(void*); + void (*endmsg)(void*, upb_status*); + void *fh_base[1]; // Start of dynamically-sized field handler array. +}; -/* upb_fhandlers **************************************************************/ +// Native C API. 
+#ifdef __cplusplus +extern "C" { +#endif +typedef bool upb_startmsg_handler(void *c); +typedef void upb_endmsg_handler(void *c, upb_status *status); +typedef void* upb_startfield_handler(void *closure, void *d); +typedef bool upb_endfield_handler(void *closure, void *d); +typedef void upb_handlers_callback(void *closure, upb_handlers *h); +typedef void upb_handlerfree(void *d); +typedef void upb_func(); + +typedef bool upb_int32_handler(void *c, void *d, int32_t val); +typedef bool upb_int64_handler(void *c, void *d, int64_t val); +typedef bool upb_uint32_handler(void *c, void *d, uint32_t val); +typedef bool upb_uint64_handler(void *c, void *d, uint64_t val); +typedef bool upb_float_handler(void *c, void *d, float val); +typedef bool upb_double_handler(void *c, void *d, double val); +typedef bool upb_bool_handler(void *c, void *d, bool val); +typedef void* upb_startstr_handler(void *closure, void *d, size_t size_hint); +typedef size_t upb_string_handler(void *c, void *d, const char *buf, size_t n); + +upb_handlers *upb_handlers_new(const upb_msgdef *m, const void *owner); +const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m, + const void *owner, + upb_handlers_callback *callback, + void *closure); + +// From upb_refcounted. 
+void upb_handlers_unref(const upb_handlers *h, const void *owner); +bool upb_handlers_isfrozen(const upb_handlers *h); +void upb_handlers_ref(const upb_handlers *h, const void *owner); +void upb_handlers_donateref( + const upb_handlers *h, const void *from, const void *to); +void upb_handlers_checkref(const upb_handlers *h, const void *owner); + +bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s); +const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h); +void upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handler *handler); +upb_startmsg_handler *upb_handlers_getstartmsg(const upb_handlers *h); +void upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handler *handler); +upb_endmsg_handler *upb_handlers_getendmsg(const upb_handlers *h); +bool upb_handlers_setint32( + upb_handlers *h, const upb_fielddef *f, upb_int32_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setint64( + upb_handlers *h, const upb_fielddef *f, upb_int64_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setuint32( + upb_handlers *h, const upb_fielddef *f, upb_uint32_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setuint64( + upb_handlers *h, const upb_fielddef *f, upb_uint64_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setfloat( + upb_handlers *h, const upb_fielddef *f, upb_float_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setdouble( + upb_handlers *h, const upb_fielddef *f, upb_double_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setbool( + upb_handlers *h, const upb_fielddef *f, upb_bool_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setstartstr( + upb_handlers *h, const upb_fielddef *f, upb_startstr_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setstring( + upb_handlers *h, const upb_fielddef *f, upb_string_handler *handler, + void *d, upb_handlerfree *fr); +bool 
upb_handlers_setendstr( + upb_handlers *h, const upb_fielddef *f, upb_endfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setstartseq( + upb_handlers *h, const upb_fielddef *f, upb_startfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setstartsubmsg( + upb_handlers *h, const upb_fielddef *f, upb_startfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setendsubmsg( + upb_handlers *h, const upb_fielddef *f, upb_endfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setendseq( + upb_handlers *h, const upb_fielddef *f, upb_endfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setsubhandlers( + upb_handlers *h, const upb_fielddef *f, const upb_handlers *sub); +const upb_handlers *upb_handlers_getsubhandlers( + const upb_handlers *h, const upb_fielddef *f); +upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f); +bool upb_getselector( + const upb_fielddef *f, upb_handlertype_t type, upb_selector_t *s); +upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s); +void *upb_handlers_gethandlerdata(const upb_handlers *h, upb_selector_t s); +size_t upb_gethandleroffset(upb_selector_t s); + +// Internal-only. +uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f); +uint32_t upb_handlers_selectorcount(const upb_fielddef *f); +#ifdef __cplusplus +} // extern "C" +#endif -// A upb_fhandlers object represents the set of handlers associated with one -// specific message field. +// Convenience versions of the above that first look up the field by name. 
+#define DEFINE_NAME_SETTER(slot, type) \ + INLINE void upb_handlers_set ## slot ## _n( \ + upb_handlers *h, const char *name, type val, \ + void *d, upb_handlerfree *fr) { \ + upb_handlers_set ## slot(h, upb_msgdef_ntof( \ + upb_handlers_msgdef(h), name), val, d, fr); \ + } +DEFINE_NAME_SETTER(int32, upb_int32_handler*); +DEFINE_NAME_SETTER(int64, upb_int64_handler*); +DEFINE_NAME_SETTER(uint32, upb_uint32_handler*); +DEFINE_NAME_SETTER(uint64, upb_uint64_handler*); +DEFINE_NAME_SETTER(float, upb_float_handler*); +DEFINE_NAME_SETTER(double, upb_double_handler*); +DEFINE_NAME_SETTER(bool, upb_bool_handler*); +DEFINE_NAME_SETTER(startstr, upb_startstr_handler*); +DEFINE_NAME_SETTER(string, upb_string_handler*); +DEFINE_NAME_SETTER(endstr, upb_endfield_handler*); +DEFINE_NAME_SETTER(startseq, upb_startfield_handler*); +DEFINE_NAME_SETTER(startsubmsg, upb_startfield_handler*); +DEFINE_NAME_SETTER(endsubmsg, upb_endfield_handler*); +DEFINE_NAME_SETTER(endseq, upb_endfield_handler*); +#undef DEFINE_NAME_SETTER + +// Value writers for every in-memory type: write the data to a known offset +// from the closure "c." These depend on the fval being a pointer to a +// structure that is (or begins with) the upb_stdmsg_fval type. // -// TODO: remove upb_decoder-specific fields from this, and instead have -// upb_decoderplan make a deep copy of the whole graph with its own fields -// added. -struct _upb_decoder; -struct _upb_mhandlers; -typedef struct _upb_fieldent { - upb_fieldtype_t type; - bool repeated; - uint32_t refcount; - uint32_t number; +// TODO(haberman): These are hacky; remove them and replace with an API that +// lets you set a simple "writer" handler in a way that can generate +// specialized code right then. 
+ +typedef struct upb_stdmsg_fval { +#ifdef __cplusplus + upb_stdmsg_fval(size_t offset_, int32_t hasbit_) + : offset(offset_), + hasbit(hasbit_) { + } +#endif + size_t offset; int32_t hasbit; - struct _upb_mhandlers *msg; - struct _upb_mhandlers *submsg; // Set iff upb_issubmsgtype(type) == true. - upb_value fval; - upb_value_handler *value; - upb_startfield_handler *startsubmsg; - upb_endfield_handler *endsubmsg; - upb_startfield_handler *startseq; - upb_endfield_handler *endseq; -#ifdef UPB_USE_JIT_X64 - uint32_t jit_pclabel; - uint32_t jit_pclabel_notypecheck; - uint32_t jit_submsg_done_pclabel; +} upb_stdmsg_fval; + +#ifdef __cplusplus +extern "C" { #endif -} upb_fhandlers; - -// fhandlers are created as part of a upb_handlers instance, but can be ref'd -// and unref'd to prolong the life of the handlers. -void upb_fhandlers_ref(upb_fhandlers *m); -void upb_fhandlers_unref(upb_fhandlers *m); - -// upb_fhandlers accessors -#define UPB_FHANDLERS_ACCESSORS(name, type) \ - INLINE void upb_fhandlers_set ## name(upb_fhandlers *f, type v){f->name = v;} \ - INLINE type upb_fhandlers_get ## name(const upb_fhandlers *f) { return f->name; } -// TODO(haberman): need a way of keeping the fval alive even if a plan outlasts -// the handlers. -UPB_FHANDLERS_ACCESSORS(fval, upb_value) -UPB_FHANDLERS_ACCESSORS(value, upb_value_handler*) -UPB_FHANDLERS_ACCESSORS(startsubmsg, upb_startfield_handler*) -UPB_FHANDLERS_ACCESSORS(endsubmsg, upb_endfield_handler*) -UPB_FHANDLERS_ACCESSORS(startseq, upb_startfield_handler*) -UPB_FHANDLERS_ACCESSORS(endseq, upb_endfield_handler*) -UPB_FHANDLERS_ACCESSORS(msg, struct _upb_mhandlers*) -UPB_FHANDLERS_ACCESSORS(submsg, struct _upb_mhandlers*) -// If set to >= 0, the hasbit will automatically be set when the corresponding -// field is parsed (when a JIT is enabled, this can be significantly more -// efficient than setting the hasbit yourself inside the callback). 
For values -// it is undefined whether the hasbit is set before or after the callback is -// called. For seq and submsg, the hasbit is set *after* the start handler is -// called, but before any of the handlers for the submsg or sequence. -UPB_FHANDLERS_ACCESSORS(hasbit, int32_t) - - -/* upb_mhandlers **************************************************************/ - -// A upb_mhandlers object represents the set of handlers associated with a -// message in the graph of messages. - -typedef struct _upb_mhandlers { - uint32_t refcount; - upb_startmsg_handler *startmsg; - upb_endmsg_handler *endmsg; - upb_inttable fieldtab; // Maps field number -> upb_fhandlers. - bool is_group; -#ifdef UPB_USE_JIT_X64 - // Used inside the JIT to track labels (jmp targets) in the generated code. - uint32_t jit_startmsg_pclabel; // Starting a parse of this (sub-)message. - uint32_t jit_afterstartmsg_pclabel; // After calling the startmsg handler. - uint32_t jit_endofbuf_pclabel; // ptr hitend, but delim_end or jit_end? - uint32_t jit_endofmsg_pclabel; // Done parsing this (sub-)message. - uint32_t jit_dyndispatch_pclabel; // Dispatch by table lookup. - uint32_t jit_unknownfield_pclabel; // Parsed an unknown field. - uint32_t max_field_number; - // Currently keyed on field number. Could also try keying it - // on encoded or decoded tag, or on encoded field number. - void **tablearray; - // Pointer to the JIT code for parsing this message. 
- void *jit_func; +bool upb_stdmsg_setint32(void *c, void *d, int32_t val); +bool upb_stdmsg_setint64(void *c, void *d, int64_t val); +bool upb_stdmsg_setuint32(void *c, void *d, uint32_t val); +bool upb_stdmsg_setuint64(void *c, void *d, uint64_t val); +bool upb_stdmsg_setfloat(void *c, void *d, float val); +bool upb_stdmsg_setdouble(void *c, void *d, double val); +bool upb_stdmsg_setbool(void *c, void *d, bool val); +#ifdef __cplusplus +} // extern "C" #endif -} upb_mhandlers; - -// mhandlers are created as part of a upb_handlers instance, but can be ref'd -// and unref'd to prolong the life of the handlers. -void upb_mhandlers_ref(upb_mhandlers *m); -void upb_mhandlers_unref(upb_mhandlers *m); - -// Creates a new field with the given name and number. There must not be an -// existing field with either this name or number or abort() will be called. -// TODO: this should take a name also. -upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, bool repeated); -// Like the previous but for MESSAGE or GROUP fields. For GROUP fields, the -// given submessage must not have any fields with this field number. -upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, - bool repeated, - upb_mhandlers *subm); - -// upb_mhandlers accessors. -#define UPB_MHANDLERS_ACCESSORS(name, type) \ - INLINE void upb_mhandlers_set ## name(upb_mhandlers *m, type v){m->name = v;} \ - INLINE type upb_mhandlers_get ## name(upb_mhandlers *m) { return m->name; } -UPB_MHANDLERS_ACCESSORS(startmsg, upb_startmsg_handler*); -UPB_MHANDLERS_ACCESSORS(endmsg, upb_endmsg_handler*); - -// Returns fhandlers for the given field, or NULL if none. -upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n); - - -/* upb_handlers ***************************************************************/ - -struct _upb_handlers { - uint32_t refcount; - upb_mhandlers **msgs; // Array of msgdefs, [0]=toplevel. 
- int msgs_len, msgs_size; - bool should_jit; -}; -typedef struct _upb_handlers upb_handlers; - -upb_handlers *upb_handlers_new(void); -void upb_handlers_ref(upb_handlers *h); -void upb_handlers_unref(upb_handlers *h); - -// Appends a new message to the graph of handlers and returns it. This message -// can be obtained later at index upb_handlers_msgcount()-1. All handlers will -// be initialized to no-op handlers. -upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h); -upb_mhandlers *upb_handlers_getmhandlers(upb_handlers *h, int index); - -// Convenience function for registering handlers for all messages and -// fields in a msgdef and all its children. For every registered message -// "msgreg_cb" will be called with the newly-created mhandlers, and likewise -// with "fieldreg_cb" -// -// See upb_handlers_reghandlerset() below for an example. -typedef void upb_onmsgreg( - void *closure, upb_mhandlers *mh, const upb_msgdef *m); -typedef void upb_onfieldreg( - void *closure, upb_fhandlers *fh, const upb_fielddef *f); -upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m, - upb_onmsgreg *msgreg_cb, - upb_onfieldreg *fieldreg_cb, - void *closure); - -// Convenience function for registering a set of handlers for all messages and -// fields in a msgdef and its children, with the fval bound to the upb_fielddef. -// Any of the handlers may be NULL, in which case no callback will be set and -// the nop callback will be used. 
-typedef struct { - upb_startmsg_handler *startmsg; - upb_endmsg_handler *endmsg; - upb_value_handler *value; - upb_startfield_handler *startsubmsg; - upb_endfield_handler *endsubmsg; - upb_startfield_handler *startseq; - upb_endfield_handler *endseq; -} upb_handlerset; - -INLINE void upb_onmreg_hset(void *c, upb_mhandlers *mh, const upb_msgdef *m) { - (void)m; - upb_handlerset *hs = (upb_handlerset*)c; - if (hs->startmsg) upb_mhandlers_setstartmsg(mh, hs->startmsg); - if (hs->endmsg) upb_mhandlers_setendmsg(mh, hs->endmsg); -} -INLINE void upb_onfreg_hset(void *c, upb_fhandlers *fh, const upb_fielddef *f) { - upb_handlerset *hs = (upb_handlerset*)c; - if (hs->value) upb_fhandlers_setvalue(fh, hs->value); - if (hs->startsubmsg) upb_fhandlers_setstartsubmsg(fh, hs->startsubmsg); - if (hs->endsubmsg) upb_fhandlers_setendsubmsg(fh, hs->endsubmsg); - if (hs->startseq) upb_fhandlers_setstartseq(fh, hs->startseq); - if (hs->endseq) upb_fhandlers_setendseq(fh, hs->endseq); - upb_value val; - upb_value_setfielddef(&val, f); - upb_fhandlers_setfval(fh, val); -} -INLINE upb_mhandlers *upb_handlers_reghandlerset( - upb_handlers *h, const upb_msgdef *m, upb_handlerset *hs) { - return upb_handlers_regmsgdef(h, m, &upb_onmreg_hset, &upb_onfreg_hset, hs); -} - - -/* upb_dispatcher *************************************************************/ - -// WARNING: upb_dispatcher should be considered INTERNAL-ONLY. The interface -// between it and upb_decoder is somewhat tightly coupled and may change. -// -// upb_dispatcher can be used by sources of data to invoke the appropriate -// handlers on a upb_handlers object. Besides maintaining the runtime stack of -// closures and handlers, the dispatcher checks the return status of user -// callbacks and properly handles statuses other than UPB_CONTINUE, invoking -// "skip" or "exit" handlers on the underlying data source as appropriate. 
- -typedef struct { - upb_fhandlers *f; - void *closure; - uint64_t end_ofs; - bool is_sequence; // frame represents seq or submsg? (f might be both). - bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX - // (strings aren't pushed). -} upb_dispatcher_frame; - -typedef void upb_exit_handler(void *); - -typedef struct { - upb_dispatcher_frame *top, *limit; - - // Msg and dispatch table for the current level. - upb_mhandlers *msgent; - upb_mhandlers *toplevel_msgent; - upb_exit_handler UPB_NORETURN *exitjmp; - void *srcclosure; - bool top_is_implicit; - - // Stack. - upb_status *status; - upb_dispatcher_frame stack[UPB_MAX_NESTING]; -} upb_dispatcher; - -// Caller retains ownership of the status object. -void upb_dispatcher_init(upb_dispatcher *d, upb_status *status, - upb_exit_handler UPB_NORETURN *exit, void *closure); -upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure, - upb_mhandlers *top_msg); -void upb_dispatcher_uninit(upb_dispatcher *d); - -// Tests whether the message could legally end here (either the stack is empty -// or the only open stack frame is implicit). -bool upb_dispatcher_islegalend(upb_dispatcher *d); - -// Unwinds one or more stack frames based on the given flow constant that was -// just returned from a handler. Calls end handlers as appropriate. -void _upb_dispatcher_abortjmp(upb_dispatcher *d) UPB_NORETURN; - -INLINE void _upb_dispatcher_sethas(void *_p, int32_t hasbit) { - char *p = (char*)_p; - if (hasbit >= 0) p[(uint32_t)hasbit / 8] |= (1 << ((uint32_t)hasbit % 8)); -} - -// Dispatch functions -- call the user handler and handle errors. 
-INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f, - upb_value val) { - upb_flow_t flow = UPB_CONTINUE; - if (f->value) flow = f->value(d->top->closure, f->fval, val); - _upb_dispatcher_sethas(d->top->closure, f->hasbit); - if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d); -} -void upb_dispatch_startmsg(upb_dispatcher *d); -void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status); -upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, - upb_fhandlers *f); -upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d); -upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, - upb_fhandlers *f); -upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d); #ifdef __cplusplus -} /* extern "C" */ + +namespace upb { + +// C++ Wrappers +inline Handlers* Handlers::New(const MessageDef* m, const void *owner) { + return upb_handlers_new(m, owner); +} +inline const Handlers* Handlers::NewFrozen( + const MessageDef *m, const void *owner, + upb_handlers_callback *callback, void *closure) { + return upb_handlers_newfrozen(m, owner, callback, closure); +} +inline bool Handlers::IsFrozen() const { + return upb_handlers_isfrozen(this); +} +inline void Handlers::Ref(const void* owner) const { + upb_handlers_ref(this, owner); +} +inline void Handlers::Unref(const void* owner) const { + upb_handlers_unref(this, owner); +} +inline void Handlers::DonateRef(const void *from, const void *to) const { + upb_handlers_donateref(this, from, to); +} +inline void Handlers::CheckRef(const void *owner) const { + upb_handlers_checkref(this, owner); +} +inline bool Handlers::Freeze(Handlers*const* handlers, int n, Status* s) { + return upb_handlers_freeze(handlers, n, s); +} +inline const MessageDef* Handlers::message_def() const { + return upb_handlers_msgdef(this); +} +inline void Handlers::SetStartMessageHandler( + Handlers::StartMessageHandler *handler) { + upb_handlers_setstartmsg(this, handler); +} +inline void 
Handlers::SetEndMessageHandler( + Handlers::EndMessageHandler *handler) { + upb_handlers_setendmsg(this, handler); +} +inline bool Handlers::SetInt32Handler( + const FieldDef *f, Handlers::Int32Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setint32(this, f, handler, d, fr); +} +inline bool Handlers::SetInt64Handler( + const FieldDef *f, Handlers::Int64Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setint64(this, f, handler, d, fr); +} +inline bool Handlers::SetUint32Handler( + const FieldDef *f, Handlers::Uint32Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setuint32(this, f, handler, d, fr); +} +inline bool Handlers::SetUint64Handler( + const FieldDef *f, Handlers::Uint64Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setuint64(this, f, handler, d, fr); +} +inline bool Handlers::SetFloatHandler( + const FieldDef *f, Handlers::FloatHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setfloat(this, f, handler, d, fr); +} +inline bool Handlers::SetDoubleHandler( + const FieldDef *f, Handlers::DoubleHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setdouble(this, f, handler, d, fr); +} +inline bool Handlers::SetBoolHandler( + const FieldDef *f, Handlers::BoolHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setbool(this, f, handler, d, fr); +} +inline bool Handlers::SetStartStringHandler( + const FieldDef* f, Handlers::StartStringHandler* handler, + void* d, Handlers::Free* fr) { + return upb_handlers_setstartstr(this, f, handler, d, fr); +} +inline bool Handlers::SetEndStringHandler( + const FieldDef* f, Handlers::EndFieldHandler* handler, + void* d, Handlers::Free* fr) { + return upb_handlers_setendstr(this, f, handler, d, fr); +} +inline bool Handlers::SetStringHandler( + const FieldDef *f, Handlers::StringHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setstring(this, f, 
handler, d, fr); +} +inline bool Handlers::SetStartSequenceHandler( + const FieldDef* f, Handlers::StartFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setstartseq(this, f, handler, d, fr); +} +inline bool Handlers::SetStartSubMessageHandler( + const FieldDef* f, Handlers::StartFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setstartsubmsg(this, f, handler, d, fr); +} +inline bool Handlers::SetEndSubMessageHandler( + const FieldDef* f, Handlers::EndFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setendsubmsg(this, f, handler, d, fr); +} +inline bool Handlers::SetEndSequenceHandler( + const FieldDef* f, Handlers::EndFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setendseq(this, f, handler, d, fr); +} +inline bool Handlers::SetSubHandlers( + const FieldDef* f, const Handlers* sub) { + return upb_handlers_setsubhandlers(this, f, sub); +} +inline Handlers::StartMessageHandler *Handlers::GetStartMessageHandler() const { + return upb_handlers_getstartmsg(this); +} +inline Handlers::EndMessageHandler *Handlers::GetEndMessageHandler() const { + return upb_handlers_getendmsg(this); +} +inline const Handlers* Handlers::GetSubHandlers( + const FieldDef* f) const { + return upb_handlers_getsubhandlers(this, f); +} +inline bool Handlers::GetSelector( + const FieldDef* f, Handlers::Type type, Handlers::Selector* s) { + return upb_getselector(f, type, s); +} +inline Handlers::GenericFunction* Handlers::GetHandler( + Handlers::Selector selector) { + return upb_handlers_gethandler(this, selector); +} +inline void* Handlers::GetHandlerData(Handlers::Selector selector) { + return upb_handlers_gethandlerdata(this, selector); +} +inline size_t Handlers::GetHandlerOffset(Handlers::Selector selector) { + return upb_gethandleroffset(selector); +} + +#define SET_VALUE_HANDLER(type, ctype) \ + template<> \ + inline bool Handlers::SetValueHandler<ctype>( \ + const 
FieldDef* f, \ + typename Handlers::Value<ctype>::Handler* handler, \ + void* data, Handlers::Free* cleanup) { \ + return upb_handlers_set ## type(this, f, handler, data, cleanup); \ + } +SET_VALUE_HANDLER(double, double); +SET_VALUE_HANDLER(float, float); +SET_VALUE_HANDLER(uint64, uint64_t); +SET_VALUE_HANDLER(uint32, uint32_t); +SET_VALUE_HANDLER(int64, int64_t); +SET_VALUE_HANDLER(int32, int32_t); +SET_VALUE_HANDLER(bool, bool); +#undef SET_VALUE_HANDLER + +template <class T> void DeletePointer(void *p) { delete static_cast<T*>(p); } + +template <class T> +void SetStoreValueHandler( + const FieldDef* f, size_t offset, int32_t hasbit, Handlers* h); + +// Specializes SetStoreValueHandler<ctype>: registers upb_stdmsg_set<type> as +// the value handler for field f on h, bound to a newly allocated +// upb_stdmsg_fval(offset, hasbit), and passes DeletePointer<upb_stdmsg_fval> +// as the cleanup function for that handler data. +#define SET_STORE_VALUE_HANDLER(type, ctype) \ + template <> \ + inline void SetStoreValueHandler<ctype>(const FieldDef* f, size_t offset, \ + int32_t hasbit, Handlers* h) { \ + h->SetValueHandler<ctype>( \ + f, upb_stdmsg_set ## type, new upb_stdmsg_fval(offset, hasbit), \ + &upb::DeletePointer<upb_stdmsg_fval>); \ + } + +SET_STORE_VALUE_HANDLER(double, double); +SET_STORE_VALUE_HANDLER(float, float); +SET_STORE_VALUE_HANDLER(uint64, uint64_t); +SET_STORE_VALUE_HANDLER(uint32, uint32_t); +SET_STORE_VALUE_HANDLER(int64, int64_t); +SET_STORE_VALUE_HANDLER(int32, int32_t); +SET_STORE_VALUE_HANDLER(bool, bool); +#undef SET_STORE_VALUE_HANDLER + +} // namespace upb #endif #endif |