diff options
author | Joshua Haberman <joshua@reverberate.org> | 2011-07-15 12:05:43 -0700 |
---|---|---|
committer | Joshua Haberman <joshua@reverberate.org> | 2011-07-15 12:05:43 -0700 |
commit | 10265aa56b22ac4f04e7ba08330138e4507534e4 (patch) | |
tree | e821c85219a10b4ee3df715ab774465fdf87cf1d /upb/msg.h | |
parent | 6a1f3a66939308668ab8dce0d195afec16e02af9 (diff) |
Directory restructure.
Includes are now via upb/foo.h.
Files specific to the protobuf format are
now in upb/pb (the core library is concerned
with message definitions, handlers, and
byte streams, but knows nothing about any
particular serialization format).
Diffstat (limited to 'upb/msg.h')
-rw-r--r-- | upb/msg.h | 270 |
1 files changed, 270 insertions, 0 deletions
diff --git a/upb/msg.h b/upb/msg.h new file mode 100644 index 0000000..625d805 --- /dev/null +++ b/upb/msg.h @@ -0,0 +1,270 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * Routines for reading and writing message data to an in-memory structure, + * similar to a C struct. + * + * upb does not define one single message object that everyone must use. + * Rather it defines an abstract interface for reading and writing members + * of a message object, and all of the parsers and serializers use this + * abstract interface. This allows upb's parsers and serializers to be used + * regardless of what memory management scheme or synchronization model the + * application is using. + * + * A standard set of accessors is provided for doing simple reads and writes at + * a known offset into the message. These accessors should be used when + * possible, because they are specially optimized -- for example, the JIT can + * recognize them and emit specialized code instead of having to call the + * function at all. The application can substitute its own accessors when the + * standard accessors are not suitable. + */ + +#ifndef UPB_MSG_H +#define UPB_MSG_H + +#include <stdlib.h> +#include "upb/def.h" +#include "upb/handlers.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/* upb_accessor ***************************************************************/ + +// A upb_accessor is a table of function pointers for doing reads and writes +// for one specific upb_fielddef. Each field has a separate accessor, which +// lives in the fielddef. 
+ +typedef bool upb_has_reader(void *m, upb_value fval); +typedef upb_value upb_value_reader(void *m, upb_value fval); + +typedef void *upb_seqbegin_handler(void *s); +typedef void *upb_seqnext_handler(void *s, void *iter); +typedef upb_value upb_seqget_handler(void *iter); +INLINE bool upb_seq_done(void *iter) { return iter == NULL; } + +typedef struct _upb_accessor_vtbl { + // Writers. These take an fval as a parameter because the callbacks are used + // as upb_handlers, but the fval is always the fielddef for that field. + upb_startfield_handler *appendseq; // Repeated fields only. + upb_startfield_handler *appendsubmsg; // Submsg fields (repeated or no). + upb_value_handler *set; // Scalar fields (repeated or no). + + // Readers. + upb_has_reader *has; + upb_value_reader *get; + upb_seqbegin_handler *seqbegin; + upb_seqnext_handler *seqnext; + upb_seqget_handler *seqget; +} upb_accessor_vtbl; + +// Registers handlers for writing into a message of the given type. +upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, upb_msgdef *m); + +// Returns an stdmsg accessor for the given fielddef. +upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f); + + +/* upb_msg/upb_seq ************************************************************/ + +// upb_msg and upb_seq allow for generic access to a message through its +// accessor vtable. Note that these do *not* allow you to create, destroy, or +// take references on the objects -- these operations are specifically outside +// the scope of what the accessors define. + +// Clears all hasbits. +// TODO: Add a separate function for setting primitive values back to their +// defaults (but not strings, submessages, or arrays). +void upb_msg_clear(void *msg, upb_msgdef *md); + +// Could add a method that recursively clears submessages, strings, and +// arrays if desired. This could be a win if you wanted to merge without +// needing hasbits, because during parsing you would never clear submessages +// or arrays. 
Also this could be desired to provide proto2 operations on +// generated messages. + +INLINE bool upb_msg_has(void *m, upb_fielddef *f) { + return f->accessor && f->accessor->has(m, f->fval); +} + +// May only be called for fields that are known to be set. +INLINE upb_value upb_msg_get(void *m, upb_fielddef *f) { + assert(upb_msg_has(m, f)); + return f->accessor->get(m, f->fval); +} + +INLINE void *upb_seq_begin(void *s, upb_fielddef *f) { + assert(f->accessor); + return f->accessor->seqbegin(s); +} +INLINE void *upb_seq_next(void *s, void *iter, upb_fielddef *f) { + assert(f->accessor); + assert(!upb_seq_done(iter)); + return f->accessor->seqnext(s, iter); +} +INLINE upb_value upb_seq_get(void *iter, upb_fielddef *f) { + assert(f->accessor); + assert(!upb_seq_done(iter)); + return f->accessor->seqget(iter); +} + + +/* upb_msgvisitor *************************************************************/ + +// A upb_msgvisitor reads data from an in-memory structure using its accessors, +// pushing the results to a given set of upb_handlers. +// TODO: not yet implemented. + +typedef struct { + upb_fhandlers *fh; + upb_fielddef *f; + uint16_t msgindex; // Only when upb_issubmsg(f). +} upb_msgvisitor_field; + +typedef struct { + upb_msgvisitor_field *fields; + int fields_len; +} upb_msgvisitor_msg; + +typedef struct { + uint16_t msgindex; + uint16_t fieldindex; + uint32_t arrayindex; // UINT32_MAX if not an array frame. +} upb_msgvisitor_frame; + +typedef struct { + upb_msgvisitor_msg *messages; + int messages_len; + upb_dispatcher dispatcher; +} upb_msgvisitor; + +// Initializes a msgvisitor that will push data from messages of the given +// msgdef to the given set of handlers. +void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h); +void upb_msgvisitor_uninit(upb_msgvisitor *v); + +void upb_msgvisitor_reset(upb_msgvisitor *v, void *m); +void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status); + + +/* Standard writers. 
**********************************************************/ + +// Allocates a new stdmsg. +void *upb_stdmsg_new(upb_msgdef *md); + +// Recursively frees any strings or submessages that the message refers to. +void upb_stdmsg_free(void *m, upb_msgdef *md); + +// "hasbit" must be <= UPB_MAX_FIELDS. If it is <0, this field has no hasbit. +upb_value upb_stdmsg_packfval(int16_t hasbit, uint16_t value_offset); +upb_value upb_stdmsg_packfval_subm(int16_t hasbit, uint16_t value_offset, + uint16_t subm_size, uint8_t subm_setbytes); + +// Value writers for every in-memory type: write the data to a known offset +// from the closure "c" and set the hasbit (if any). +// TODO: can we get away with having only one for int64, uint64, double, etc? +// The main thing in the way atm is that the upb_value is strongly typed. +// in debug mode. +upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setuint32(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setdouble(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val); + +// Value writers for repeated fields: the closure points to a standard array +// struct, appends the value to the end of the array, resizing with realloc() +// if necessary. +typedef struct { + char *ptr; + uint32_t len; // Number of elements present. + uint32_t size; // Number of elements allocated. 
+} upb_stdarray; + +upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setint32_r(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setuint64_r(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setuint32_r(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setdouble_r(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setfloat_r(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setbool_r(void *c, upb_value fval, upb_value val); + +// Writers for C strings (NULL-terminated): we can find a char* at a known +// offset from the closure "c". Calls realloc() on the pointer to allocate +// the memory (TODO: investigate whether checking malloc_usable_size() would +// be cheaper than realloc()). Also sets the hasbit, if any. +// +// Since the string is NULL terminated and does not store an explicit length, +// these are not suitable for binary data that can contain NULLs. +upb_flow_t upb_stdmsg_setcstr(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setcstr_r(void *c, upb_value fval, upb_value val); + +// Writers for length-delimited strings: we explicitly store the length, so +// the data can contain NULLs. Stores the data using upb_stdarray +// which is located at a known offset from the closure "c" (note that it +// is included inline rather than pointed to). Also sets the hasbit, if any. +upb_flow_t upb_stdmsg_setstr(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setstr_r(void *c, upb_value fval, upb_value val); + +// Writers for startseq and startmsg which allocate (or reuse, if possible) +// a sub data structure (upb_stdarray or a submessage, respectively), +// setting the hasbit. If the hasbit is already set, the existing data +// structure is used verbatim. If the hasbit is not already set, the pointer +// is checked for NULL. If it is NULL, a new substructure is allocated, +// cleared, and used. 
If it is not NULL, the existing substructure is +// cleared and reused. +// +// If there is no hasbit, we always behave as if the hasbit was not set, +// so any existing data for this array or submessage is cleared. In most +// cases this will be fine since each array or non-repeated submessage should +// occur at most once in the stream. But if the client is using "concatenation +// as merging", it will want to make sure hasbits are allocated so merges can +// happen appropriately. +// +// If there was a demand for the behavior that absence of a hasbit acts as if +// the bit was always set, we could provide that also. But Clear() would need +// to act recursively, which is less efficient since it requires an extra pass +// over the tree. +upb_sflow_t upb_stdmsg_startseq(void *c, upb_value fval); +upb_sflow_t upb_stdmsg_startsubmsg(void *c, upb_value fval); +upb_sflow_t upb_stdmsg_startsubmsg_r(void *c, upb_value fval); + + +/* Standard readers. **********************************************************/ + +bool upb_stdmsg_has(void *c, upb_value fval); +void *upb_stdmsg_seqbegin(void *c); + +upb_value upb_stdmsg_getint64(void *c, upb_value fval); +upb_value upb_stdmsg_getint32(void *c, upb_value fval); +upb_value upb_stdmsg_getuint64(void *c, upb_value fval); +upb_value upb_stdmsg_getuint32(void *c, upb_value fval); +upb_value upb_stdmsg_getdouble(void *c, upb_value fval); +upb_value upb_stdmsg_getfloat(void *c, upb_value fval); +upb_value upb_stdmsg_getbool(void *c, upb_value fval); +upb_value upb_stdmsg_getptr(void *c, upb_value fval); + +void *upb_stdmsg_8byte_seqnext(void *c, void *iter); +void *upb_stdmsg_4byte_seqnext(void *c, void *iter); +void *upb_stdmsg_1byte_seqnext(void *c, void *iter); + +upb_value upb_stdmsg_seqgetint64(void *c); +upb_value upb_stdmsg_seqgetint32(void *c); +upb_value upb_stdmsg_seqgetuint64(void *c); +upb_value upb_stdmsg_seqgetuint32(void *c); +upb_value upb_stdmsg_seqgetdouble(void *c); +upb_value upb_stdmsg_seqgetfloat(void *c); 
+upb_value upb_stdmsg_seqgetbool(void *c); +upb_value upb_stdmsg_seqgetptr(void *c); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif |