diff options
author | Josh Haberman <jhaberman@gmail.com> | 2016-09-09 14:03:25 -0700 |
---|---|---|
committer | Josh Haberman <jhaberman@gmail.com> | 2016-11-29 17:56:13 +0000 |
commit | 4b0c4ca7fb0aa9207af3398e04534b23fbb88f27 (patch) | |
tree | 045750c6262e74f366ae2ec29797d3816005c21a /upb/msg.h | |
parent | 77c97fd3f29caa5c243294b5f4e6763b3ed3c36f (diff) |
New upb_msg code and Lua bindings around it.
There are still some things that are unfinished,
but we are at parity with what Lua had before.
Diffstat (limited to 'upb/msg.h')
-rw-r--r-- | upb/msg.h | 340 |
1 files changed, 340 insertions, 0 deletions
diff --git a/upb/msg.h b/upb/msg.h new file mode 100644 index 0000000..912d6ac --- /dev/null +++ b/upb/msg.h @@ -0,0 +1,340 @@ +/* +** upb::Message is a representation for protobuf messages. +** +** However it differs from other common representations like +** google::protobuf::Message in one key way: it does not prescribe any +** ownership semantics, and it does not perform any memory management. +** +** A client can access a upb::Message without knowing anything about +** ownership semantics, but to create or mutate a message a user needs +** to implement the memory management themselves. +** +** Currently all messages, arrays, and maps store a upb_alloc* internally. +** Mutating operations use this when they require dynamically-allocated +** memory. We could potentially eliminate this size overhead later by +** letting the user flip a bit on the factory that prevents this from +** being stored. The user would then need to use separate functions where +** the upb_alloc* is passed explicitly. However for handlers to populate +** such structures, they would need a place to store this upb_alloc* during +** parsing; upb_handlers don't currently have a good way to accommodate this. +** +** TODO: UTF-8 checking? 
+**/ + +#ifndef UPB_MSG_H_ +#define UPB_MSG_H_ + +#include "upb/def.h" +#include "upb/handlers.h" +#include "upb/sink.h" +#include "upb/symtab.h" + +#ifdef __cplusplus + +namespace upb { +class MessageLayout; +class MessageFactory; +} + +#endif + +UPB_DECLARE_TYPE(upb::MessageFactory, upb_msgfactory) +UPB_DECLARE_TYPE(upb::MessageLayout, upb_msglayout) +UPB_DECLARE_TYPE(upb::Array, upb_array) +UPB_DECLARE_TYPE(upb::Map, upb_map) +UPB_DECLARE_TYPE(upb::MapIterator, upb_mapiter) +UPB_DECLARE_TYPE(upb::Visitor, upb_visitor) +UPB_DECLARE_TYPE(upb::VisitorPlan, upb_visitorplan) + +/* TODO(haberman): C++ accessors */ + +UPB_BEGIN_EXTERN_C + + +/** upb_msglayout *************************************************************/ + +/* Please note that map_entry messages (upb_msgdef_mapentry(m) == true) cannot + * have layouts. They can only be represented as upb_map, not as a message. */ + +/* Requires that upb_fielddef_issubmsg(upb_msglayout_msgdef(l)) == true. + * + * NOTE(review): upb_msglayout_msgdef(l) returns a upb_msgdef, not a fielddef; + * presumably the intended requirement is upb_fielddef_issubmsg(f) == true -- + * confirm. + * + * Since map entry messages don't have layouts, if upb_fielddef_ismap(f) == true + * then this function will return the layout for the map's value. It requires + * that the value type of the map field is a submessage. */ +const upb_msglayout *upb_msglayout_sublayout(const upb_msglayout *l, + const upb_fielddef *f); + +const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l); + + +/** upb_msgfactory ************************************************************/ + +upb_msgfactory *upb_msgfactory_new(const upb_symtab *symtab); +void upb_msgfactory_free(upb_msgfactory *f); + +/* Requires: + * - m is in upb_msgfactory_symtab(f) + * - upb_msgdef_mapentry(m) == false (since map messages can't have layouts). + * + * The returned layout will live for as long as the msgfactory does. + */ +const upb_msglayout *upb_msgfactory_getlayout(const upb_msgfactory *f, + const upb_msgdef *m); + +/* Returns handlers for populating a message with the given msgdef. 
*/ +const upb_handlers *upb_msgfactory_getmergehandlers(const upb_msgfactory *f, + const upb_msgdef *m); + +/* Returns a plan for visiting the data and submessages of the given msgdef. */ +const upb_visitorplan *upb_visitorplan_new(const upb_msgfactory *f, + const upb_msgdef *m); + +/* For actually visiting a message and its submessages. */ +upb_visitor *upb_visitor_create(upb_env *e, const upb_visitorplan *vp, + upb_sink *output); +bool upb_visitor_visitmsg(upb_visitor *v, const void *msg); + + +/** upb_msgval ****************************************************************/ + +/* A union representing all possible protobuf values. Used for generic get/set + * operations. + * + * The ACCESSORS macro further down generates three inline functions for each + * listed name: upb_msgval_get<name>() reads the member, upb_msgval_set<name>() + * writes it through a pointer, and upb_msgval_<name>() returns a union in + * which only that member has been assigned (the rest of the union is left + * uninitialized). upb_msgval_str() does the same for the str.ptr/str.len + * pair. */ + +typedef void upb_msg; + +typedef union { + bool b; + float flt; + double dbl; + int32_t i32; + int64_t i64; + uint32_t u32; + uint64_t u64; + const void* ptr; + struct { + const char *ptr; + size_t len; + } str; +} upb_msgval; + +#define ACCESSORS(name, membername, ctype) \ + UPB_INLINE ctype upb_msgval_get ## name(upb_msgval v) { \ + return v.membername; \ + } \ + UPB_INLINE void upb_msgval_set ## name(upb_msgval *v, ctype cval) { \ + v->membername = cval; \ + } \ + UPB_INLINE upb_msgval upb_msgval_ ## name(ctype v) { \ + upb_msgval ret; \ + ret.membername = v; \ + return ret; \ + } + +ACCESSORS(bool, b, bool) +ACCESSORS(float, flt, float) +ACCESSORS(double, dbl, double) +ACCESSORS(int32, i32, int32_t) +ACCESSORS(int64, i64, int64_t) +ACCESSORS(uint32, u32, uint32_t) +ACCESSORS(uint64, u64, uint64_t) +ACCESSORS(map, ptr, const upb_map*) +ACCESSORS(msg, ptr, const upb_msg*) +ACCESSORS(ptr, ptr, const void*) +ACCESSORS(arr, ptr, const upb_array*) + +#undef ACCESSORS + +UPB_INLINE upb_msgval upb_msgval_str(const char *ptr, size_t len) { + upb_msgval ret; + ret.str.ptr = ptr; + ret.str.len = len; + return ret; +} + + +/** upb_msg *******************************************************************/ + +size_t upb_msg_sizeof(const upb_msglayout *l); +void upb_msg_init(upb_msg *msg, const 
upb_msglayout *l, upb_alloc *a); +void upb_msg_uninit(upb_msg *msg, const upb_msglayout *l); + +upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a); +void upb_msg_free(upb_msg *msg, const upb_msglayout *l); + +upb_alloc *upb_msg_alloc(const upb_msg *msg, const upb_msglayout *l); + +/* Packs the tree of messages rooted at "msg" into a single hunk of memory, + * allocated from the given allocator. + * + * NOTE(review): the signature takes no upb_alloc; it takes a buffer pointer + * "p", an offset "ofs" and a "size" instead. Presumably the packed data is + * written into p at *ofs, bounded by size -- confirm and update this text. */ +void *upb_msg_pack(const upb_msg *msg, const upb_msglayout *l, + void *p, size_t *ofs, size_t size); + +/* Read-only message API. Can be safely called by anyone. */ + +/* Returns the value associated with this field: + * - for scalar fields (including strings), the value directly. + * - for msg/map fields, a upb_msg* or upb_map* respectively (arrays are not + * mentioned here; presumably repeated fields yield a upb_array* -- TODO + * confirm). + * If the field is unset for these field types, returns NULL. + * + * TODO(haberman): should we let users store cached array/map/msg + * pointers here for fields that are unset? Could be useful for the + * strongly-owned submessage model (i.e. generated C API that doesn't use + * arenas). + */ +upb_msgval upb_msg_get(const upb_msg *msg, + const upb_fielddef *f, + const upb_msglayout *l); + +/* May only be called for fields where upb_fielddef_haspresence(f) == true. */ +bool upb_msg_has(const upb_msg *msg, + const upb_fielddef *f, + const upb_msglayout *l); + +/* Returns NULL if no field in the oneof is set. */ +const upb_fielddef *upb_msg_getoneofcase(const upb_msg *msg, + const upb_oneofdef *o, + const upb_msglayout *l); + +/* Returns true if any field in the oneof is set. */ +bool upb_msg_hasoneof(const upb_msg *msg, + const upb_oneofdef *o, + const upb_msglayout *l); + + +/* Mutable message API. May only be called by the owner of the message who + * knows its ownership scheme and how to keep it consistent. */ + +/* Sets the given field to the given value. Does not perform any memory + * management: if you overwrite a pointer to a msg/array/map/string without + * cleaning it up (or using an arena) it will leak. 
+ * + * NOTE(review): the meaning of the bool result is not stated in this header; + * presumably false signals failure -- confirm. + */ +bool upb_msg_set(upb_msg *msg, + const upb_fielddef *f, + upb_msgval val, + const upb_msglayout *l); + +/* For a primitive field, set it back to its default. For repeated, string, and + * submessage fields set it back to NULL. This could involve releasing some + * internal memory (for example, from an extension dictionary), but it is not + * recursive in any way and will not recover any memory that may be used by + * arrays/maps/strings/msgs that this field may have pointed to. + */ +bool upb_msg_clearfield(upb_msg *msg, + const upb_fielddef *f, + const upb_msglayout *l); + +/* Clears all fields in the oneof such that none of them are set. */ +bool upb_msg_clearoneof(upb_msg *msg, + const upb_oneofdef *o, + const upb_msglayout *l); + +/* TODO(haberman): copyfrom()/mergefrom()? */ + + +/** upb_array *****************************************************************/ + +size_t upb_array_sizeof(upb_fieldtype_t type); +void upb_array_init(upb_array *arr, upb_fieldtype_t type, upb_alloc *a); +void upb_array_uninit(upb_array *arr); +upb_array *upb_array_new(upb_fieldtype_t type, upb_alloc *a); +void upb_array_free(upb_array *arr); + +/* Read-only interface. Safe for anyone to call. */ + +size_t upb_array_size(const upb_array *arr); +upb_fieldtype_t upb_array_type(const upb_array *arr); +upb_msgval upb_array_get(const upb_array *arr, size_t i); + +/* Write interface. May only be called by the message's owner who can enforce + * its memory management invariants. */ + +bool upb_array_set(upb_array *arr, size_t i, upb_msgval val); + + +/** upb_map *******************************************************************/ + +/* Stores data for a map field. The map will internally allocate (and free, if + * desired) all the internal storage used for the hash table or tree, using the + * given allocator. It will also copy and internally store the data for string + * keys, but *not* for string or message *values*. 
So the caller must ensure + * that any string or message values outlive the map. */ + +size_t upb_map_sizeof(upb_fieldtype_t ktype, upb_fieldtype_t vtype); +bool upb_map_init(upb_map *map, upb_fieldtype_t ktype, upb_fieldtype_t vtype, + upb_alloc *a); +void upb_map_uninit(upb_map *map); +upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype, upb_alloc *a); +void upb_map_free(upb_map *map); + +/* Read-only interface. Safe for anyone to call. */ + +size_t upb_map_size(const upb_map *map); +upb_fieldtype_t upb_map_keytype(const upb_map *map); +upb_fieldtype_t upb_map_valuetype(const upb_map *map); +bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val); + +/* Write interface. May only be called by the message's owner who can enforce + * its memory management invariants. */ + +/* Sets or overwrites an entry in the map. Return value indicates whether + * the operation succeeded or failed with OOM, and also whether an existing + * key was replaced or not. + * + * NOTE(review): a single bool cannot report both outcomes; presumably the + * replaced value (if any) is reported through "valremoved" -- confirm. */ +bool upb_map_set(upb_map *map, + upb_msgval key, upb_msgval val, + upb_msgval *valremoved); + +/* Deletes an entry in the map. Returns true if the key was present. */ +bool upb_map_del(upb_map *map, upb_msgval key); + + +/** upb_mapiter ***************************************************************/ + +/* For iterating over a map. Map iterators are invalidated by mutations to the + * map, but an invalidated iterator will never return junk or crash the process. + * An invalidated iterator may return entries that were already returned though, + * and if you keep invalidating the iterator during iteration, the program may + * enter an infinite loop. 
*/ + +size_t upb_mapiter_sizeof(); /* NOTE(review): "()" is not a prototype in C before C23; "(void)" is presumably intended. */ + +void upb_mapiter_begin(upb_mapiter *i, const upb_map *t); +upb_mapiter *upb_mapiter_new(const upb_map *t, upb_alloc *a); +void upb_mapiter_free(upb_mapiter *i, upb_alloc *a); +void upb_mapiter_next(upb_mapiter *i); +bool upb_mapiter_done(const upb_mapiter *i); + +/* For string keys, the value will be in upb_msgval_strkey(), *not* + * upb_msgval_str(). + * + * NOTE(review): upb_msgval_strkey() is not declared in this header -- confirm + * where (or whether) it exists. */ +upb_msgval upb_mapiter_key(const upb_mapiter *i); +upb_msgval upb_mapiter_value(const upb_mapiter *i); +void upb_mapiter_setdone(upb_mapiter *i); +bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2); + + +/** Handlers ******************************************************************/ + +/* These are the handlers used internally by upb_msgfactory_getmergehandlers(). + * They write scalar data to a known offset from the message pointer. + * + * These would be trivial for anyone to implement themselves, but it's better + * to use these because some JITs will recognize and specialize these instead + * of actually calling the function. */ + +/* Sets a handler for the given primitive field that will write the data at the + * given offset. If hasbit > 0, also sets a hasbit at the given bit offset + * (addressing each byte low to high). */ +bool upb_msg_setscalarhandler(upb_handlers *h, + const upb_fielddef *f, + size_t offset, + int32_t hasbit); + +/* If the given handler is a msghandlers_primitive field, returns true and sets + * *type, *offset and *hasbit. Otherwise returns false. */ +bool upb_msg_getscalarhandlerdata(const upb_handlers *h, + upb_selector_t s, + upb_fieldtype_t *type, + size_t *offset, + int32_t *hasbit); + +UPB_END_EXTERN_C + +#endif /* UPB_MSG_H_ */ |