summaryrefslogtreecommitdiff
path: root/upb/msg.h
diff options
context:
space:
mode:
authorJosh Haberman <jhaberman@gmail.com>2016-09-09 14:03:25 -0700
committerJosh Haberman <jhaberman@gmail.com>2016-11-29 17:56:13 +0000
commit4b0c4ca7fb0aa9207af3398e04534b23fbb88f27 (patch)
tree045750c6262e74f366ae2ec29797d3816005c21a /upb/msg.h
parent77c97fd3f29caa5c243294b5f4e6763b3ed3c36f (diff)
New upb_msg code and Lua bindings around it.
There are still some things that are unfinished, but we are at parity with what Lua had before.
Diffstat (limited to 'upb/msg.h')
-rw-r--r--upb/msg.h340
1 files changed, 340 insertions, 0 deletions
diff --git a/upb/msg.h b/upb/msg.h
new file mode 100644
index 0000000..912d6ac
--- /dev/null
+++ b/upb/msg.h
@@ -0,0 +1,340 @@
+/*
+** upb::Message is a representation for protobuf messages.
+**
+** However it differs from other common representations like
+** google::protobuf::Message in one key way: it does not prescribe any
+** ownership semantics, and it does not perform any memory management.
+**
+** A client can access a upb::Message without knowing anything about
+** ownership semantics, but to create or mutate a message a user needs
+** to implement the memory management themselves.
+**
+** Currently all messages, arrays, and maps store a upb_alloc* internally.
+** Mutating operations use this when they require dynamically-allocated
+** memory. We could potentially eliminate this size overhead later by
+** letting the user flip a bit on the factory that prevents this from
+** being stored. The user would then need to use separate functions where
+** the upb_alloc* is passed explicitly. However for handlers to populate
+** such structures, they would need a place to store this upb_alloc* during
+** parsing; upb_handlers don't currently have a good way to accommodate this.
+**
+** TODO: UTF-8 checking?
+**/
+
+#ifndef UPB_MSG_H_
+#define UPB_MSG_H_
+
+#include "upb/def.h"
+#include "upb/handlers.h"
+#include "upb/sink.h"
+#include "upb/symtab.h"
+
+#ifdef __cplusplus
+
+namespace upb {
+class MessageLayout;
+class MessageFactory;
+}
+
+#endif
+
+UPB_DECLARE_TYPE(upb::MessageFactory, upb_msgfactory)
+UPB_DECLARE_TYPE(upb::MessageLayout, upb_msglayout)
+UPB_DECLARE_TYPE(upb::Array, upb_array)
+UPB_DECLARE_TYPE(upb::Map, upb_map)
+UPB_DECLARE_TYPE(upb::MapIterator, upb_mapiter)
+UPB_DECLARE_TYPE(upb::Visitor, upb_visitor)
+UPB_DECLARE_TYPE(upb::VisitorPlan, upb_visitorplan)
+
+/* TODO(haberman): C++ accessors */
+
+UPB_BEGIN_EXTERN_C
+
+
+/** upb_msglayout *************************************************************/
+
+/* Please note that map_entry messages (upb_msgdef_mapentry(m) == true) cannot
+ * have layouts. They can only be represented as upb_map, not as a message. */
+
+/* Requires that upb_fielddef_issubmsg(upb_msglayout_msgdef(l)) == true.
+ *
+ * Since map entry messages don't have layouts, if upb_fielddef_ismap(f) == true
+ * then this function will return the layout for the map's value. It requires
+ * that the value type of the map field is a submessage. */
+const upb_msglayout *upb_msglayout_sublayout(const upb_msglayout *l,
+ const upb_fielddef *f);
+
+const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l);
+
+
+/** upb_msgfactory ************************************************************/
+
+upb_msgfactory *upb_msgfactory_new(const upb_symtab *symtab);
+void upb_msgfactory_free(upb_msgfactory *f);
+
+/* Requires:
+ * - m is in upb_msgfactory_symtab(f)
+ * - upb_msgdef_mapentry(m) == false (since map messages can't have layouts).
+ *
+ * The returned layout will live for as long as the msgfactory does.
+ */
+const upb_msglayout *upb_msgfactory_getlayout(const upb_msgfactory *f,
+ const upb_msgdef *m);
+
+/* Returns handlers for populating a message with the given msgdef. */
+const upb_handlers *upb_msgfactory_getmergehandlers(const upb_msgfactory *f,
+ const upb_msgdef *m);
+
+/* Returns a plan for visiting the data and submessages of the given msgdef. */
+const upb_visitorplan *upb_visitorplan_new(const upb_msgfactory *f,
+ const upb_msgdef *m);
+
+/* For actually visiting a message and its submessages. */
+upb_visitor *upb_visitor_create(upb_env *e, const upb_visitorplan *vp,
+ upb_sink *output);
+bool upb_visitor_visitmsg(upb_visitor *v, const void *msg);
+
+
+/** upb_msgval ****************************************************************/
+
+/* A union representing all possible protobuf values. Used for generic get/set
+ * operations. */
+
+typedef void upb_msg;
+
+typedef union {
+ bool b;
+ float flt;
+ double dbl;
+ int32_t i32;
+ int64_t i64;
+ uint32_t u32;
+ uint64_t u64;
+ const void* ptr;
+ struct {
+ const char *ptr;
+ size_t len;
+ } str;
+} upb_msgval;
+
+#define ACCESSORS(name, membername, ctype) \
+ UPB_INLINE ctype upb_msgval_get ## name(upb_msgval v) { \
+ return v.membername; \
+ } \
+ UPB_INLINE void upb_msgval_set ## name(upb_msgval *v, ctype cval) { \
+ v->membername = cval; \
+ } \
+ UPB_INLINE upb_msgval upb_msgval_ ## name(ctype v) { \
+ upb_msgval ret; \
+ ret.membername = v; \
+ return ret; \
+ }
+
+ACCESSORS(bool, b, bool)
+ACCESSORS(float, flt, float)
+ACCESSORS(double, dbl, double)
+ACCESSORS(int32, i32, int32_t)
+ACCESSORS(int64, i64, int64_t)
+ACCESSORS(uint32, u32, uint32_t)
+ACCESSORS(uint64, u64, uint64_t)
+ACCESSORS(map, ptr, const upb_map*)
+ACCESSORS(msg, ptr, const upb_msg*)
+ACCESSORS(ptr, ptr, const void*)
+ACCESSORS(arr, ptr, const upb_array*)
+
+#undef ACCESSORS
+
+UPB_INLINE upb_msgval upb_msgval_str(const char *ptr, size_t len) {
+ upb_msgval ret;
+ ret.str.ptr = ptr;
+ ret.str.len = len;
+ return ret;
+}
+
+
+/** upb_msg *******************************************************************/
+
+size_t upb_msg_sizeof(const upb_msglayout *l);
+void upb_msg_init(upb_msg *msg, const upb_msglayout *l, upb_alloc *a);
+void upb_msg_uninit(upb_msg *msg, const upb_msglayout *l);
+
+upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a);
+void upb_msg_free(upb_msg *msg, const upb_msglayout *l);
+
+upb_alloc *upb_msg_alloc(const upb_msg *msg, const upb_msglayout *l);
+
+/* Packs the tree of messages rooted at "msg" into a single hunk of memory,
+ * allocated from the given allocator. */
+void *upb_msg_pack(const upb_msg *msg, const upb_msglayout *l,
+ void *p, size_t *ofs, size_t size);
+
+/* Read-only message API. Can be safely called by anyone. */
+
+/* Returns the value associated with this field:
+ * - for scalar fields (including strings), the value directly.
+ * - return upb_msg*, or upb_map* for msg/map.
+ * If the field is unset for these field types, returns NULL.
+ *
+ * TODO(haberman): should we let users store cached array/map/msg
+ * pointers here for fields that are unset? Could be useful for the
+ * strongly-owned submessage model (ie. generated C API that doesn't use
+ * arenas).
+ */
+upb_msgval upb_msg_get(const upb_msg *msg,
+ const upb_fielddef *f,
+ const upb_msglayout *l);
+
+/* May only be called for fields where upb_fielddef_haspresence(f) == true. */
+bool upb_msg_has(const upb_msg *msg,
+ const upb_fielddef *f,
+ const upb_msglayout *l);
+
+/* Returns NULL if no field in the oneof is set. */
+const upb_fielddef *upb_msg_getoneofcase(const upb_msg *msg,
+ const upb_oneofdef *o,
+ const upb_msglayout *l);
+
+/* Returns true if any field in the oneof is set. */
+bool upb_msg_hasoneof(const upb_msg *msg,
+ const upb_oneofdef *o,
+ const upb_msglayout *l);
+
+
+/* Mutable message API. May only be called by the owner of the message who
+ * knows its ownership scheme and how to keep it consistent. */
+
+/* Sets the given field to the given value. Does not perform any memory
+ * management: if you overwrite a pointer to a msg/array/map/string without
+ * cleaning it up (or using an arena) it will leak.
+ */
+bool upb_msg_set(upb_msg *msg,
+ const upb_fielddef *f,
+ upb_msgval val,
+ const upb_msglayout *l);
+
+/* For a primitive field, set it back to its default. For repeated, string, and
+ * submessage fields set it back to NULL. This could involve releasing some
+ * internal memory (for example, from an extension dictionary), but it is not
+ * recursive in any way and will not recover any memory that may be used by
+ * arrays/maps/strings/msgs that this field may have pointed to.
+ */
+bool upb_msg_clearfield(upb_msg *msg,
+ const upb_fielddef *f,
+ const upb_msglayout *l);
+
+/* Clears all fields in the oneof such that none of them are set. */
+bool upb_msg_clearoneof(upb_msg *msg,
+ const upb_oneofdef *o,
+ const upb_msglayout *l);
+
+/* TODO(haberman): copyfrom()/mergefrom()? */
+
+
+/** upb_array *****************************************************************/
+
+size_t upb_array_sizeof(upb_fieldtype_t type);
+void upb_array_init(upb_array *arr, upb_fieldtype_t type, upb_alloc *a);
+void upb_array_uninit(upb_array *arr);
+upb_array *upb_array_new(upb_fieldtype_t type, upb_alloc *a);
+void upb_array_free(upb_array *arr);
+
+/* Read-only interface. Safe for anyone to call. */
+
+size_t upb_array_size(const upb_array *arr);
+upb_fieldtype_t upb_array_type(const upb_array *arr);
+upb_msgval upb_array_get(const upb_array *arr, size_t i);
+
+/* Write interface. May only be called by the message's owner who can enforce
+ * its memory management invariants. */
+
+bool upb_array_set(upb_array *arr, size_t i, upb_msgval val);
+
+
+/** upb_map *******************************************************************/
+
+/* Stores data for a map field. The map will internally allocate (and free, if
+ * desired) all the internal storage used for the hash table or tree, using the
+ * given allocator. It will also copy and internally store the data for string
+ * keys, but *not* for string or message *values*. So the caller must ensure
+ * that any string or message values outlive the map. */
+
+size_t upb_map_sizeof(upb_fieldtype_t ktype, upb_fieldtype_t vtype);
+bool upb_map_init(upb_map *map, upb_fieldtype_t ktype, upb_fieldtype_t vtype,
+ upb_alloc *a);
+void upb_map_uninit(upb_map *map);
+upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype, upb_alloc *a);
+void upb_map_free(upb_map *map);
+
+/* Read-only interface. Safe for anyone to call. */
+
+size_t upb_map_size(const upb_map *map);
+upb_fieldtype_t upb_map_keytype(const upb_map *map);
+upb_fieldtype_t upb_map_valuetype(const upb_map *map);
+bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val);
+
+/* Write interface. May only be called by the message's owner who can enforce
+ * its memory management invariants. */
+
+/* Sets or overwrites an entry in the map. Return value indicates whether
+ * the operation succeeded or failed with OOM, and also whether an existing
+ * key was replaced or not. */
+bool upb_map_set(upb_map *map,
+ upb_msgval key, upb_msgval val,
+ upb_msgval *valremoved);
+
+/* Deletes an entry in the map. Returns true if the key was present. */
+bool upb_map_del(upb_map *map, upb_msgval key);
+
+
+/** upb_mapiter ***************************************************************/
+
+/* For iterating over a map. Map iterators are invalidated by mutations to the
+ * map, but an invalidated iterator will never return junk or crash the process.
+ * An invalidated iterator may return entries that were already returned though,
+ * and if you keep invalidating the iterator during iteration, the program may
+ * enter an infinite loop. */
+
+size_t upb_mapiter_sizeof();
+
+void upb_mapiter_begin(upb_mapiter *i, const upb_map *t);
+upb_mapiter *upb_mapiter_new(const upb_map *t, upb_alloc *a);
+void upb_mapiter_free(upb_mapiter *i, upb_alloc *a);
+void upb_mapiter_next(upb_mapiter *i);
+bool upb_mapiter_done(const upb_mapiter *i);
+
+/* For string keys, the value will be in upb_msgval_strkey(), *not*
+ * upb_msgval_str(). */
+upb_msgval upb_mapiter_key(const upb_mapiter *i);
+upb_msgval upb_mapiter_value(const upb_mapiter *i);
+void upb_mapiter_setdone(upb_mapiter *i);
+bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2);
+
+
+/** Handlers ******************************************************************/
+
+/* These are the handlers used internally by upb_msgfactory_getmergehandlers().
+ * They write scalar data to a known offset from the message pointer.
+ *
+ * These would be trivial for anyone to implement themselves, but it's better
+ * to use these because some JITs will recognize and specialize these instead
+ * of actually calling the function. */
+
+/* Sets a handler for the given primitive field that will write the data at the
+ * given offset. If hasbit > 0, also sets a hasbit at the given bit offset
+ * (addressing each byte low to high). */
+bool upb_msg_setscalarhandler(upb_handlers *h,
+ const upb_fielddef *f,
+ size_t offset,
+ int32_t hasbit);
+
+/* If the given handler is a msghandlers_primitive field, returns true and sets
+ * *type, *offset and *hasbit. Otherwise returns false. */
+bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
+ upb_selector_t s,
+ upb_fieldtype_t *type,
+ size_t *offset,
+ int32_t *hasbit);
+
+UPB_END_EXTERN_C
+
+#endif /* UPB_MSG_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback