summaryrefslogtreecommitdiff
path: root/upb/msg.h
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2011-07-15 12:05:43 -0700
committerJoshua Haberman <joshua@reverberate.org>2011-07-15 12:05:43 -0700
commit10265aa56b22ac4f04e7ba08330138e4507534e4 (patch)
treee821c85219a10b4ee3df715ab774465fdf87cf1d /upb/msg.h
parent6a1f3a66939308668ab8dce0d195afec16e02af9 (diff)
Directory restructure.
Includes are now via upb/foo.h. Files specific to the protobuf format are now in upb/pb (the core library is concerned with message definitions, handlers, and byte streams, but knows nothing about any particular serializationf format).
Diffstat (limited to 'upb/msg.h')
-rw-r--r--upb/msg.h270
1 files changed, 270 insertions, 0 deletions
diff --git a/upb/msg.h b/upb/msg.h
new file mode 100644
index 0000000..625d805
--- /dev/null
+++ b/upb/msg.h
@@ -0,0 +1,270 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010-2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Routines for reading and writing message data to an in-memory structure,
+ * similar to a C struct.
+ *
+ * upb does not define one single message object that everyone must use.
+ * Rather it defines an abstract interface for reading and writing members
+ * of a message object, and all of the parsers and serializers use this
+ * abstract interface. This allows upb's parsers and serializers to be used
+ * regardless of what memory management scheme or synchronization model the
+ * application is using.
+ *
+ * A standard set of accessors is provided for doing simple reads and writes at
+ * a known offset into the message. These accessors should be used when
+ * possible, because they are specially optimized -- for example, the JIT can
+ * recognize them and emit specialized code instead of having to call the
+ * function at all. The application can substitute its own accessors when the
+ * standard accessors are not suitable.
+ */
+
+#ifndef UPB_MSG_H
+#define UPB_MSG_H
+
+#include <stdlib.h>
+#include "upb/def.h"
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* upb_accessor ***************************************************************/
+
+// A upb_accessor is a table of function pointers for doing reads and writes
+// for one specific upb_fielddef. Each field has a separate accessor, which
+// lives in the fielddef.
+
+typedef bool upb_has_reader(void *m, upb_value fval);
+typedef upb_value upb_value_reader(void *m, upb_value fval);
+
+typedef void *upb_seqbegin_handler(void *s);
+typedef void *upb_seqnext_handler(void *s, void *iter);
+typedef upb_value upb_seqget_handler(void *iter);
+INLINE bool upb_seq_done(void *iter) { return iter == NULL; }
+
+typedef struct _upb_accessor_vtbl {
+ // Writers. These take an fval as a parameter because the callbacks are used
+ // as upb_handlers, but the fval is always the fielddef for that field.
+ upb_startfield_handler *appendseq; // Repeated fields only.
+ upb_startfield_handler *appendsubmsg; // Submsg fields (repeated or no).
+ upb_value_handler *set; // Scalar fields (repeated or no).
+
+ // Readers.
+ upb_has_reader *has;
+ upb_value_reader *get;
+ upb_seqbegin_handler *seqbegin;
+ upb_seqnext_handler *seqnext;
+ upb_seqget_handler *seqget;
+} upb_accessor_vtbl;
+
+// Registers handlers for writing into a message of the given type.
+upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, upb_msgdef *m);
+
+// Returns an stdmsg accessor for the given fielddef.
+upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f);
+
+
+/* upb_msg/upb_seq ************************************************************/
+
+// upb_msg and upb_seq allow for generic access to a message through its
+// accessor vtable. Note that these do *not* allow you to create, destroy, or
+// take references on the objects -- these operations are specifically outside
+// the scope of what the accessors define.
+
+// Clears all hasbits.
+// TODO: Add a separate function for setting primitive values back to their
+// defaults (but not strings, submessages, or arrays).
+void upb_msg_clear(void *msg, upb_msgdef *md);
+
+// Could add a method that recursively clears submessages, strings, and
+// arrays if desired. This could be a win if you wanted to merge without
+// needing hasbits, because during parsing you would never clear submessages
+// or arrays. Also this could be desired to provide proto2 operations on
+// generated messages.
+
+INLINE bool upb_msg_has(void *m, upb_fielddef *f) {
+ return f->accessor && f->accessor->has(m, f->fval);
+}
+
+// May only be called for fields that are known to be set.
+INLINE upb_value upb_msg_get(void *m, upb_fielddef *f) {
+ assert(upb_msg_has(m, f));
+ return f->accessor->get(m, f->fval);
+}
+
+INLINE void *upb_seq_begin(void *s, upb_fielddef *f) {
+ assert(f->accessor);
+ return f->accessor->seqbegin(s);
+}
+INLINE void *upb_seq_next(void *s, void *iter, upb_fielddef *f) {
+ assert(f->accessor);
+ assert(!upb_seq_done(iter));
+ return f->accessor->seqnext(s, iter);
+}
+INLINE upb_value upb_seq_get(void *iter, upb_fielddef *f) {
+ assert(f->accessor);
+ assert(!upb_seq_done(iter));
+ return f->accessor->seqget(iter);
+}
+
+
+/* upb_msgvisitor *************************************************************/
+
+// A upb_msgvisitor reads data from an in-memory structure using its accessors,
+// pushing the results to a given set of upb_handlers.
+// TODO: not yet implemented.
+
+typedef struct {
+ upb_fhandlers *fh;
+ upb_fielddef *f;
+ uint16_t msgindex; // Only when upb_issubmsg(f).
+} upb_msgvisitor_field;
+
+typedef struct {
+ upb_msgvisitor_field *fields;
+ int fields_len;
+} upb_msgvisitor_msg;
+
+typedef struct {
+ uint16_t msgindex;
+ uint16_t fieldindex;
+ uint32_t arrayindex; // UINT32_MAX if not an array frame.
+} upb_msgvisitor_frame;
+
+typedef struct {
+ upb_msgvisitor_msg *messages;
+ int messages_len;
+ upb_dispatcher dispatcher;
+} upb_msgvisitor;
+
+// Initializes a msgvisitor that will push data from messages of the given
+// msgdef to the given set of handlers.
+void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h);
+void upb_msgvisitor_uninit(upb_msgvisitor *v);
+
+void upb_msgvisitor_reset(upb_msgvisitor *v, void *m);
+void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status);
+
+
+/* Standard writers. **********************************************************/
+
+// Allocates a new stdmsg.
+void *upb_stdmsg_new(upb_msgdef *md);
+
+// Recursively frees any strings or submessages that the message refers to.
+void upb_stdmsg_free(void *m, upb_msgdef *md);
+
+// "hasbit" must be <= UPB_MAX_FIELDS. If it is <0, this field has no hasbit.
+upb_value upb_stdmsg_packfval(int16_t hasbit, uint16_t value_offset);
+upb_value upb_stdmsg_packfval_subm(int16_t hasbit, uint16_t value_offset,
+ uint16_t subm_size, uint8_t subm_setbytes);
+
+// Value writers for every in-memory type: write the data to a known offset
+// from the closure "c" and set the hasbit (if any).
+// TODO: can we get away with having only one for int64, uint64, double, etc?
+// The main thing in the way atm is that the upb_value is strongly typed.
+// in debug mode.
+upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint32(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setdouble(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val);
+
+// Value writers for repeated fields: the closure points to a standard array
+// struct, appends the value to the end of the array, resizing with realloc()
+// if necessary.
+typedef struct {
+ char *ptr;
+ uint32_t len; // Number of elements present.
+ uint32_t size; // Number of elements allocated.
+} upb_stdarray;
+
+upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setint32_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint64_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint32_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setdouble_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setfloat_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setbool_r(void *c, upb_value fval, upb_value val);
+
+// Writers for C strings (NULL-terminated): we can find a char* at a known
+// offset from the closure "c". Calls realloc() on the pointer to allocate
+// the memory (TODO: investigate whether checking malloc_usable_size() would
+// be cheaper than realloc()). Also sets the hasbit, if any.
+//
+// Since the string is NULL terminated and does not store an explicit length,
+// these are not suitable for binary data that can contain NULLs.
+upb_flow_t upb_stdmsg_setcstr(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setcstr_r(void *c, upb_value fval, upb_value val);
+
+// Writers for length-delimited strings: we explicitly store the length, so
+// the data can contain NULLs. Stores the data using upb_stdarray
+// which is located at a known offset from the closure "c" (note that it
+// is included inline rather than pointed to). Also sets the hasbit, if any.
+upb_flow_t upb_stdmsg_setstr(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setstr_r(void *c, upb_value fval, upb_value val);
+
+// Writers for startseq and startmsg which allocate (or reuse, if possible)
+// a sub data structure (upb_stdarray or a submessage, respectively),
+// setting the hasbit. If the hasbit is already set, the existing data
+// structure is used verbatim. If the hasbit is not already set, the pointer
+// is checked for NULL. If it is NULL, a new substructure is allocated,
+// cleared, and used. If it is not NULL, the existing substructure is
+// cleared and reused.
+//
+// If there is no hasbit, we always behave as if the hasbit was not set,
+// so any existing data for this array or submessage is cleared. In most
+// cases this will be fine since each array or non-repeated submessage should
+// occur at most once in the stream. But if the client is using "concatenation
+// as merging", it will want to make sure hasbits are allocated so merges can
+// happen appropriately.
+//
+// If there was a demand for the behavior that absence of a hasbit acts as if
+// the bit was always set, we could provide that also. But Clear() would need
+// to act recursively, which is less efficient since it requires an extra pass
+// over the tree.
+upb_sflow_t upb_stdmsg_startseq(void *c, upb_value fval);
+upb_sflow_t upb_stdmsg_startsubmsg(void *c, upb_value fval);
+upb_sflow_t upb_stdmsg_startsubmsg_r(void *c, upb_value fval);
+
+
+/* Standard readers. **********************************************************/
+
+bool upb_stdmsg_has(void *c, upb_value fval);
+void *upb_stdmsg_seqbegin(void *c);
+
+upb_value upb_stdmsg_getint64(void *c, upb_value fval);
+upb_value upb_stdmsg_getint32(void *c, upb_value fval);
+upb_value upb_stdmsg_getuint64(void *c, upb_value fval);
+upb_value upb_stdmsg_getuint32(void *c, upb_value fval);
+upb_value upb_stdmsg_getdouble(void *c, upb_value fval);
+upb_value upb_stdmsg_getfloat(void *c, upb_value fval);
+upb_value upb_stdmsg_getbool(void *c, upb_value fval);
+upb_value upb_stdmsg_getptr(void *c, upb_value fval);
+
+void *upb_stdmsg_8byte_seqnext(void *c, void *iter);
+void *upb_stdmsg_4byte_seqnext(void *c, void *iter);
+void *upb_stdmsg_1byte_seqnext(void *c, void *iter);
+
+upb_value upb_stdmsg_seqgetint64(void *c);
+upb_value upb_stdmsg_seqgetint32(void *c);
+upb_value upb_stdmsg_seqgetuint64(void *c);
+upb_value upb_stdmsg_seqgetuint32(void *c);
+upb_value upb_stdmsg_seqgetdouble(void *c);
+upb_value upb_stdmsg_seqgetfloat(void *c);
+upb_value upb_stdmsg_seqgetbool(void *c);
+upb_value upb_stdmsg_seqgetptr(void *c);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback