summary | refs | log | tree | commit | diff
path: root/upb_msg.h
diff options
context:
space:
mode:
Diffstat (limited to 'upb_msg.h')
-rw-r--r-- upb_msg.h 369
1 files changed, 0 insertions, 369 deletions
diff --git a/upb_msg.h b/upb_msg.h
deleted file mode 100644
index 8910505..0000000
--- a/upb_msg.h
+++ /dev/null
@@ -1,369 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
- *
- * A upb_msg provides a full description of a message as defined in a .proto
- * file. It supports many features and operations for dealing with proto
- * messages:
- * - reflection over .proto types at runtime (list fields, get names, etc).
- * - an in-memory byte-level format for efficiently storing and accessing msgs.
- * - serializing and deserializing from the in-memory format to a protobuf.
- * - optional memory management for handling strings, arrays, and submessages.
- *
- * Throughout this file, the following convention is used:
- * - "struct upb_msg *m" describes a message type (name, list of fields, etc).
- * - "void *data" is an actual message stored using the in-memory format.
- *
- * The in-memory format is very much like a C struct that you can define at
- * run-time, but also supports reflection. Like C structs it supports
- * offset-based access, as opposed to the much slower name-based lookup. The
- * format stores both the values themselves and bits describing whether each
- * field is set or not. For example:
- *
- * parsed message Foo {
- * optional bool a = 1;
- * repeated uint32 b = 2;
- * optional Bar c = 3;
- * }
- *
- * The in-memory layout for this message on a 32-bit machine will be something
- * like:
- *
- * Foo
- * +------------------------+
- * | set_flags a:1, b:1, c:1|
- * +------------------------+
- * | bool a (1 byte) |
- * +------------------------+
- * | padding (3 bytes) |
- * +------------------------+ upb_array
- * | upb_array* b (4 bytes) | ----> +----------------------------+
- * +------------------------+ | uint32* elements (4 bytes) | ---+
- * | Bar* c (4 bytes) | +----------------------------+ |
- * +------------------------+ | uint32 len (4 bytes) | |
- * +----------------------------+ |
- * |
- * -----------------------------------------------------------------+
- * |
- * V
- * uint32 array
- * +----+----+----+----+----+----+
- * | e1 | e2 | e3 | e4 | e5 | e6 |
- * +----+----+----+----+----+----+
- *
- * And the corresponding C structure (as emitted by the proto compiler) would be:
- *
- * struct Foo {
- * union {
- * uint8_t bytes[1];
- * struct {
- * bool a:1;
- * bool b:1;
- * bool c:1;
- * } has;
- * } set_flags;
- * bool a;
- * upb_uint32_array *b;
- * Bar *c;
- * }
- *
- * Because the C struct emitted by the upb compiler uses exactly the same
- * byte-level format as the reflection interface, you can access the same hunk
- * of memory either way. The C struct provides maximum performance and static
- * type safety; upb_msg provides flexibility.
- *
- * The in-memory format has no interoperability guarantees whatsoever, except
- * that a single version of upb will interoperate with itself. Don't even
- * think about persisting the in-memory format or sending it anywhere. That's
- * what serialized protobufs are for! The in-memory format is just that -- an
- * in-memory representation that allows for fast access.
- *
- * The in-memory format is carefully designed to *not* mandate any particular
- * memory management scheme. This should make it easier to integrate with
- * existing memory management schemes, or to perform advanced techniques like
- * reference counting, garbage collection, and string references. Different
- * clients can read each other's messages regardless of what memory management
- * scheme each is using.
- *
- * A memory management scheme is provided for convenience, and it is used by
- * default by the stock message parser. Clients can substitute their own
- * memory management scheme into this parser without any loss of generality
- * or performance.
- */
-
-#ifndef UPB_MSG_H_
-#define UPB_MSG_H_
-
-#include <stdbool.h>
-#include <stdint.h>
-
-#include "upb.h"
-#include "upb_table.h"
-#include "upb_parse.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Forward declarations from descriptor.h. */
-struct google_protobuf_DescriptorProto;
-struct google_protobuf_FieldDescriptorProto;
-
-/* Message definition. ********************************************************/
-
-/* Structure that describes a single field in a message. This structure is very
- * consciously designed to fit into 12/16 bytes (32/64 bit, respectively),
- * because copies of this struct are in the hash table that is read in the
- * critical path of parsing. Minimizing the size of this struct increases
- * cache-friendliness. */
-struct upb_msg_field {
- /* Not resolved by upb_msg_init(); filled in afterwards via upb_msg_ref(). */
- union upb_symbol_ref ref;
- /* Byte offset of the field's value from the start of the message data
-  * (this is what upb_msg_getptr() adds to the data pointer). */
- uint32_t byte_offset; /* Where to find the data. */
- /* Also used to index upb_msg.field_descriptors, and passed to
-  * upb_isset_offset()/upb_isset_mask() to locate the field's set flag. */
- uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */
- upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */
- upb_label_t label; /* NOTE(review): presumably also copied from the descriptor -- confirm. */
-};
-
-/* Structure that describes a single .proto message type. */
-struct upb_msg {
- /* NOTE(review): presumably the descriptor passed to upb_msg_init(); that
-  * descriptor stays owned by the caller and must outlive the msg. */
- struct google_protobuf_DescriptorProto *descriptor;
- size_t size; /* NOTE(review): presumably the byte size of one in-memory message -- confirm. */
- uint32_t num_fields;
- uint32_t set_flags_bytes; /* Number of leading bytes of message data that upb_msg_clear() zeroes. */
- uint32_t num_required_fields; /* Required fields have the lowest set bitmasks. */
- struct upb_inttable fields_by_num; /* Searched by upb_msg_fieldbynum(). */
- struct upb_strtable fields_by_name; /* Searched by upb_msg_fieldbyname(). */
- struct upb_msg_field *fields; /* Indexed by upb_msg_field.field_index. */
- /* Indexed by upb_msg_field.field_index (see upb_msg_field_descriptor()). */
- struct google_protobuf_FieldDescriptorProto **field_descriptors;
-};
-
-/* The num->field and name->field maps in upb_msg allow fast lookup of fields
- * by number or name. These lookups are in the critical path of parsing and
- * field lookup, so they must be as fast as possible. To make these more
- * cache-friendly, we put the data in the table by value. */
-
-/* Entry type of upb_msg.fields_by_num: the int-table entry header followed by
- * the field itself, held by value. upb_msg_fieldbynum() casts the lookup
- * result to this struct and hands out a pointer to f. */
-struct upb_fieldsbynum_entry {
- struct upb_inttable_entry e;
- struct upb_msg_field f;
-};
-
-/* Entry type of upb_msg.fields_by_name: the string-table entry header followed
- * by the field itself, held by value. upb_msg_fieldbyname() casts the lookup
- * result to this struct and hands out a pointer to f. */
-struct upb_fieldsbyname_entry {
- struct upb_strtable_entry e;
- struct upb_msg_field f;
-};
-
-/* Maps a field to its descriptor: f->field_index selects the entry in
- * m->field_descriptors. No bounds checking is performed. */
-INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
-    struct upb_msg_field *f, struct upb_msg *m) {
-  struct google_protobuf_FieldDescriptorProto **descriptors =
-      m->field_descriptors;
-  return descriptors[f->field_index];
-}
-
-/* Initializes/frees a upb_msg. Caller retains ownership of d, but the msg
- * will contain references to it, so it must outlive the msg. Note that init
- * does not resolve upb_msg_field.ref -- the caller should do that
- * post-initialization by calling upb_msg_ref() below. */
-bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d);
-void upb_msg_free(struct upb_msg *m);
-
-/* Clients use this function on a previously initialized upb_msg to resolve the
- * "ref" field in the upb_msg_field. Since messages can refer to each other in
- * mutually-recursive ways, this step must be separated from initialization. */
-void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f, union upb_symbol_ref ref);
-
-/* Looks up a field by name or number. While these are written to be as fast
- * as possible, it will still be faster to cache the results of this lookup if
- * possible. These return NULL if no such field is found. */
-/* Field lookup by number in m->fields_by_num. Yields a pointer to the field
- * stored inside the table entry, or NULL when the lookup finds no entry. */
-INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
-                                                uint32_t number) {
-  struct upb_fieldsbynum_entry *entry;
-  entry = (struct upb_fieldsbynum_entry*)upb_inttable_lookup(
-      &m->fields_by_num, number, sizeof(*entry));
-  if(!entry) return NULL;
-  return &entry->f;
-}
-/* Field lookup by name in m->fields_by_name. Yields a pointer to the field
- * stored inside the table entry, or NULL when the lookup finds no entry. */
-INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
-                                                 struct upb_string *name) {
-  struct upb_fieldsbyname_entry *entry;
-  entry = (struct upb_fieldsbyname_entry*)upb_strtable_lookup(
-      &m->fields_by_name, name);
-  if(!entry) return NULL;
-  return &entry->f;
-}
-
-/* "Set" flag reading and writing. *******************************************/
-
-/* Index of the set-flags byte that holds the bit for field_index (eight flag
- * bits are packed into each byte). */
-INLINE size_t upb_isset_offset(uint32_t field_index) {
-  return field_index >> 3;
-}
-
-/* Single-bit mask selecting field_index's flag within its set-flags byte. */
-INLINE uint8_t upb_isset_mask(uint32_t field_index) {
-  const uint32_t bit = field_index & 7;
-  return (uint8_t)(1 << bit);
-}
-
-/* Functions for reading and writing the "set" flags in the msg. Note that
- * these do not perform memory management associated with any dynamic memory
- * these fields may be referencing. These *only* set and test the flags. */
-/* Turns on the "set" flag of field f in the message data s. Only the flag
- * bit is touched; the field's value is not written. */
-INLINE void upb_msg_set(void *s, struct upb_msg_field *f)
-{
-  /* Flags are raw bytes: use an unsigned view so that setting bit 7 does not
-   * go through an implementation-defined conversion to a signed char. */
-  uint8_t *flags = (uint8_t*)s;
-  flags[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index);
-}
-
-/* Turns off the "set" flag of field f in the message data s. Only the flag
- * bit is touched; any memory the field refers to is left alone. */
-INLINE void upb_msg_unset(void *s, struct upb_msg_field *f)
-{
-  /* Unsigned byte view, and the inverted mask is narrowed back to a byte:
-   * ~mask is computed as an int after integer promotion. */
-  uint8_t *flags = (uint8_t*)s;
-  flags[upb_isset_offset(f->field_index)] &=
-      (uint8_t)~upb_isset_mask(f->field_index);
-}
-
-/* Reports whether the "set" flag of field f is on in the message data s. */
-INLINE bool upb_msg_is_set(void *s, struct upb_msg_field *f)
-{
-  const char flag_byte = ((char*)s)[upb_isset_offset(f->field_index)];
-  return (flag_byte & upb_isset_mask(f->field_index)) != 0;
-}
-
-/* Returns true iff the set flags of all m->num_required_fields required
- * fields are on in the message data s. Required fields occupy the lowest flag
- * bits, so this checks the leading whole bytes against 0xFF and then only the
- * remaining low bits of the following byte. */
-INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
-{
-  const uint8_t *flags = (const uint8_t*)s;
-  uint32_t remaining = m->num_required_fields;
-  uint8_t mask;
-  while(remaining >= 8) {
-    if(*flags++ != 0xFF) return false;
-    remaining -= 8;
-  }
-  /* Nothing left to check: do not read (or be influenced by) the next byte. */
-  if(remaining == 0) return true;
-  /* The upper bits of this byte may hold flags of non-required fields, so
-   * test only the required bits instead of comparing the whole byte. */
-  mask = (uint8_t)((1u << remaining) - 1);
-  return (*flags & mask) == mask;
-}
-
-/* Zeroes the first m->set_flags_bytes bytes of the message data s, i.e. turns
- * every "set" flag off. Bytes past the flags are not modified.
- * NOTE(review): memset needs <string.h>, which this header does not include
- * directly -- presumably it arrives via upb.h; confirm. */
-INLINE void upb_msg_clear(void *s, struct upb_msg *m)
-{
-  const size_t flag_bytes = m->set_flags_bytes;
-  memset(s, 0, flag_bytes);
-}
-
-/* Scalar (non-array) data access. ********************************************/
-
-/* Returns a pointer to a specific field in a message. */
-/* Address of field f inside the message data: data advanced by
- * f->byte_offset bytes, wrapped in a upb_value_ptr. */
-INLINE union upb_value_ptr upb_msg_getptr(void *data, struct upb_msg_field *f) {
-  union upb_value_ptr field_ptr;
-  char *base = (char*)data;
-  field_ptr._void = base + f->byte_offset;
-  return field_ptr;
-}
-
-/* Arrays. ********************************************************************/
-
-/* Represents an array (a repeated field) of any type. The interpretation of
- * the data in the array depends on the type. */
-struct upb_array {
- /* Start of the element storage. upb_array_getelementptr() steps through it
-  * in units of upb_type_info[type].size bytes. */
- union upb_value_ptr elements;
- uint32_t len; /* Measured in elements. */
-};
-
-/* Returns a pointer to an array element. */
-/* Address of element n of arr, where each element occupies
- * upb_type_info[type].size bytes. n is not checked against arr->len. */
-INLINE union upb_value_ptr upb_array_getelementptr(
-    struct upb_array *arr, uint32_t n, upb_field_type_t type)
-{
-  union upb_value_ptr elem;
-  char *base = (char*)arr->elements._void;
-  elem._void = (void*)(base + n*upb_type_info[type].size);
-  return elem;
-}
-
-/* These are all overlays on upb_array, pointers between them can be cast. */
-/* Defines "struct <name>_array" with a typed elements pointer and a len
- * member, in the same member order as struct upb_array. The expansion already
- * ends in ';', so invocations are written without a trailing semicolon. */
-#define UPB_DEFINE_ARRAY_TYPE(name, type) \
- struct name ## _array { \
- type *elements; \
- uint32_t len; \
- };
-
-/* Typed array structs for the scalar element types and for strings (whose
- * elements are struct upb_string pointers). */
-UPB_DEFINE_ARRAY_TYPE(upb_double, double)
-UPB_DEFINE_ARRAY_TYPE(upb_float, float)
-UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t)
-UPB_DEFINE_ARRAY_TYPE(upb_int64, int64_t)
-UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t)
-UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t)
-UPB_DEFINE_ARRAY_TYPE(upb_bool, bool)
-UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)
-
-/* Defines an array of a specific message type. */
-/* UPB_MSG_ARRAY(T) names the type "struct T_array"; UPB_DEFINE_MSG_ARRAY(T)
- * defines it with an array of T pointers plus a len member. The element type
- * is spelled as bare "msg_type" (no "struct" keyword), so T must be usable as
- * a type name on its own. The expansion already ends in ';'. */
-#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array
-#define UPB_DEFINE_MSG_ARRAY(msg_type) \
- UPB_MSG_ARRAY(msg_type) { \
- msg_type **elements; \
- uint32_t len; \
- };
-
-/* Memory management *********************************************************/
-
-/* One important note about these memory management routines: they must be used
- * completely or not at all (for each message). In other words, you can't
- * allocate your own message and then free it with upb_msgdata_free. As
- * another example, you can't point a field to your own string and then call
- * upb_msg_reuse_str. */
-
-/* Allocates and frees message data, respectively. Newly allocated data is
- * initialized to empty. Freeing a message always frees string data, but
- * the client can decide whether or not submessages should be deleted. */
-void *upb_msgdata_new(struct upb_msg *m);
-void upb_msgdata_free(void *data, struct upb_msg *m, bool free_submsgs);
-
-/* Given a pointer to the appropriate field of the message or array, these
- * functions will lazily allocate memory for a string, array, or submessage.
- * If the previously allocated memory is big enough, it will reuse it without
- * re-allocating. See upb_msg.c for example usage. */
-
-/* Reuse a string of at least the given size. */
-void upb_msg_reuse_str(struct upb_string **str, uint32_t size);
-/* Like the previous, but assumes that the string will be by reference, so
- * doesn't allocate memory for the string itself. */
-void upb_msg_reuse_strref(struct upb_string **str);
-
-/* Reuse an array of at least the given size, with the given type. */
-void upb_msg_reuse_array(struct upb_array **arr, uint32_t size,
- upb_field_type_t t);
-
-/* Reuse a submessage of the given type. */
-void upb_msg_reuse_submsg(void **msg, struct upb_msg *m);
-
-/* Serialization/Deserialization. ********************************************/
-
-/* This is all just a layer on top of the stream-oriented facility in
- * upb_parse.h. */
-
-/* State for parsing into a message. upb_msg_parse_init() and
- * upb_msg_parse_reset() take merge, byref and m as arguments with the same
- * names; NOTE(review): presumably they are stored in the members below --
- * confirm against upb_msg.c. */
-struct upb_msg_parse_state {
- struct upb_parse_state s; /* State of the stream-oriented parser from upb_parse.h. */
- bool merge; /* Append to / merge into existing data instead of overwriting it. */
- bool byref; /* Reference strings from the input instead of copying them. */
- struct upb_msg *m; /* Message type being parsed. */
-};
-
-/* Initializes/frees a message parser. The parser will write the data to the
- * message data "data", which the caller must have previously allocated (the
- * parser will allocate submsgs, strings, and arrays as needed, however).
- *
- * "Merge" controls whether the parser will append to data instead of
- * overwriting. Merging concatenates arrays and merges submessages instead
- * of clearing both.
- *
- * "Byref" controls whether the new message data copies or references strings
- * it encounters. If byref == true, then all strings supplied to upb_msg_parse
- * must remain unchanged and must outlive data. */
-void upb_msg_parse_init(struct upb_msg_parse_state *s, void *data,
- struct upb_msg *m, bool merge, bool byref);
-void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *data,
- struct upb_msg *m, bool merge, bool byref);
-void upb_msg_parse_free(struct upb_msg_parse_state *s);
-
-/* Parses a protobuf fragment, writing the data to the message that was passed
- * to upb_msg_parse_init. This function can be called multiple times as more
- * data becomes available. */
-upb_status_t upb_msg_parse(struct upb_msg_parse_state *s,
- void *data, size_t len, size_t *read);
-
-/* Parses the protobuf in s (which is expected to be complete) and allocates
- * new message data to hold it. This is an alternative to the streaming API
- * above. "byref" works as in upb_msg_parse_init(). */
-void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s, bool byref);
-
-
-/* Text dump *****************************************************************/
-
-/* Text dump of the message data "data" of type m, written to stream.
- * NOTE(review): FILE needs <stdio.h>, which this header does not include
- * directly -- presumably it arrives via another include; confirm. */
-void upb_msg_print(void *data, struct upb_msg *m, FILE *stream);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* UPB_MSG_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback