From 462b26c1cc041a8fa26deb62cf12f1f351a5b2f6 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 8 Jul 2009 12:06:47 -0700 Subject: Directory restructuring. --- upb_msg.h | 369 -------------------------------------------------------------- 1 file changed, 369 deletions(-) delete mode 100644 upb_msg.h (limited to 'upb_msg.h') diff --git a/upb_msg.h b/upb_msg.h deleted file mode 100644 index 8910505..0000000 --- a/upb_msg.h +++ /dev/null @@ -1,369 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * A upb_msg provides a full description of a message as defined in a .proto - * file. It supports many features and operations for dealing with proto - * messages: - * - reflection over .proto types at runtime (list fields, get names, etc). - * - an in-memory byte-level format for efficiently storing and accessing msgs. - * - serializing and deserializing from the in-memory format to a protobuf. - * - optional memory management for handling strings, arrays, and submessages. - * - * Throughout this file, the following convention is used: - * - "struct upb_msg *m" describes a message type (name, list of fields, etc). - * - "void *data" is an actual message stored using the in-memory format. - * - * The in-memory format is very much like a C struct that you can define at - * run-time, but also supports reflection. Like C structs it supports - * offset-based access, as opposed to the much slower name-based lookup. The - * format stores both the values themselves and bits describing whether each - * field is set or not. 
For example:
 *
 *   parsed message Foo {
 *     optional bool a = 1;
 *     repeated uint32 b = 2;
 *     optional Bar c = 3;
 *   }
 *
 * The in-memory layout for this message on a 32-bit machine will be something
 * like:
 *
 *  Foo
 * +------------------------+
 * | set_flags a:1, b:1, c:1|
 * +------------------------+
 * | bool a (1 byte)        |
 * +------------------------+
 * | padding (3 bytes)      |
 * +------------------------+         upb_array
 * | upb_array* b (4 bytes) | ----> +----------------------------+
 * +------------------------+       | uint32* elements (4 bytes) | ---+
 * | Bar* c (4 bytes)       |       +----------------------------+    |
 * +------------------------+       | uint32 size (4 bytes)      |    |
 *                                  +----------------------------+    |
 *                                                                    |
 *  -----------------------------------------------------------------+
 *  |
 *  V
 * uint32 array
 * +----+----+----+----+----+----+
 * | e1 | e2 | e3 | e4 | e5 | e6 |
 * +----+----+----+----+----+----+
 *
 * And the corresponding C structure (as emitted by the proto compiler) would be:
 *
 * struct Foo {
 *   union {
 *     uint8_t bytes[1];
 *     struct {
 *       bool a:1;
 *       bool b:1;
 *       bool c:1;
 *     } has;
 *   } set_flags;
 *   bool a;
 *   upb_uint32_array *b;
 *   Bar *c;
 * }
 *
 * Because the C struct emitted by the upb compiler uses exactly the same
 * byte-level format as the reflection interface, you can access the same hunk
 * of memory either way.  The C struct provides maximum performance and static
 * type safety; upb_msg provides flexibility.
 *
 * The in-memory format has no interoperability guarantees whatsoever, except
 * that a single version of upb will interoperate with itself.  Don't even
 * think about persisting the in-memory format or sending it anywhere.  That's
 * what serialized protobufs are for!  The in-memory format is just that -- an
 * in-memory representation that allows for fast access.
 *
 * The in-memory format is carefully designed to *not* mandate any particular
 * memory management scheme.
This should make it easier to integrate with - * existing memory management schemes, or to perform advanced techniques like - * reference counting, garbage collection, and string references. Different - * clients can read each others messages regardless of what memory management - * scheme each is using. - * - * A memory management scheme is provided for convenience, and it is used by - * default by the stock message parser. Clients can substitute their own - * memory management scheme into this parser without any loss of generality - * or performance. - */ - -#ifndef UPB_MSG_H_ -#define UPB_MSG_H_ - -#include -#include - -#include "upb.h" -#include "upb_table.h" -#include "upb_parse.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Forward declarations from descriptor.h. */ -struct google_protobuf_DescriptorProto; -struct google_protobuf_FieldDescriptorProto; - -/* Message definition. ********************************************************/ - -/* Structure that describes a single field in a message. This structure is very - * consciously designed to fit into 12/16 bytes (32/64 bit, respectively), - * because copies of this struct are in the hash table that is read in the - * critical path of parsing. Minimizing the size of this struct increases - * cache-friendliness. */ -struct upb_msg_field { - union upb_symbol_ref ref; - uint32_t byte_offset; /* Where to find the data. */ - uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */ - upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */ - upb_label_t label; -}; - -/* Structure that describes a single .proto message type. */ -struct upb_msg { - struct google_protobuf_DescriptorProto *descriptor; - size_t size; - uint32_t num_fields; - uint32_t set_flags_bytes; - uint32_t num_required_fields; /* Required fields have the lowest set bytemasks. 
*/ - struct upb_inttable fields_by_num; - struct upb_strtable fields_by_name; - struct upb_msg_field *fields; - struct google_protobuf_FieldDescriptorProto **field_descriptors; -}; - -/* The num->field and name->field maps in upb_msg allow fast lookup of fields - * by number or name. These lookups are in the critical path of parsing and - * field lookup, so they must be as fast as possible. To make these more - * cache-friendly, we put the data in the table by value. */ - -struct upb_fieldsbynum_entry { - struct upb_inttable_entry e; - struct upb_msg_field f; -}; - -struct upb_fieldsbyname_entry { - struct upb_strtable_entry e; - struct upb_msg_field f; -}; - -/* Can be used to retrieve a field descriptor given the upb_msg_field ref. */ -INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( - struct upb_msg_field *f, struct upb_msg *m) { - return m->field_descriptors[f->field_index]; -} - -/* Initializes/frees a upb_msg. Caller retains ownership of d, but the msg - * will contain references to it, so it must outlive the msg. Note that init - * does not resolve upb_msg_field.ref -- the caller should do that - * post-initialization by calling upb_msg_ref() below. */ -bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d); -void upb_msg_free(struct upb_msg *m); - -/* Clients use this function on a previously initialized upb_msg to resolve the - * "ref" field in the upb_msg_field. Since messages can refer to each other in - * mutually-recursive ways, this step must be separated from initialization. */ -void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f, union upb_symbol_ref ref); - -/* Looks up a field by name or number. While these are written to be as fast - * as possible, it will still be faster to cache the results of this lookup if - * possible. These return NULL if no such field is found. 
*/ -INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m, - uint32_t number) { - struct upb_fieldsbynum_entry *e = - (struct upb_fieldsbynum_entry*)upb_inttable_lookup( - &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry)); - return e ? &e->f : NULL; -} -INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m, - struct upb_string *name) { - struct upb_fieldsbyname_entry *e = - (struct upb_fieldsbyname_entry*)upb_strtable_lookup( - &m->fields_by_name, name); - return e ? &e->f : NULL; -} - -/* "Set" flag reading and writing. *******************************************/ - -INLINE size_t upb_isset_offset(uint32_t field_index) { - return field_index / 8; -} - -INLINE uint8_t upb_isset_mask(uint32_t field_index) { - return 1 << (field_index % 8); -} - -/* Functions for reading and writing the "set" flags in the msg. Note that - * these do not perform memory management associated with any dynamic memory - * these fields may be referencing. These *only* set and test the flags. */ -INLINE void upb_msg_set(void *s, struct upb_msg_field *f) -{ - ((char*)s)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index); -} - -INLINE void upb_msg_unset(void *s, struct upb_msg_field *f) -{ - ((char*)s)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index); -} - -INLINE bool upb_msg_is_set(void *s, struct upb_msg_field *f) -{ - return ((char*)s)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index); -} - -INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m) -{ - int num_fields = m->num_required_fields; - int i = 0; - while(num_fields > 8) { - if(((uint8_t*)s)[i++] != 0xFF) return false; - num_fields -= 8; - } - if(((uint8_t*)s)[i] != (1 << num_fields) - 1) return false; - return true; -} - -INLINE void upb_msg_clear(void *s, struct upb_msg *m) -{ - memset(s, 0, m->set_flags_bytes); -} - -/* Scalar (non-array) data access. 
********************************************/ - -/* Returns a pointer to a specific field in a message. */ -INLINE union upb_value_ptr upb_msg_getptr(void *data, struct upb_msg_field *f) { - union upb_value_ptr p; - p._void = ((char*)data + f->byte_offset); - return p; -} - -/* Arrays. ********************************************************************/ - -/* Represents an array (a repeated field) of any type. The interpretation of - * the data in the array depends on the type. */ -struct upb_array { - union upb_value_ptr elements; - uint32_t len; /* Measured in elements. */ -}; - -/* Returns a pointer to an array element. */ -INLINE union upb_value_ptr upb_array_getelementptr( - struct upb_array *arr, uint32_t n, upb_field_type_t type) -{ - union upb_value_ptr ptr; - ptr._void = (void*)((char*)arr->elements._void + n*upb_type_info[type].size); - return ptr; -} - -/* These are all overlays on upb_array, pointers between them can be cast. */ -#define UPB_DEFINE_ARRAY_TYPE(name, type) \ - struct name ## _array { \ - type *elements; \ - uint32_t len; \ - }; - -UPB_DEFINE_ARRAY_TYPE(upb_double, double) -UPB_DEFINE_ARRAY_TYPE(upb_float, float) -UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t) -UPB_DEFINE_ARRAY_TYPE(upb_int64, int64_t) -UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t) -UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t) -UPB_DEFINE_ARRAY_TYPE(upb_bool, bool) -UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*) - -/* Defines an array of a specific message type. */ -#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array -#define UPB_DEFINE_MSG_ARRAY(msg_type) \ - UPB_MSG_ARRAY(msg_type) { \ - msg_type **elements; \ - uint32_t len; \ - }; - -/* Memory management *********************************************************/ - -/* One important note about these memory management routines: they must be used - * completely or not at all (for each message). In other words, you can't - * allocate your own message and then free it with upb_msgdata_free. 
As - * another example, you can't point a field to your own string and then call - * upb_msg_reuse_str. */ - -/* Allocates and frees message data, respectively. Newly allocated data is - * initialized to empty. Freeing a message always frees string data, but - * the client can decide whether or not submessages should be deleted. */ -void *upb_msgdata_new(struct upb_msg *m); -void upb_msgdata_free(void *data, struct upb_msg *m, bool free_submsgs); - -/* Given a pointer to the appropriate field of the message or array, these - * functions will lazily allocate memory for a string, array, or submessage. - * If the previously allocated memory is big enough, it will reuse it without - * re-allocating. See upb_msg.c for example usage. */ - -/* Reuse a string of at least the given size. */ -void upb_msg_reuse_str(struct upb_string **str, uint32_t size); -/* Like the previous, but assumes that the string will be by reference, so - * doesn't allocate memory for the string itself. */ -void upb_msg_reuse_strref(struct upb_string **str); - -/* Reuse an array of at least the given size, with the given type. */ -void upb_msg_reuse_array(struct upb_array **arr, uint32_t size, - upb_field_type_t t); - -/* Reuse a submessage of the given type. */ -void upb_msg_reuse_submsg(void **msg, struct upb_msg *m); - -/* Serialization/Deserialization. ********************************************/ - -/* This is all just a layer on top of the stream-oriented facility in - * upb_parse.h. */ - -struct upb_msg_parse_state { - struct upb_parse_state s; - bool merge; - bool byref; - struct upb_msg *m; -}; - -/* Initializes/frees a message parser. The parser will write the data to the - * message data "data", which the caller must have previously allocated (the - * parser will allocate submsgs, strings, and arrays as needed, however). - * - * "Merge" controls whether the parser will append to data instead of - * overwriting. 
Merging concatenates arrays and merges submessages instead - * of clearing both. - * - * "Byref" controls whether the new message data copies or references strings - * it encounters. If byref == true, then all strings supplied to upb_msg_parse - * must remain unchanged and must outlive data. */ -void upb_msg_parse_init(struct upb_msg_parse_state *s, void *data, - struct upb_msg *m, bool merge, bool byref); -void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *data, - struct upb_msg *m, bool merge, bool byref); -void upb_msg_parse_free(struct upb_msg_parse_state *s); - -/* Parses a protobuf fragment, writing the data to the message that was passed - * to upb_msg_parse_init. This function can be called multiple times as more - * data becomes available. */ -upb_status_t upb_msg_parse(struct upb_msg_parse_state *s, - void *data, size_t len, size_t *read); - -/* Parses the protobuf in s (which is expected to be complete) and allocates - * new message data to hold it. This is an alternative to the streaming API - * above. "byref" works as in upb_msg_parse_init(). */ -void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s, bool byref); - - -/* Text dump *****************************************************************/ - -void upb_msg_print(void *data, struct upb_msg *m, FILE *stream); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_MSG_H_ */ -- cgit v1.2.3