/* * upb - a minimalist implementation of protocol buffers. * * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. * * upb_msg contains a full description of a message as defined in a .proto file. * It supports many features and operations for dealing with proto messages: * - reflection over .proto types at runtime (list fields, get names, etc). * - an in-memory byte-level format for efficiently storing and accessing msgs. * - serializing and deserializing from the in-memory format to a protobuf. * - optional memory management for handling strings, arrays, and submessages. * * The in-memory format is very much like a C struct that you can define at * run-time, but also supports reflection. Like C structs it supports * offset-based access, as opposed to the much slower name-based lookup. The * format represents both the values themselves and bits describing whether each * field is set or not. * * The upb compiler emits C structs that mimic this definition exactly, so that * you can access the same hunk of memory using either this run-time * reflection-supporting interface or a C struct that was generated by the upb * compiler. * * Like C structs the format depends on the endianness of the host machine, so * it is not suitable for exchanging across machines of differing endianness. * But there is no reason to do that -- the protobuf serialization format is * designed already for serialization/deserialization, and is more compact than * this format. This format is designed to allow the fastest possible random * access of individual fields. * * Note that clients need not use the memory management facilities defined here. * They are for convenience only -- clients wishing to do their own memory * management may do so (allowing clients to perform advanced techniques like * reference-counting, garbage collection, and string references). Different * clients can read each others messages regardless of what memory management * scheme each is using. */ #ifndef UPB_MSG_H_ #define UPB_MSG_H_ #include #include #include #include #include "upb.h" #include "upb_table.h" #include "upb_parse.h" #ifdef __cplusplus extern "C" { #endif /* Forward declarations from descriptor.h. */ struct google_protobuf_DescriptorProto; struct google_protobuf_FieldDescriptorProto; /* Message definition. ********************************************************/ /* Structure that describes a single field in a message. This structure is very * consciously designed to fit into 12/16 bytes (32/64 bit, respectively). */ struct upb_msg_field { union upb_symbol_ref ref; uint32_t byte_offset; /* Where to find the data. */ uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */ upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */ upb_label_t label; }; /* Structure that describes a single .proto message type. */ struct upb_msg { struct google_protobuf_DescriptorProto *descriptor; size_t size; uint32_t num_fields; uint32_t set_flags_bytes; uint32_t num_required_fields; /* Required fields have the lowest set bytemasks. */ struct upb_inttable fields_by_num; struct upb_strtable fields_by_name; struct upb_msg_field *fields; struct google_protobuf_FieldDescriptorProto **field_descriptors; }; /* The num->field and name->field maps in upb_msg allow fast lookup of fields * by number or name. These lookups are in the critical path of parsing and * field lookup, so they must be as fast as possible. To make these more * cache-friendly, we put the data in the table by value. */ struct upb_fieldsbynum_entry { struct upb_inttable_entry e; struct upb_msg_field f; }; struct upb_fieldsbyname_entry { struct upb_strtable_entry e; struct upb_msg_field f; }; /* Can be used to retrieve a field descriptor given the upb_msg_field ref. */ INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( struct upb_msg_field *f, struct upb_msg *m) { return m->field_descriptors[f->field_index]; } /* Initialize and free a upb_msg. Caller retains ownership of d, but the msg * will contain references to it, so it must outlive the msg. Note that init * does not resolve upb_msg_field.ref -- the caller should do that * post-initialization by calling upb_msg_ref() below. */ bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d); void upb_msg_free(struct upb_msg *m); /* Clients use this function on a previously initialized upb_msg to resolve the * "ref" field in the upb_msg_field. Since messages can refer to each other in * mutually-recursive ways, this step must be separated from initialization. */ void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f, union upb_symbol_ref ref); /* While these are written to be as fast as possible, it will still be faster * to cache the results of this lookup if possible. These return NULL if no * such field is found. */ INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m, uint32_t number) { struct upb_fieldsbynum_entry *e = upb_inttable_lookup( &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry)); return e ? &e->f : NULL; } INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m, struct upb_string *name) { struct upb_fieldsbyname_entry *e = upb_strtable_lookup(&m->fields_by_name, name); return e ? &e->f : NULL; } /* Arrays. ********************************************************************/ /* Represents an array (a repeated field) of any type. The interpretation of * the data in the array depends on the type. */ struct upb_array { union { double *_double; float *_float; int32_t *int32; int64_t *int64; uint32_t *uint32; uint64_t *uint64; bool *_bool; struct upb_string **string; void **submsg; void *_void; } elements; uint32_t len; /* Measured in elements. */ }; /* These are all overlays on upb_array, pointers between them can be cast. */ #define UPB_DEFINE_ARRAY_TYPE(name, type) \ struct upb_ ## name ## _array { \ type *elements; \ uint32_t len; \ }; INLINE union upb_value_ptr upb_array_getelementptr( struct upb_array *arr, uint32_t n, upb_field_type_t type) { union upb_value_ptr ptr = { ._void = ((char*)arr->elements._void + n*upb_type_info[type].size) }; return ptr; } UPB_DEFINE_ARRAY_TYPE(upb_double, double) UPB_DEFINE_ARRAY_TYPE(upb_float, float) UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t) UPB_DEFINE_ARRAY_TYPE(upb_int64, int64_t) UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t) UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t) UPB_DEFINE_ARRAY_TYPE(upb_bool, bool) UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*) #define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array #define UPB_DEFINE_MSG_ARRAY(msg_type) \ UPB_MSG_ARRAY(msg_type) { \ msg_type **elements; \ uint32_t len; \ }; /* Accessors for primitive types. ********************************************/ /* For each primitive type we define a set of three functions: * * // For fetching out of a msg (s points to the raw msg data). * int32_t *upb_msg_get_int32_ptr(void *s, struct upb_msg_field *f); * int32_t upb_msg_get_int32(void *s, struct upb_msg_field *f); * void upb_msg_set_int32(void *s, struct upb_msg_field *f, int32_t val); * * These do no existence checks, bounds checks, or type checks. */ #define UPB_DEFINE_ACCESSORS(INLINE, name, ctype) \ INLINE ctype *upb_msg_get_ ## name ## _ptr( \ void *s, struct upb_msg_field *f) { \ return (ctype*)((char*)s + f->byte_offset); \ } \ INLINE ctype upb_msg_get_ ## name( \ void *s, struct upb_msg_field *f) { \ return *upb_msg_get_ ## name ## _ptr(s, f); \ } \ INLINE void upb_msg_set_ ## name( \ void *s, struct upb_msg_field *f, ctype val) { \ *upb_msg_get_ ## name ## _ptr(s, f) = val; \ } UPB_DEFINE_ACCESSORS(INLINE, double, double) UPB_DEFINE_ACCESSORS(INLINE, float, float) UPB_DEFINE_ACCESSORS(INLINE, int32, int32_t) UPB_DEFINE_ACCESSORS(INLINE, int64, int64_t) UPB_DEFINE_ACCESSORS(INLINE, uint32, uint32_t) UPB_DEFINE_ACCESSORS(INLINE, uint64, uint64_t) UPB_DEFINE_ACCESSORS(INLINE, bool, bool) UPB_DEFINE_ACCESSORS(INLINE, bytes, struct upb_string*) UPB_DEFINE_ACCESSORS(INLINE, string, struct upb_string*) UPB_DEFINE_ACCESSORS(INLINE, submsg, void*) UPB_DEFINE_ACCESSORS(INLINE, array, struct upb_array*) INLINE union upb_value_ptr upb_msg_get_ptr( void *data, struct upb_msg_field *f) { union upb_value_ptr p = {._void = ((char*)data + f->byte_offset)}; return p; } /* Memory management *********************************************************/ void *upb_msg_new(struct upb_msg *m); struct upb_msg_parse_state { struct upb_parse_state s; bool merge; bool byref; struct upb_msg *m; }; void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg, struct upb_msg *m, bool merge, bool byref); void upb_msg_parse_free(struct upb_msg_parse_state *s); upb_status_t upb_msg_parse(struct upb_msg_parse_state *s, void *data, size_t len, size_t *read); void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s, bool byref); /* Note! These two may not be use on a upb_string* that was initialized by * means other than these functions. */ void upb_msg_reuse_str(struct upb_string **str, uint32_t len); void upb_msg_reuse_array(struct upb_array **arr, uint32_t n, upb_field_type_t t); void upb_msg_reuse_strref(struct upb_string **str); void upb_msg_reuse_submsg(void **msg, struct upb_msg *m); /* "Set" flag reading and writing. *******************************************/ INLINE size_t upb_isset_offset(uint32_t field_index) { return field_index / 8; } INLINE uint8_t upb_isset_mask(uint32_t field_index) { return 1 << (field_index % 8); } /* Functions for reading and writing the "set" flags in the msg. Note that * these do not perform memory management associated with any dynamic memory * these fields may be referencing. These *only* set and test the flags. */ INLINE void upb_msg_set(void *s, struct upb_msg_field *f) { ((char*)s)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index); } INLINE void upb_msg_unset(void *s, struct upb_msg_field *f) { ((char*)s)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index); } INLINE bool upb_msg_is_set(void *s, struct upb_msg_field *f) { return ((char*)s)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index); } INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m) { int num_fields = m->num_required_fields; int i = 0; while(num_fields > 8) { if(((uint8_t*)s)[i++] != 0xFF) return false; num_fields -= 8; } if(((uint8_t*)s)[i] != (1 << num_fields) - 1) return false; return true; } INLINE void upb_msg_clear(void *s, struct upb_msg *m) { memset(s, 0, m->set_flags_bytes); } /* Serialization/Deserialization. ********************************************/ /* Parses the string data in s according to the message description in m. */ upb_status_t upb_msg_merge(void *data, struct upb_msg *m, struct upb_string *s); #ifdef __cplusplus } /* extern "C" */ #endif #endif /* UPB_MSG_H_ */