From 324d5cce9b8bf4802e7de80badbda410542348d0 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 22 Jun 2009 17:56:36 -0700 Subject: Renamed upb_struct -> upb_msg, fleshed out its definition. --- upb_msg.h | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 upb_msg.h (limited to 'upb_msg.h') diff --git a/upb_msg.h b/upb_msg.h new file mode 100644 index 0000000..c6a3766 --- /dev/null +++ b/upb_msg.h @@ -0,0 +1,226 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * upb_msg contains a full description of a message as defined in a .proto file. + * This allows for run-time reflection over .proto types, but also defines an + * in-memory byte-level format for storing protobufs. + * + * The in-memory format is very much like a C struct that you can define at + * run-time, but also supports reflection. Like C structs it supports + * offset-based access, as opposed to the much slower name-based lookup. The + * format represents both the values themselves and bits describing whether each + * field is set or not. + * + * The upb compiler emits C structs that mimic this definition exactly, so that + * you can access the same hunk of memory using either this run-time + * reflection-supporting interface or a C struct that was generated by the upb + * compiler. + * + * Like C structs the format depends on the endianness of the host machine, so + * it is not suitable for exchanging across machines of differing endianness. + * But there is no reason to do that -- the protobuf serialization format is + * designed already for serialization/deserialization, and is more compact than + * this format. This format is designed to allow the fastest possible random + * access of individual fields. + * + * Note that no memory management is defined, which should make it easier to + * integrate this format with existing memory-management schemes. Any memory + * management semantics can be used with the format as defined here. + */ + +#ifndef PBSTRUCT_H_ +#define PBSTRUCT_H_ + +#include +#include +#include +#include + +#include "upb.h" +#include "upb_table.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Structure definition. ******************************************************/ + +/* One single field of the struct. */ +struct upb_msg_field { + uint32_t byte_offset; /* Where to find the data. */ + uint16_t isset_byte_offset; /* The byte where the "set" bit lives. */ + uint8_t isset_byte_mask; + uint8_t type; /* Copied from the descriptor for cache-friendliness. */ + struct google_protobuf_FieldDescriptorProto *descriptor; + union { + struct upb_msg *msg; + struct upb_enum *_enum; + } ref; +}; + +/* Definition of a complete struct. */ +struct upb_msg { + struct google_protobuf_DescriptorProto *descriptor; + size_t size; + int num_fields; + int set_flags_bytes; + int num_required_fields; /* Required fields have the lowest set bytemasks. */ + struct upb_inttable fields_by_num; + struct upb_strtable fields_by_name; + struct upb_msg_field fields[]; +}; + +/* While these are written to be as fast as possible, it will still be faster + * to cache the results of this lookup if possible. These return NULL if no + * such field is found. */ +struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m, char *name); +struct upb_msg_field *upb_msg_fieldbynumber(struct upb_msg *m, uint32_t number); + +/* Variable-length data (strings and arrays).**********************************/ + +/* Represents an array (a repeated field) of any type. The interpretation of + * the data in the array depends on the type. */ +struct upb_array { + void *data; /* Size of individual elements is based on type. */ + uint32_t len; /* Measured in elements. */ +}; + +/* A generic array of structs, using void* instead of specific types. */ +struct upb_msg_array { + void **elements; + uint32_t len; +}; + +/* An array of strings. */ +struct upb_string_array { + struct upb_string **elements; + uint32_t len; +}; + +/* Specific arrays of all the primitive types. */ +#define UPB_DEFINE_PRIMITIVE_ARRAY(type, name) \ + struct upb_ ## name ## _array { \ + size_t len; \ + type *elements; \ + }; + +UPB_DEFINE_PRIMITIVE_ARRAY(double, double) +UPB_DEFINE_PRIMITIVE_ARRAY(float, float) +UPB_DEFINE_PRIMITIVE_ARRAY(int32_t, int32) +UPB_DEFINE_PRIMITIVE_ARRAY(int64_t, int64) +UPB_DEFINE_PRIMITIVE_ARRAY(uint32_t, uint32) +UPB_DEFINE_PRIMITIVE_ARRAY(uint64_t, uint64) +UPB_DEFINE_PRIMITIVE_ARRAY(bool, bool) +#undef UPB_DEFINE_PRMITIVE_ARRAY + +#define UPB_STRUCT_ARRAY(struct_type) struct struct_type ## _array + +#define UPB_DEFINE_STRUCT_ARRAY(struct_type) \ + UPB_STRUCT_ARRAY(struct_type) { \ + size_t len; \ + struct_type **elements; \ + }; + +/* Accessors for primitive types. ********************************************/ + +/* For each primitive type we define a set of six functions: + * + * // For fetching out of a struct (s points to the raw struct data). + * int32_t *upb_msg_get_int32_ptr(void *s, struct upb_msg_field *f); + * int32_t upb_msg_get_int32(void *s, struct upb_msg_field *f); + * void upb_msg_set_int32(void *s, struct upb_msg_field *f, int32_t val); + * + * // For fetching out of an array. + * int32_t *upb_array_get_int32_ptr(struct upb_array *a, int n); + * int32_t upb_array_get_int32(struct upb_array *a, int n); + * void upb_array_set_int32(struct upb_array *a, int n, ctype val); + * + * For arrays we provide only the first three because protobufs do not support + * arrays of arrays. + * + * These do no existence checks, bounds checks, or type checks. */ + +#define UPB_DEFINE_ACCESSORS(ctype, name, INLINE) \ + INLINE ctype *upb_msg_get_ ## name ## _ptr( \ + void *s, struct upb_msg_field *f) { \ + return (ctype*)((char*)s + f->byte_offset); \ + } \ + INLINE ctype upb_msg_get_ ## name( \ + void *s, struct upb_msg_field *f) { \ + return *upb_msg_get_ ## name ## _ptr(s, f); \ + } \ + INLINE void upb_msg_set_ ## name( \ + void *s, struct upb_msg_field *f, ctype val) { \ + *upb_msg_get_ ## name ## _ptr(s, f) = val; \ + } + +#define UPB_DEFINE_ARRAY_ACCESSORS(ctype, name, INLINE) \ + INLINE ctype *upb_array_get_ ## name ## _ptr(struct upb_array *a, int n) { \ + return ((ctype*)a->data) + n; \ + } \ + INLINE ctype upb_array_get_ ## name(struct upb_array *a, int n) { \ + return *upb_array_get_ ## name ## _ptr(a, n); \ + } \ + INLINE void upb_array_set_ ## name(struct upb_array *a, int n, ctype val) { \ + *upb_array_get_ ## name ## _ptr(a, n) = val; \ + } + +#define UPB_DEFINE_ALL_ACCESSORS(ctype, name, INLINE) \ + UPB_DEFINE_ACCESSORS(ctype, name, INLINE) \ + UPB_DEFINE_ARRAY_ACCESSORS(ctype, name, INLINE) + +UPB_DEFINE_ALL_ACCESSORS(double, double, INLINE) +UPB_DEFINE_ALL_ACCESSORS(float, float, INLINE) +UPB_DEFINE_ALL_ACCESSORS(int32_t, int32, INLINE) +UPB_DEFINE_ALL_ACCESSORS(int64_t, int64, INLINE) +UPB_DEFINE_ALL_ACCESSORS(uint32_t, uint32, INLINE) +UPB_DEFINE_ALL_ACCESSORS(uint64_t, uint64, INLINE) +UPB_DEFINE_ALL_ACCESSORS(bool, bool, INLINE) +UPB_DEFINE_ALL_ACCESSORS(struct upb_string*, bytes, INLINE) +UPB_DEFINE_ALL_ACCESSORS(struct upb_string*, string, INLINE) +UPB_DEFINE_ALL_ACCESSORS(void*, substruct, INLINE) +UPB_DEFINE_ACCESSORS(struct upb_array*, array, INLINE) + +/* Functions for reading and writing the "set" flags in the pbstruct. Note + * that these do not perform any memory management associated with any dynamic + * memory these fields may be referencing; that is the client's responsibility. + * These *only* set and test the flags. */ +INLINE void upb_msg_set(void *s, struct upb_msg_field *f) +{ + ((char*)s)[f->isset_byte_offset] |= f->isset_byte_mask; +} + +INLINE void upb_msg_unset(void *s, struct upb_msg_field *f) +{ + ((char*)s)[f->isset_byte_offset] &= ~f->isset_byte_mask; +} + +INLINE bool upb_msg_is_set(void *s, struct upb_msg_field *f) +{ + return ((char*)s)[f->isset_byte_offset] & f->isset_byte_mask; +} + +INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m) +{ + int num_fields = m->num_required_fields; + int i = 0; + while(num_fields > 8) { + if(((uint8_t*)s)[i++] != 0xFF) return false; + num_fields -= 8; + } + if(((uint8_t*)s)[i] != (1 << num_fields) - 1) return false; + return true; +} + +INLINE void upb_msg_clear(void *s, struct upb_msg *m) +{ + memset(s, 0, m->set_flags_bytes); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* PBSTRUCT_H_ */ -- cgit v1.2.3