From e252432a4176b6524e8c064673459e947ba11cb7 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 14 Nov 2009 13:20:21 -0800 Subject: Refactoring: split defs into their own file, move private parsing funcs out of .h file. --- src/upb_context.c | 3 +- src/upb_def.c | 145 +++++++++++++++++++++++++++++++++++++++ src/upb_def.h | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/upb_enum.c | 34 --------- src/upb_enum.h | 43 ------------ src/upb_msg.c | 110 ----------------------------- src/upb_msg.h | 165 +------------------------------------------- src/upb_parse.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/upb_parse.h | 190 -------------------------------------------------- 9 files changed, 543 insertions(+), 541 deletions(-) create mode 100644 src/upb_def.c create mode 100644 src/upb_def.h delete mode 100644 src/upb_enum.c delete mode 100644 src/upb_enum.h (limited to 'src') diff --git a/src/upb_context.c b/src/upb_context.c index 63ca2b1..2ac14c6 100644 --- a/src/upb_context.c +++ b/src/upb_context.c @@ -8,8 +8,7 @@ #include #include "descriptor.h" #include "upb_context.h" -#include "upb_enum.h" -#include "upb_msg.h" +#include "upb_def.h" #include "upb_mm.h" /* Search for a character in a string, in reverse. */ diff --git a/src/upb_def.c b/src/upb_def.c new file mode 100644 index 0000000..5450f5f --- /dev/null +++ b/src/upb_def.c @@ -0,0 +1,145 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_def.h" +#include "descriptor.h" + +/* Rounds p up to the next multiple of t. */ +#define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t)))) + +static int div_round_up(int numerator, int denominator) { + /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ + return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; +} + +/* Callback for sorting fields. 
*/ +static int compare_fields(const void *e1, const void *e2) { + const google_protobuf_FieldDescriptorProto *fd1 = *(void**)e1; + const google_protobuf_FieldDescriptorProto *fd2 = *(void**)e2; + /* Required fields go before non-required. */ + bool req1 = fd1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; + bool req2 = fd2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; + if(req1 != req2) { + return req2 - req1; + } else { + /* Within required and non-required field lists, list in number order. + * TODO: consider ordering by data size to reduce padding. */ + return fd1->number - fd2->number; + } +} + +void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num) +{ + qsort(fds, num, sizeof(void*), compare_fields); +} + +void upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d, + struct upb_string fqname, bool sort, struct upb_context *c, + struct upb_status *status) +{ + (void)status; // Nothing that can fail at the moment. + int num_fields = d->set_flags.has.field ? d->field->len : 0; + upb_inttable_init(&m->fields_by_num, num_fields, + sizeof(struct upb_fieldsbynum_entry)); + upb_strtable_init(&m->fields_by_name, num_fields, + sizeof(struct upb_fieldsbyname_entry)); + + m->descriptor = d; + m->fqname = fqname; + m->context = c; + m->num_fields = num_fields; + m->set_flags_bytes = div_round_up(m->num_fields, 8); + /* These are incremented in the loop. */ + m->num_required_fields = 0; + m->size = m->set_flags_bytes; + + m->fields = malloc(sizeof(*m->fields) * m->num_fields); + m->field_descriptors = malloc(sizeof(*m->field_descriptors) * m->num_fields); + for(unsigned int i = 0; i < m->num_fields; i++) { + /* We count on the caller to keep this pointer alive. 
*/ + m->field_descriptors[i] = d->field->elements[i]; + } + if(sort) upb_msgdef_sortfds(m->field_descriptors, m->num_fields); + + size_t max_align = 0; + for(unsigned int i = 0; i < m->num_fields; i++) { + struct upb_msg_fielddef *f = &m->fields[i]; + google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i]; + struct upb_type_info *type_info = &upb_type_info[fd->type]; + + /* General alignment rules are: each member must be at an address that is a + * multiple of that type's alignment. Also, the size of the structure as + * a whole must be a multiple of the greatest alignment of any member. */ + f->field_index = i; + f->byte_offset = ALIGN_UP(m->size, type_info->align); + f->type = fd->type; + f->label = fd->label; + m->size = f->byte_offset + type_info->size; + max_align = UPB_MAX(max_align, type_info->align); + if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) + m->num_required_fields++; + + /* Insert into the tables. Note that f->ref will be uninitialized, even in + * the tables' copies of *f, which is why we must update them separately + * in upb_msgdef_setref() below. 
*/ + struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f}; + struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f}; + upb_inttable_insert(&m->fields_by_num, &nument.e); + upb_strtable_insert(&m->fields_by_name, &strent.e); + } + + if(max_align > 0) + m->size = ALIGN_UP(m->size, max_align); +} + +void upb_msgdef_free(struct upb_msgdef *m) +{ + upb_inttable_free(&m->fields_by_num); + upb_strtable_free(&m->fields_by_name); + free(m->fields); + free(m->field_descriptors); +} + +void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f, + union upb_symbol_ref ref) { + struct google_protobuf_FieldDescriptorProto *d = + upb_msg_field_descriptor(f, m); + struct upb_fieldsbynum_entry *int_e = upb_inttable_fast_lookup( + &m->fields_by_num, d->number, sizeof(struct upb_fieldsbynum_entry)); + struct upb_fieldsbyname_entry *str_e = + upb_strtable_lookup(&m->fields_by_name, d->name); + assert(int_e && str_e); + f->ref = ref; + int_e->f.ref = ref; + str_e->f.ref = ref; +} + + +void upb_enum_init(struct upb_enum *e, + struct google_protobuf_EnumDescriptorProto *ed, + struct upb_context *c) { + int num_values = ed->set_flags.has.value ? 
ed->value->len : 0; + e->descriptor = ed; + e->context = c; + upb_atomic_refcount_init(&e->refcount, 0); + upb_strtable_init(&e->nametoint, num_values, sizeof(struct upb_enum_ntoi_entry)); + upb_inttable_init(&e->inttoname, num_values, sizeof(struct upb_enum_iton_entry)); + + for(int i = 0; i < num_values; i++) { + google_protobuf_EnumValueDescriptorProto *value = ed->value->elements[i]; + struct upb_enum_ntoi_entry ntoi_entry = {.e = {.key = *value->name}, + .value = value->number}; + struct upb_enum_iton_entry iton_entry = {.e = {.key = value->number}, + .string = value->name}; + upb_strtable_insert(&e->nametoint, &ntoi_entry.e); + upb_inttable_insert(&e->inttoname, &iton_entry.e); + } +} + +void upb_enum_free(struct upb_enum *e) { + upb_strtable_free(&e->nametoint); + upb_inttable_free(&e->inttoname); +} diff --git a/src/upb_def.h b/src/upb_def.h new file mode 100644 index 0000000..a5d6d4c --- /dev/null +++ b/src/upb_def.h @@ -0,0 +1,202 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * Provides definitions of .proto constructs: + * - upb_msgdef: describes a "message" construct. + * - upb_msg_fielddef: describes a message field. + * - upb_enum: describes an enum. + * (TODO: descriptions of extensions and services). + * + * This file contains routines for creating and manipulating the definitions + * themselves. To create and manipulate actual messages, see upb_msg.h. + */ + +#ifndef UPB_DEF_H_ +#define UPB_DEF_H_ + +#include "upb_atomic.h" +#include "upb_table.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Message definition. ********************************************************/ + +struct upb_msg_fielddef; +struct upb_context; +/* Structure that describes a single .proto message type. */ +struct upb_msgdef { + struct upb_context *context; + struct upb_msg *default_msg; /* Message with all default values set. 
*/ + struct google_protobuf_DescriptorProto *descriptor; + struct upb_string fqname; /* Fully qualified. */ + size_t size; + uint32_t num_fields; + uint32_t set_flags_bytes; + uint32_t num_required_fields; /* Required fields have the lowest set bytemasks. */ + struct upb_inttable fields_by_num; + struct upb_strtable fields_by_name; + struct upb_msg_fielddef *fields; + struct google_protobuf_FieldDescriptorProto **field_descriptors; +}; + +/* Structure that describes a single field in a message. This structure is very + * consciously designed to fit into 12/16 bytes (32/64 bit, respectively), + * because copies of this struct are in the hash table that is read in the + * critical path of parsing. Minimizing the size of this struct increases + * cache-friendliness. */ +struct upb_msg_fielddef { + union upb_symbol_ref ref; + uint32_t byte_offset; /* Where to find the data. */ + uint16_t field_index; /* Indexes upb_msgdef.fields and indicates set bit */ + upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */ + upb_label_t label; +}; + +INLINE bool upb_issubmsg(struct upb_msg_fielddef *f) { + return upb_issubmsgtype(f->type); +} +INLINE bool upb_isstring(struct upb_msg_fielddef *f) { + return upb_isstringtype(f->type); +} +INLINE bool upb_isarray(struct upb_msg_fielddef *f) { + return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED; +} + +INLINE bool upb_field_ismm(struct upb_msg_fielddef *f) { + return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); +} + +INLINE bool upb_elem_ismm(struct upb_msg_fielddef *f) { + return upb_isstring(f) || upb_issubmsg(f); +} + +/* Defined iff upb_field_ismm(f). */ +INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f) { + if(upb_isarray(f)) return UPB_MM_ARR_REF; + else if(upb_isstring(f)) return UPB_MM_STR_REF; + else if(upb_issubmsg(f)) return UPB_MM_MSG_REF; + else return -1; +} + +/* Defined iff upb_elem_ismm(f). 
*/ +INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f) { + if(upb_isstring(f)) return UPB_MM_STR_REF; + else if(upb_issubmsg(f)) return UPB_MM_MSG_REF; + else return -1; +} + +/* Can be used to retrieve a field descriptor given the upb_msg_fielddef. */ +INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( + struct upb_msg_fielddef *f, struct upb_msgdef *m) { + return m->field_descriptors[f->field_index]; +} + +/* Number->field and name->field lookup. *************************************/ + +/* The num->field and name->field maps in upb_msgdef allow fast lookup of fields + * by number or name. These lookups are in the critical path of parsing and + * field lookup, so they must be as fast as possible. To make these more + * cache-friendly, we put the data in the table by value. */ + +struct upb_fieldsbynum_entry { + struct upb_inttable_entry e; + struct upb_msg_fielddef f; +}; + +struct upb_fieldsbyname_entry { + struct upb_strtable_entry e; + struct upb_msg_fielddef f; +}; + +/* Looks up a field by name or number. While these are written to be as fast + * as possible, it will still be faster to cache the results of this lookup if + * possible. These return NULL if no such field is found. */ +INLINE struct upb_msg_fielddef *upb_msg_fieldbynum(struct upb_msgdef *m, + uint32_t number) { + struct upb_fieldsbynum_entry *e = + (struct upb_fieldsbynum_entry*)upb_inttable_fast_lookup( + &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry)); + return e ? &e->f : NULL; +} + +INLINE struct upb_msg_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m, + struct upb_string *name) { + struct upb_fieldsbyname_entry *e = + (struct upb_fieldsbyname_entry*)upb_strtable_lookup( + &m->fields_by_name, name); + return e ? &e->f : NULL; +} + +/* Enums. 
*********************************************************************/ + +struct upb_enum { + upb_atomic_refcount_t refcount; + struct upb_context *context; + struct google_protobuf_EnumDescriptorProto *descriptor; + struct upb_strtable nametoint; + struct upb_inttable inttoname; +}; + +struct upb_enum_ntoi_entry { + struct upb_strtable_entry e; + uint32_t value; +}; + +struct upb_enum_iton_entry { + struct upb_inttable_entry e; + struct upb_string *string; +}; + +/* Initializes and frees an enum, respectively. Caller retains ownership of + * ed, but it must outlive e. */ +void upb_enum_init(struct upb_enum *e, + struct google_protobuf_EnumDescriptorProto *ed, + struct upb_context *c); +void upb_enum_free(struct upb_enum *e); + + +/* Internal functions. ********************************************************/ + +/* Initializes/frees a upb_msgdef. Usually this will be called by upb_context, + * and clients will not have to construct one directly. + * + * Caller retains ownership of d, but the msg will contain references to it, so + * it must outlive the msg. Note that init does not resolve + * upb_msg_fielddef.ref; the caller should do that post-initialization by + * calling upb_msgdef_setref() below. + * + * fqname indicates the fully-qualified name of this message. Ownership of + * fqname passes to the msg, but the msg will contain references to it, so it + * must outlive the msg. + * + * sort indicates whether or not it is safe to reorder the fields from the order + * they appear in d. This should be false if code has been compiled against a + * header for this type that expects the given order. */ +void upb_msgdef_init(struct upb_msgdef *m, + struct google_protobuf_DescriptorProto *d, + struct upb_string fqname, bool sort, + struct upb_context *c, struct upb_status *status); +void upb_msgdef_free(struct upb_msgdef *m); + +/* Sort the given field descriptors in-place, according to what we think is an + * optimal ordering of fields. 
This can change from upb release to upb + * release. */ +void upb_msgdef_sortfds(struct google_protobuf_FieldDescriptorProto **fds, + size_t num); + +/* Clients use this function on a previously initialized upb_msgdef to resolve + * the "ref" field in the upb_msg_fielddef. Since messages can refer to each + * other in mutually-recursive ways, this step must be separated from + * initialization. */ +void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f, + union upb_symbol_ref ref); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_DEF_H_ */ diff --git a/src/upb_enum.c b/src/upb_enum.c deleted file mode 100644 index 4855d89..0000000 --- a/src/upb_enum.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include "descriptor.h" -#include "upb_enum.h" - -void upb_enum_init(struct upb_enum *e, - struct google_protobuf_EnumDescriptorProto *ed, - struct upb_context *c) { - int num_values = ed->set_flags.has.value ? 
ed->value->len : 0; - e->descriptor = ed; - e->context = c; - upb_atomic_refcount_init(&e->refcount, 0); - upb_strtable_init(&e->nametoint, num_values, sizeof(struct upb_enum_ntoi_entry)); - upb_inttable_init(&e->inttoname, num_values, sizeof(struct upb_enum_iton_entry)); - - for(int i = 0; i < num_values; i++) { - google_protobuf_EnumValueDescriptorProto *value = ed->value->elements[i]; - struct upb_enum_ntoi_entry ntoi_entry = {.e = {.key = *value->name}, - .value = value->number}; - struct upb_enum_iton_entry iton_entry = {.e = {.key = value->number}, - .string = value->name}; - upb_strtable_insert(&e->nametoint, &ntoi_entry.e); - upb_inttable_insert(&e->inttoname, &iton_entry.e); - } -} - -void upb_enum_free(struct upb_enum *e) { - upb_strtable_free(&e->nametoint); - upb_inttable_free(&e->inttoname); -} diff --git a/src/upb_enum.h b/src/upb_enum.h deleted file mode 100644 index 9acc075..0000000 --- a/src/upb_enum.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * upb_enum is a simple object that allows run-time reflection over the values - * defined within an enum. */ - -#ifndef UPB_ENUM_H_ -#define UPB_ENUM_H_ - -#include -#include "upb_atomic.h" -#include "upb_context.h" -#include "upb_table.h" -#include "descriptor.h" - -struct upb_enum { - upb_atomic_refcount_t refcount; - struct upb_context *context; - struct google_protobuf_EnumDescriptorProto *descriptor; - struct upb_strtable nametoint; - struct upb_inttable inttoname; -}; - -struct upb_enum_ntoi_entry { - struct upb_strtable_entry e; - uint32_t value; -}; - -struct upb_enum_iton_entry { - struct upb_inttable_entry e; - struct upb_string *string; -}; - -/* Initializes and frees an enum, respectively. Caller retains ownership of - * ed, but it must outlive e. 
*/ -void upb_enum_init(struct upb_enum *e, - struct google_protobuf_EnumDescriptorProto *ed, - struct upb_context *c); -void upb_enum_free(struct upb_enum *e); - -#endif /* UPB_ENUM_H_ */ diff --git a/src/upb_msg.c b/src/upb_msg.c index c56168e..4c78063 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -13,116 +13,6 @@ #include "upb_serialize.h" #include "upb_text.h" -/* Rounds p up to the next multiple of t. */ -#define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t)))) - -static int div_round_up(int numerator, int denominator) { - /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ - return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; -} - -/* Callback for sorting fields. */ -static int compare_fields(const void *e1, const void *e2) { - const google_protobuf_FieldDescriptorProto *fd1 = *(void**)e1; - const google_protobuf_FieldDescriptorProto *fd2 = *(void**)e2; - /* Required fields go before non-required. */ - bool req1 = fd1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; - bool req2 = fd2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; - if(req1 != req2) { - return req2 - req1; - } else { - /* Within required and non-required field lists, list in number order. - * TODO: consider ordering by data size to reduce padding. */ - return fd1->number - fd2->number; - } -} - -void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num) -{ - qsort(fds, num, sizeof(void*), compare_fields); -} - -void upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d, - struct upb_string fqname, bool sort, struct upb_context *c, - struct upb_status *status) -{ - (void)status; // Nothing that can fail at the moment. - int num_fields = d->set_flags.has.field ? 
d->field->len : 0; - upb_inttable_init(&m->fields_by_num, num_fields, - sizeof(struct upb_fieldsbynum_entry)); - upb_strtable_init(&m->fields_by_name, num_fields, - sizeof(struct upb_fieldsbyname_entry)); - - m->descriptor = d; - m->fqname = fqname; - m->context = c; - m->num_fields = num_fields; - m->set_flags_bytes = div_round_up(m->num_fields, 8); - /* These are incremented in the loop. */ - m->num_required_fields = 0; - m->size = m->set_flags_bytes; - - m->fields = malloc(sizeof(*m->fields) * m->num_fields); - m->field_descriptors = malloc(sizeof(*m->field_descriptors) * m->num_fields); - for(unsigned int i = 0; i < m->num_fields; i++) { - /* We count on the caller to keep this pointer alive. */ - m->field_descriptors[i] = d->field->elements[i]; - } - if(sort) upb_msgdef_sortfds(m->field_descriptors, m->num_fields); - - size_t max_align = 0; - for(unsigned int i = 0; i < m->num_fields; i++) { - struct upb_msg_fielddef *f = &m->fields[i]; - google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i]; - struct upb_type_info *type_info = &upb_type_info[fd->type]; - - /* General alignment rules are: each member must be at an address that is a - * multiple of that type's alignment. Also, the size of the structure as - * a whole must be a multiple of the greatest alignment of any member. */ - f->field_index = i; - f->byte_offset = ALIGN_UP(m->size, type_info->align); - f->type = fd->type; - f->label = fd->label; - m->size = f->byte_offset + type_info->size; - max_align = UPB_MAX(max_align, type_info->align); - if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) - m->num_required_fields++; - - /* Insert into the tables. Note that f->ref will be uninitialized, even in - * the tables' copies of *f, which is why we must update them separately - * in upb_msg_setref() below. 
*/ - struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f}; - struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f}; - upb_inttable_insert(&m->fields_by_num, &nument.e); - upb_strtable_insert(&m->fields_by_name, &strent.e); - } - - if(max_align > 0) - m->size = ALIGN_UP(m->size, max_align); -} - -void upb_msgdef_free(struct upb_msgdef *m) -{ - upb_inttable_free(&m->fields_by_num); - upb_strtable_free(&m->fields_by_name); - free(m->fields); - free(m->field_descriptors); -} - -void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f, - union upb_symbol_ref ref) { - struct google_protobuf_FieldDescriptorProto *d = - upb_msg_field_descriptor(f, m); - struct upb_fieldsbynum_entry *int_e = upb_inttable_fast_lookup( - &m->fields_by_num, d->number, sizeof(struct upb_fieldsbynum_entry)); - struct upb_fieldsbyname_entry *str_e = - upb_strtable_lookup(&m->fields_by_name, d->name); - assert(int_e && str_e); - f->ref = ref; - int_e->f.ref = ref; - str_e->f.ref = ref; -} - /* Parsing. ******************************************************************/ struct upb_msgparser_frame { diff --git a/src/upb_msg.h b/src/upb_msg.h index 397b504..552e2dc 100644 --- a/src/upb_msg.h +++ b/src/upb_msg.h @@ -3,29 +3,14 @@ * * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. * - * A upb_msgdef provides a full description of a message type as defined in a - * .proto file. Using a upb_msgdef, it is possible to treat an arbitrary hunk - * of memory (a void*) as a protobuf of the given type. We will call this - * void* a upb_msg in the context of this interface. - * - * Clients generally do not construct or destruct upb_msgdef objects directly. - * They are managed by upb_contexts, and clients can obtain upb_msgdef pointers - * directly from a upb_context. + * The upb_msg routines provide facilities for creating and manipulating + * messages according to a upb_msgdef definition. 
* * A upb_msg is READ-ONLY, and the upb_msgdef functions in this file provide * read-only access. For a mutable message, or for a message that you can take * a reference to to prevents its destruction, see upb_mm_msg.h, which is a * layer on top of upb_msg that adds memory management semantics. * - * upb_msgdef supports many features and operations for dealing with proto - * messages: - * - reflection over .proto types at runtime (list fields, get names, etc). - * - an in-memory byte-level format for efficiently storing and accessing msgs. - * - serializing from the in-memory format to a protobuf. - * - parsing from a protobuf to an in-memory data structure (you either - * supply callbacks for allocating/repurposing memory or use a simplified - * version that parses into newly-allocated memory). - * * The in-memory format is very much like a C struct that you can define at * run-time, but also supports reflection. Like C structs it supports * offset-based access, as opposed to the much slower name-based lookup. The @@ -56,6 +41,7 @@ #include "descriptor.h" #include "upb.h" +#include "upb_def.h" #include "upb_parse.h" #include "upb_table.h" @@ -63,78 +49,6 @@ extern "C" { #endif -/* Message definition. ********************************************************/ - -struct upb_msg_fielddef; -struct upb_context; -/* Structure that describes a single .proto message type. */ -struct upb_msgdef { - struct upb_context *context; - struct upb_msg *default_msg; /* Message with all default values set. */ - struct google_protobuf_DescriptorProto *descriptor; - struct upb_string fqname; /* Fully qualified. */ - size_t size; - uint32_t num_fields; - uint32_t set_flags_bytes; - uint32_t num_required_fields; /* Required fields have the lowest set bytemasks. 
*/ - struct upb_inttable fields_by_num; - struct upb_strtable fields_by_name; - struct upb_msg_fielddef *fields; - struct google_protobuf_FieldDescriptorProto **field_descriptors; -}; - -/* Structure that describes a single field in a message. This structure is very - * consciously designed to fit into 12/16 bytes (32/64 bit, respectively), - * because copies of this struct are in the hash table that is read in the - * critical path of parsing. Minimizing the size of this struct increases - * cache-friendliness. */ -struct upb_msg_fielddef { - union upb_symbol_ref ref; - uint32_t byte_offset; /* Where to find the data. */ - uint16_t field_index; /* Indexes upb_msgdef.fields and indicates set bit */ - upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */ - upb_label_t label; -}; - -INLINE bool upb_issubmsg(struct upb_msg_fielddef *f) { - return upb_issubmsgtype(f->type); -} -INLINE bool upb_isstring(struct upb_msg_fielddef *f) { - return upb_isstringtype(f->type); -} -INLINE bool upb_isarray(struct upb_msg_fielddef *f) { - return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED; -} - -INLINE bool upb_field_ismm(struct upb_msg_fielddef *f) { - return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); -} - -INLINE bool upb_elem_ismm(struct upb_msg_fielddef *f) { - return upb_isstring(f) || upb_issubmsg(f); -} - -/* Defined iff upb_field_ismm(f). */ -INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f) { - if(upb_isarray(f)) return UPB_MM_ARR_REF; - else if(upb_isstring(f)) return UPB_MM_STR_REF; - else if(upb_issubmsg(f)) return UPB_MM_MSG_REF; - else return -1; -} - -/* Defined iff upb_elem_ismm(f). */ -INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f) { - if(upb_isstring(f)) return UPB_MM_STR_REF; - else if(upb_issubmsg(f)) return UPB_MM_MSG_REF; - else return -1; -} - -/* Can be used to retrieve a field descriptor given the upb_msg_fielddef. 
*/ -INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( - struct upb_msg_fielddef *f, struct upb_msgdef *m) { - return m->field_descriptors[f->field_index]; -} - /* Message structure. *********************************************************/ /* Constructs a new msg corresponding to the given msgdef, and having one @@ -217,43 +131,6 @@ INLINE void upb_msg_clear(struct upb_msg *msg) memset(msg->data, 0, msg->def->set_flags_bytes); } -/* Number->field and name->field lookup. *************************************/ - -/* The num->field and name->field maps in upb_msgdef allow fast lookup of fields - * by number or name. These lookups are in the critical path of parsing and - * field lookup, so they must be as fast as possible. To make these more - * cache-friendly, we put the data in the table by value. */ - -struct upb_fieldsbynum_entry { - struct upb_inttable_entry e; - struct upb_msg_fielddef f; -}; - -struct upb_fieldsbyname_entry { - struct upb_strtable_entry e; - struct upb_msg_fielddef f; -}; - -/* Looks up a field by name or number. While these are written to be as fast - * as possible, it will still be faster to cache the results of this lookup if - * possible. These return NULL if no such field is found. */ -INLINE struct upb_msg_fielddef *upb_msg_fieldbynum(struct upb_msgdef *m, - uint32_t number) { - struct upb_fieldsbynum_entry *e = - (struct upb_fieldsbynum_entry*)upb_inttable_fast_lookup( - &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry)); - return e ? &e->f : NULL; -} - -INLINE struct upb_msg_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m, - struct upb_string *name) { - struct upb_fieldsbyname_entry *e = - (struct upb_fieldsbyname_entry*)upb_strtable_lookup( - &m->fields_by_name, name); - return e ? &e->f : NULL; -} - - /* Parsing ********************************************************************/ /* TODO: a stream parser. 
*/ @@ -319,42 +196,6 @@ void upb_msg_serialize_all(struct upb_msg *msg, struct upb_msgsizes *sizes, bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive); void upb_msg_print(struct upb_msg *data, bool single_line, FILE *stream); -/* Internal functions. ********************************************************/ - -/* Initializes/frees a upb_msgdef. Usually this will be called by upb_context, - * and clients will not have to construct one directly. - * - * Caller retains ownership of d, but the msg will contain references to it, so - * it must outlive the msg. Note that init does not resolve - * upb_msg_fielddef.ref the caller should do that post-initialization by - * calling upb_msg_ref() below. - * - * fqname indicates the fully-qualified name of this message. Ownership of - * fqname passes to the msg, but the msg will contain references to it, so it - * must outlive the msg. - * - * sort indicates whether or not it is safe to reorder the fields from the order - * they appear in d. This should be false if code has been compiled against a - * header for this type that expects the given order. */ -void upb_msgdef_init(struct upb_msgdef *m, - struct google_protobuf_DescriptorProto *d, - struct upb_string fqname, bool sort, - struct upb_context *c, struct upb_status *status); -void upb_msgdef_free(struct upb_msgdef *m); - -/* Sort the given field descriptors in-place, according to what we think is an - * optimal ordering of fields. This can change from upb release to upb - * release. */ -void upb_msgdef_sortfds(struct google_protobuf_FieldDescriptorProto **fds, - size_t num); - -/* Clients use this function on a previously initialized upb_msgdef to resolve - * the "ref" field in the upb_msg_fielddef. Since messages can refer to each - * other in mutually-recursive ways, this step must be separated from - * initialization. 
*/ -void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f, - union upb_symbol_ref ref); - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/upb_parse.c b/src/upb_parse.c index 4ae2202..d1d535a 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -9,6 +9,198 @@ #include #include +/* Functions to read wire values. *********************************************/ + +// These functions are internal to the parser, but might be moved into an +// internal header file if we at some point in the future opt to do code +// generation, because the generated code would want to inline these functions. +// The same applies to the functions to read .proto values below. + +uint8_t *upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val, + struct upb_status *status); + +// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). +INLINE uint8_t *upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val, + struct upb_status *status) +{ + // We inline this common case (1-byte varints), if that fails we dispatch to + // the full (non-inlined) version. + if((*buf & 0x80) == 0) { + *val = *buf & 0x7f; + return buf + 1; + } else { + return upb_get_v_uint64_t_full(buf, end, val, status); + } +} + +// Gets a varint -- called when we only need 32 bits of it. +INLINE uint8_t *upb_get_v_uint32_t(uint8_t *buf, uint8_t *end, + uint32_t *val, struct upb_status *status) +{ + uint64_t val64; + uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status); + *val = (uint32_t)val64; // Discard the high bits. + return ret; +} + +// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). 
+INLINE uint8_t *upb_get_f_uint32_t(uint8_t *buf, uint8_t *end, + uint32_t *val, struct upb_status *status) +{ + uint8_t *uint32_end = buf + sizeof(uint32_t); + if(uint32_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; + } +#if UPB_UNALIGNED_READS_OK + *val = *(uint32_t*)buf; +#else +#define SHL(val, bits) ((uint32_t)val << bits) + *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24); +#undef SHL +#endif + return uint32_end; +} + +// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). +INLINE uint8_t *upb_get_f_uint64_t(uint8_t *buf, uint8_t *end, + uint64_t *val, struct upb_status *status) +{ + uint8_t *uint64_end = buf + sizeof(uint64_t); + if(uint64_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; + } +#if UPB_UNALIGNED_READS_OK + *val = *(uint64_t*)buf; +#else +#define SHL(val, bits) ((uint64_t)val << bits) + *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24) | + SHL(buf[4], 32) | SHL(buf[5], 40) | SHL(buf[6], 48) | SHL(buf[7], 56); +#undef SHL +#endif + return uint64_end; +} + +INLINE uint8_t *upb_skip_v_uint64_t(uint8_t *buf, uint8_t *end, + struct upb_status *status) +{ + uint8_t *const maxend = buf + 10; + uint8_t last = 0x80; + for(; buf < (uint8_t*)end && (last & 0x80); buf++) + last = *buf; + + if(buf >= end && buf <= maxend && (last & 0x80)) { + status->code = UPB_STATUS_NEED_MORE_DATA; + buf = end; + } else if(buf > maxend) { + status->code = UPB_ERROR_UNTERMINATED_VARINT; + buf = end; + } + return buf; +} + +INLINE uint8_t *upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end, + struct upb_status *status) +{ + uint8_t *uint32_end = buf + sizeof(uint32_t); + if(uint32_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; + } + return uint32_end; +} + +INLINE uint8_t *upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end, + struct upb_status *status) +{ + uint8_t *uint64_end = buf + sizeof(uint64_t); + if(uint64_end > end) { + status->code = 
UPB_STATUS_NEED_MORE_DATA; + return end; + } + return uint64_end; +} + +/* Functions to read .proto values. *******************************************/ + +// Performs zig-zag decoding, which is used by sint32 and sint64. +INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } +INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } + +// Use macros to define a set of two functions for each .proto type: +// +// // Reads and converts a .proto value from buf, placing it in d. +// // "end" indicates the end of the current buffer (if the buffer does +// // not contain the entire value UPB_STATUS_NEED_MORE_DATA is returned). +// // On success, a pointer will be returned to the first byte that was +// // not consumed. +// uint8_t *upb_get_INT32(uint8_t *buf, uint8_t *end, int32_t *d, +// struct upb_status *status); +// +// // Given an already read wire value s (source), convert it to a .proto +// // value and return it. +// int32_t upb_wvtov_INT32(uint32_t s); +// +// These are the most efficient functions to call if you want to decode a value +// for a known type. 
+ +#define WVTOV(type, wire_t, val_t) \ + INLINE val_t upb_wvtov_ ## type(wire_t s) + +#define GET(type, v_or_f, wire_t, val_t, member_name) \ + INLINE uint8_t *upb_get_ ## type(uint8_t *buf, uint8_t *end, val_t *d, \ + struct upb_status *status) { \ + wire_t tmp = 0; \ + uint8_t *ret = upb_get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, status); \ + *d = upb_wvtov_ ## type(tmp); \ + return ret; \ + } + +#define T(type, v_or_f, wire_t, val_t, member_name) \ + WVTOV(type, wire_t, val_t); /* prototype for GET below */ \ + GET(type, v_or_f, wire_t, val_t, member_name) \ + WVTOV(type, wire_t, val_t) + +T(INT32, v, uint32_t, int32_t, int32) { return (int32_t)s; } +T(INT64, v, uint64_t, int64_t, int64) { return (int64_t)s; } +T(UINT32, v, uint32_t, uint32_t, uint32) { return s; } +T(UINT64, v, uint64_t, uint64_t, uint64) { return s; } +T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzdec_32(s); } +T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzdec_64(s); } +T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; } +T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; } +T(SFIXED32, f, uint32_t, int32_t, int32) { return (int32_t)s; } +T(SFIXED64, f, uint64_t, int64_t, int64) { return (int64_t)s; } +T(BOOL, v, uint32_t, bool, _bool) { return (bool)s; } +T(ENUM, v, uint32_t, int32_t, int32) { return (int32_t)s; } +T(DOUBLE, f, uint64_t, double, _double) { + union upb_value v; + v.uint64 = s; + return v._double; +} +T(FLOAT, f, uint32_t, float, _float) { + union upb_value v; + v.uint32 = s; + return v._float; +} + +#undef WVTOV +#undef GET +#undef T + +// Parses a tag, places the result in *tag. 
+INLINE uint8_t *parse_tag(uint8_t *buf, uint8_t *end, struct upb_tag *tag, + struct upb_status *status) +{ + uint32_t tag_int; + uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); + tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); + tag->field_number = tag_int >> 3; + return ret; +} + + /** * Parses a 64-bit varint that is known to be >= 2 bytes (the inline version * handles 1 and 2 byte varints). diff --git a/src/upb_parse.h b/src/upb_parse.h index 9f832c9..9e64a5b 100644 --- a/src/upb_parse.h +++ b/src/upb_parse.h @@ -122,196 +122,6 @@ uint8_t *upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt, union upb_wire_value *wv, struct upb_status *status); -/* Functions to read wire values. *********************************************/ - -// Most clients will not want to use these directly. - -uint8_t *upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val, - struct upb_status *status); - -// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). -INLINE uint8_t *upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val, - struct upb_status *status) -{ - // We inline this common case (1-byte varints), if that fails we dispatch to - // the full (non-inlined) version. - if((*buf & 0x80) == 0) { - *val = *buf & 0x7f; - return buf + 1; - } else { - return upb_get_v_uint64_t_full(buf, end, val, status); - } -} - -// Gets a varint -- called when we only need 32 bits of it. -INLINE uint8_t *upb_get_v_uint32_t(uint8_t *buf, uint8_t *end, - uint32_t *val, struct upb_status *status) -{ - uint64_t val64; - uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status); - *val = (uint32_t)val64; // Discard the high bits. - return ret; -} - -// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). 
-INLINE uint8_t *upb_get_f_uint32_t(uint8_t *buf, uint8_t *end, - uint32_t *val, struct upb_status *status) -{ - uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } -#if UPB_UNALIGNED_READS_OK - *val = *(uint32_t*)buf; -#else -#define SHL(val, bits) ((uint32_t)val << bits) - *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24); -#undef SHL -#endif - return uint32_end; -} - -// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). -INLINE uint8_t *upb_get_f_uint64_t(uint8_t *buf, uint8_t *end, - uint64_t *val, struct upb_status *status) -{ - uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } -#if UPB_UNALIGNED_READS_OK - *val = *(uint64_t*)buf; -#else -#define SHL(val, bits) ((uint64_t)val << bits) - *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24) | - SHL(buf[4], 32) | SHL(buf[5], 40) | SHL(buf[6], 48) | SHL(buf[7], 56); -#undef SHL -#endif - return uint64_end; -} - -INLINE uint8_t *upb_skip_v_uint64_t(uint8_t *buf, uint8_t *end, - struct upb_status *status) -{ - uint8_t *const maxend = buf + 10; - uint8_t last = 0x80; - for(; buf < (uint8_t*)end && (last & 0x80); buf++) - last = *buf; - - if(buf >= end && buf <= maxend && (last & 0x80)) { - status->code = UPB_STATUS_NEED_MORE_DATA; - buf = end; - } else if(buf > maxend) { - status->code = UPB_ERROR_UNTERMINATED_VARINT; - buf = end; - } - return buf; -} - -INLINE uint8_t *upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end, - struct upb_status *status) -{ - uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint32_end; -} - -INLINE uint8_t *upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end, - struct upb_status *status) -{ - uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = 
UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint64_end; -} - - -/* Functions to read .proto values. *******************************************/ - - -// Performs zig-zag decoding, which is used by sint32 and sint64. -INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } -INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } - -// Use macros to define a set of two functions for each .proto type: -// -// // Reads and converts a .proto value from buf, placing it in d. -// // "end" indicates the end of the current buffer (if the buffer does -// // not contain the entire value UPB_STATUS_NEED_MORE_DATA is returned). -// // On success, a pointer will be returned to the first byte that was -// // not consumed. -// uint8_t *upb_get_INT32(uint8_t *buf, uint8_t *end, int32_t *d, -// struct upb_status *status); -// -// // Given an already read wire value s (source), convert it to a .proto -// // value and return it. -// int32_t upb_wvtov_INT32(uint32_t s); -// -// These are the most efficient functions to call if you want to decode a value -// for a known type. 
- -#define WVTOV(type, wire_t, val_t) \ - INLINE val_t upb_wvtov_ ## type(wire_t s) - -#define GET(type, v_or_f, wire_t, val_t, member_name) \ - INLINE uint8_t *upb_get_ ## type(uint8_t *buf, uint8_t *end, val_t *d, \ - struct upb_status *status) { \ - wire_t tmp = 0; \ - uint8_t *ret = upb_get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, status); \ - *d = upb_wvtov_ ## type(tmp); \ - return ret; \ - } - -#define T(type, v_or_f, wire_t, val_t, member_name) \ - WVTOV(type, wire_t, val_t); /* prototype for GET below */ \ - GET(type, v_or_f, wire_t, val_t, member_name) \ - WVTOV(type, wire_t, val_t) - -T(INT32, v, uint32_t, int32_t, int32) { return (int32_t)s; } -T(INT64, v, uint64_t, int64_t, int64) { return (int64_t)s; } -T(UINT32, v, uint32_t, uint32_t, uint32) { return s; } -T(UINT64, v, uint64_t, uint64_t, uint64) { return s; } -T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzdec_32(s); } -T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzdec_64(s); } -T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; } -T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; } -T(SFIXED32, f, uint32_t, int32_t, int32) { return (int32_t)s; } -T(SFIXED64, f, uint64_t, int64_t, int64) { return (int64_t)s; } -T(BOOL, v, uint32_t, bool, _bool) { return (bool)s; } -T(ENUM, v, uint32_t, int32_t, int32) { return (int32_t)s; } -T(DOUBLE, f, uint64_t, double, _double) { - union upb_value v; - v.uint64 = s; - return v._double; -} -T(FLOAT, f, uint32_t, float, _float) { - union upb_value v; - v.uint32 = s; - return v._float; -} - -#undef WVTOV -#undef GET -#undef T - -// Parses a tag, places the result in *tag. -INLINE uint8_t *parse_tag(uint8_t *buf, uint8_t *end, struct upb_tag *tag, - struct upb_status *status) -{ - uint32_t tag_int; - uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); - tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); - tag->field_number = tag_int >> 3; - return ret; -} - #ifdef __cplusplus } /* extern "C" */ #endif -- cgit v1.2.3