From 88cc3f2fc88aa233e9357f1c5194494e7de4c0ee Mon Sep 17 00:00:00 2001
From: Joshua Haberman
Date: Wed, 24 Jun 2009 21:30:04 -0700
Subject: Progress implementing upb_msg.

---
 upb.h         |   8 ++++++
 upb_context.c |   6 ++---
 upb_context.h |   2 +-
 upb_msg.c     | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 upb_msg.h     |  57 +++++++++++++++++++++++++++++++-----------
 upb_parse.c   |  42 +++++++++++++++++--------------
 6 files changed, 179 insertions(+), 38 deletions(-)

diff --git a/upb.h b/upb.h
index 457e9ee..bb1563a 100644
--- a/upb.h
+++ b/upb.h
@@ -67,6 +67,14 @@ union upb_wire_value {
  * represent exceptional circumstances. */
 typedef uint8_t upb_field_type_t;
 
+struct upb_type_info {
+  uint8_t align;               /* Alignment of the in-memory representation. */
+  uint8_t size;                /* Size of the in-memory representation. */
+  uint8_t expected_wire_type;  /* One of the UPB_WIRE_TYPE_* values. */
+};
+
+extern struct upb_type_info upb_type_info[];
+
 /* A value as described in a .proto file, except delimited, which is handled
  * separately. */
 union upb_value {
diff --git a/upb_context.c b/upb_context.c
index 23c2bdf..db20d89 100644
--- a/upb_context.c
+++ b/upb_context.c
@@ -4,7 +4,6 @@
  * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
  */
 
-#include
 #include
 #include
 #include "descriptor.h"
@@ -17,12 +16,13 @@ int memrchr(char *data, char c, size_t len)
   return off;
 }
 
-void upb_context_init(struct upb_context *c)
+bool upb_context_init(struct upb_context *c)
 {
   upb_strtable_init(&c->symtab, 16, sizeof(struct upb_symtab_entry));
   /* Add all the types in descriptor.proto so we can parse descriptors. */
   if(!upb_context_addfd(c, &google_protobuf_filedescriptor, UPB_ONREDEF_ERROR))
-    assert(false);
+    return false;  /* Indicates that upb is buggy or corrupt. */
+  return true;
 }
 
 void upb_context_free(struct upb_context *c)
diff --git a/upb_context.h b/upb_context.h
index 3ec1566..d3e6904 100644
--- a/upb_context.h
+++ b/upb_context.h
@@ -42,7 +42,7 @@ struct upb_context {
 
 /* Initializes and frees a upb_context, respectively.  Newly initialized
  * contexts will always have the types in descriptor.proto defined. */
-void upb_context_init(struct upb_context *c);
+bool upb_context_init(struct upb_context *c);
 void upb_context_free(struct upb_context *c);
 
 /* Looking up symbols. ********************************************************/
diff --git a/upb_msg.c b/upb_msg.c
index 48b0472..9c03301 100644
--- a/upb_msg.c
+++ b/upb_msg.c
@@ -3,4 +3,106 @@
  *
  */
 
+#include <stdlib.h>
+#include "descriptor.h"
 #include "upb_msg.h"
+
+/* Rounds p up to a multiple of t.  t must be a nonzero power of two. */
+#define ALIGN_UP(p, t) (((p) + (t) - 1) & ~((size_t)(t) - 1))
+
+/* Returns the larger of a and b. */
+static uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; }
+
+/* Divides rounding up.  Returns 0 when numerator is not positive. */
+static int div_round_up(int numerator, int denominator) {
+  /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
+  return numerator > 0 ? (numerator - 1) / denominator + 1 : 0;
+}
+
+/* qsort() comparator over field descriptors: required fields sort first, and
+ * fields within each of the two groups sort by ascending field number. */
+static int compare_fields(const void *e1, const void *e2) {
+  const google_protobuf_FieldDescriptorProto *f1 = e1, *f2 = e2;
+  /* Required fields go before non-required. */
+  if(f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED &&
+     f2->label != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) {
+    return -1;
+  } else if(f1->label != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED &&
+            f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) {
+    return 1;
+  } else {
+    /* Within required and non-required field lists, list in number order.
+     * Compare rather than subtract so that extreme values cannot overflow. */
+    return (f1->number > f2->number) - (f1->number < f2->number);
+  }
+}
+
+/* Lays out the message described by d into m: allocates m->fields, sizes the
+ * by-number and by-name tables, and gives every field an index, a type and an
+ * aligned byte offset placed after the set-flag bytes.  Returns false if d's
+ * field list is not flagged as set or the field array cannot be allocated,
+ * true otherwise.  Does not resolve upb_msg_field.ref. */
+bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d)
+{
+  /* TODO: more complete validation. */
+  if(!d->set_flags.has.field) return false;
+
+  /* Allocate before initializing the tables so that a failed allocation
+   * leaves nothing behind that would have to be freed. */
+  m->fields = malloc(sizeof(struct upb_msg_field) * d->field->len);
+  if(!m->fields && d->field->len > 0) return false;
+
+  upb_inttable_init(&m->fields_by_num, d->field->len,
+                    sizeof(struct upb_fieldsbynum_entry));
+  upb_strtable_init(&m->fields_by_name, d->field->len,
+                    sizeof(struct upb_fieldsbyname_entry));
+
+  m->num_fields = d->field->len;
+  m->set_flags_bytes = div_round_up(m->num_fields, 8);
+
+  /* These are incremented in the loop. */
+  m->num_required_fields = 0;
+  m->size = m->set_flags_bytes;
+
+  /* FIXME: this sorts the still-uninitialized m->fields array, whereas
+   * compare_fields() reads its elements as field descriptors.  The
+   * descriptor's field list is what has to be ordered here. */
+  qsort(m->fields, d->field->len, sizeof(struct upb_msg_field), compare_fields);
+
+  size_t max_align = 0;
+
+  for(unsigned int i = 0; i < d->field->len; i++) {
+    struct upb_msg_field *f = &m->fields[i];
+    /* FIXME: fd is still uninitialized; it has to point at the i-th field
+     * descriptor of d before any of the reads below are valid. */
+    google_protobuf_FieldDescriptorProto *fd; /* TODO */
+    f->field_index = i;
+    f->type = fd->type;
+    /* Look the type up only after f->type has been assigned. */
+    struct upb_type_info *type_info = &upb_type_info[f->type];
+    f->byte_offset = ALIGN_UP(m->size, type_info->align);
+    m->size = f->byte_offset + type_info->size;
+    max_align = max(max_align, type_info->align);
+    if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED)
+      m->num_required_fields++;
+
+    /* Insert into the tables.  Note that f->ref will be uninitialized, even in
+     * the tables' copies of *f, which is why we must update them separately
+     * when the references are resolved. */
+    struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f};
+    struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f};
+    upb_inttable_insert(&m->fields_by_num, &nument.e);
+    upb_strtable_insert(&m->fields_by_name, &strent.e);
+  }
+
+  m->size = ALIGN_UP(m->size, max_align);
+  return true;
+}
+
+/* Frees the two lookup tables and the field array of m. */
+void upb_msg_free(struct upb_msg *m)
+{
+  upb_inttable_free(&m->fields_by_num);
+  upb_strtable_free(&m->fields_by_name);
+  free(m->fields);
+}
diff --git a/upb_msg.h b/upb_msg.h
index 6bbb3c3..54ca82d 100644
--- a/upb_msg.h
+++ b/upb_msg.h
@@ -48,17 +48,25 @@ extern "C" {
 /* Structure definition. ******************************************************/
 
 struct upb_msg_field {
-  uint32_t byte_offset;          /* Where to find the data. */
-  uint16_t isset_byte_offset;    /* The byte where the "set" bit lives. */
-  uint8_t isset_byte_mask;
-  upb_field_type_t type;  /* Copied from descriptor for cache-friendliness. */
-  struct google_protobuf_FieldDescriptorProto *descriptor;
+  uint32_t byte_offset;     /* Where to find the data. */
+  uint32_t field_index:24;  /* Indexes upb_msg.fields.  Also indicates set bit */
+  upb_field_type_t type;    /* Copied from descriptor for cache-friendliness. */
   union {
-    struct upb_msg *msg;
-    struct upb_enum *_enum;
+    struct upb_msg *msg;      /* Set if type == MESSAGE */
+    struct upb_enum *_enum;   /* Set if type == ENUM */
   } ref;
 };
 
+struct upb_fieldsbynum_entry {
+  struct upb_inttable_entry e;  /* Table entry; its key is the field number. */
+  struct upb_msg_field f;       /* By-value copy of the field. */
+};
+
+struct upb_fieldsbyname_entry {
+  struct upb_strtable_entry e;  /* Table entry; its key is the field name. */
+  struct upb_msg_field f;       /* By-value copy of the field. */
+};
+
 struct upb_msg {
   struct google_protobuf_DescriptorProto *descriptor;
   size_t size;
@@ -70,16 +78,27 @@ struct upb_msg {
   struct upb_msg_field *fields;
 };
 
-/* Initialize and free a upb_msg.  Note that init does not resolve
- * upb_msg_field.ref -- that is left to the caller. */
-void upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d);
+/* Initialize and free a upb_msg.  Caller retains ownership of d, but the msg
+ * will contain references to it, so it must outlive the msg.  Note that init
+ * does not resolve upb_msg_field.ref -- that is left to the caller. */
+bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d);
 void upb_msg_free(struct upb_msg *m);
 
 /* While these are written to be as fast as possible, it will still be faster
  * to cache the results of this lookup if possible.  These return NULL if no
  * such field is found. */
-struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m, char *name);
-struct upb_msg_field *upb_msg_fieldbynumber(struct upb_msg *m, uint32_t number);
+INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
+                                                uint32_t number) {
+  struct upb_fieldsbynum_entry *e = upb_inttable_lookup(
+      &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry));
+  return e ? &e->f : NULL;
+}
+INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
+                                                 struct upb_string *name) {
+  struct upb_fieldsbyname_entry *e =
+      upb_strtable_lookup(&m->fields_by_name, name);
+  return e ? &e->f : NULL;
+}
 
 /* Variable-length data (strings and arrays).**********************************/
 
@@ -186,23 +205,31 @@ UPB_DEFINE_ALL_ACCESSORS(struct upb_string*, string, INLINE)
 UPB_DEFINE_ALL_ACCESSORS(void*, substruct, INLINE)
 UPB_DEFINE_ACCESSORS(struct upb_array*, array, INLINE)
 
+INLINE size_t upb_isset_offset(uint32_t field_index) {  /* Byte holding the field's "set" bit. */
+  return field_index / 8;
+}
+
+INLINE size_t upb_isset_mask(uint32_t field_index) {  /* Mask of the "set" bit within that byte. */
+  return 1 << (field_index % 8);
+}
+
 /* Functions for reading and writing the "set" flags in the pbstruct.  Note
  * that these do not perform any memory management associated with any dynamic
  * memory these fields may be referencing; that is the client's responsibility.
  * These *only* set and test the flags. */
 INLINE void upb_msg_set(void *s, struct upb_msg_field *f)
 {
-  ((char*)s)[f->isset_byte_offset] |= f->isset_byte_mask;
+  ((char*)s)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index);
 }
 
 INLINE void upb_msg_unset(void *s, struct upb_msg_field *f)
 {
-  ((char*)s)[f->isset_byte_offset] &= ~f->isset_byte_mask;
+  ((char*)s)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index);
 }
 
 INLINE bool upb_msg_is_set(void *s, struct upb_msg_field *f)
 {
-  return ((char*)s)[f->isset_byte_offset] & f->isset_byte_mask;
+  return ((char*)s)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index);
 }
 
 INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
diff --git a/upb_parse.c b/upb_parse.c
index 088091b..2b9c875 100644
--- a/upb_parse.c
+++ b/upb_parse.c
@@ -7,6 +7,7 @@
 #include "upb_parse.h"
 
 #include
+#include <stddef.h>
 #include
 #include "descriptor.h"
 
@@ -168,25 +169,28 @@ T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; }
 #undef GET
 #undef T
 
-upb_wire_type_t upb_expected_wire_types[] = {
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE] = UPB_WIRE_TYPE_64BIT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT] = UPB_WIRE_TYPE_32BIT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64] = UPB_WIRE_TYPE_VARINT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64] = UPB_WIRE_TYPE_VARINT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32] = UPB_WIRE_TYPE_VARINT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = UPB_WIRE_TYPE_64BIT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = UPB_WIRE_TYPE_32BIT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = UPB_WIRE_TYPE_VARINT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = UPB_WIRE_TYPE_DELIMITED,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = UPB_WIRE_TYPE_DELIMITED,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = -1, /* TODO */
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = UPB_WIRE_TYPE_DELIMITED,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = UPB_WIRE_TYPE_VARINT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = UPB_WIRE_TYPE_VARINT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32] = UPB_WIRE_TYPE_32BIT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64] = UPB_WIRE_TYPE_64BIT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = UPB_WIRE_TYPE_VARINT,
-  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = UPB_WIRE_TYPE_VARINT,
+#define alignof(t) offsetof(struct { char c; t x; }, x)  /* Offset of a t placed after one char. */
+
+/* May want to move this to upb.c if enough other things warrant it. */
+struct upb_type_info upb_type_info[] = {
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE]  = {alignof(double),   sizeof(double),   UPB_WIRE_TYPE_64BIT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT]   = {alignof(float),    sizeof(float),    UPB_WIRE_TYPE_32BIT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64]   = {alignof(int64_t),  sizeof(int64_t),  UPB_WIRE_TYPE_VARINT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64]  = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_VARINT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32]   = {alignof(int32_t),  sizeof(int32_t),  UPB_WIRE_TYPE_VARINT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_64BIT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_32BIT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL]    = {alignof(bool),     sizeof(bool),     UPB_WIRE_TYPE_VARINT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING]  = {alignof(struct upb_string), sizeof(struct upb_string), UPB_WIRE_TYPE_DELIMITED},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES]   = {alignof(struct upb_string), sizeof(struct upb_string), UPB_WIRE_TYPE_DELIMITED},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP]   = {0,0,0},  /* NOTE(review): placeholder; the old table had -1 (TODO) here. */
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = {alignof(void*),    sizeof(void*),    UPB_WIRE_TYPE_DELIMITED},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32]  = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM]    = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32]= {alignof(int32_t),  sizeof(int32_t),  UPB_WIRE_TYPE_32BIT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64]= {alignof(int64_t),  sizeof(int64_t),  UPB_WIRE_TYPE_64BIT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32]  = {alignof(int32_t),  sizeof(int32_t),  UPB_WIRE_TYPE_VARINT},
+  [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64]  = {alignof(int64_t),  sizeof(int64_t),  UPB_WIRE_TYPE_VARINT},
 };
 
 upb_status_t upb_parse_tag(void **buf, struct upb_tag *tag)
-- 
cgit v1.2.3