From 3a67a1e9f95535adbc67f42771789cd83443a377 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 27 Jun 2009 11:49:41 -0700 Subject: upb_msg constructor now more or less works. --- upb_msg.c | 50 +++++++++++++++++++++++++++------------------ upb_msg.h | 69 +++++++++++++++++++++++++++++++++++++++++++++------------------ 2 files changed, 79 insertions(+), 40 deletions(-) diff --git a/upb_msg.c b/upb_msg.c index 9c03301..0517cf0 100644 --- a/upb_msg.c +++ b/upb_msg.c @@ -17,17 +17,18 @@ static int div_round_up(int numerator, int denominator) { } static int compare_fields(const void *e1, const void *e2) { - const google_protobuf_FieldDescriptorProto *f1 = e1, *f2 = e2; + const struct upb_msg_field *f1 = e1, *f2 = e2; + const google_protobuf_FieldDescriptorProto *fd1 = f1->descriptor; + const google_protobuf_FieldDescriptorProto *fd2 = f2->descriptor; /* Required fields go before non-required. */ - if(f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED && - f2->label != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) { - return -1; - } else if(f1->label != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED && - f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) { - return 1; + bool req1 = fd1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; + bool req2 = fd2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; + if(req1 != req2) { + return req2 - req1; } else { - /* Within required and non-required field lists, list in number order. */ - return f1->number - f2->number; + /* Within required and non-required field lists, list in number order. + * TODO: consider ordering by data size to reduce padding. 
*/ + return fd1->number - fd2->number; } } @@ -41,35 +42,44 @@ bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d) upb_strtable_init(&m->fields_by_name, d->field->len, sizeof(struct upb_fieldsbyname_entry)); - m->fields = malloc(sizeof(struct upb_msg_field) * d->field->len); m->num_fields = d->field->len; m->set_flags_bytes = div_round_up(m->num_fields, 8); - /* These are incremented in the loop. */ m->num_required_fields = 0; m->size = m->set_flags_bytes; - qsort(m->fields, d->field->len, sizeof(struct upb_msg_field), compare_fields); + m->fields = malloc(sizeof(struct upb_msg_field) * m->num_fields); + for(unsigned int i = 0; i < m->num_fields; i++) { + /* We count on the caller to keep this pointer alive. */ + m->fields[i].descriptor = d->field->elements[i]; + } + qsort(m->fields, m->num_fields, sizeof(struct upb_msg_field), compare_fields); size_t max_align = 0; - for(unsigned int i = 0; i < d->field->len; i++) { + for(unsigned int i = 0; i < m->num_fields; i++) { struct upb_msg_field *f = &m->fields[i]; - google_protobuf_FieldDescriptorProto *fd; /* TODO */ - struct upb_type_info *type_info = &upb_type_info[f->type]; + google_protobuf_FieldDescriptorProto *fd = f->descriptor; + struct upb_type_info *type_info = &upb_type_info[fd->type]; + + /* General alignment rules are: each member must be at an address that is a + * multiple of that type's alignment. Also, the size of the structure as + * a whole must be a multiple of the greatest alignment of any member. */ f->field_index = i; - f->type = fd->type; f->byte_offset = ALIGN_UP(m->size, type_info->align); m->size = f->byte_offset + type_info->size; max_align = max(max_align, type_info->align); if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) m->num_required_fields++; - /* Insert into the tables. Note that f->ref will be uninitialized, even in - * the tables' copies of *f, which is why we must update them separately + /* Insert into the tables. 
Note that af.ref is unresolved (zero-initialized) here, even in + * the tables' copies of af, which is why we must update them separately * when the references are resolved. */ - struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f}; - struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f}; + struct upb_abbrev_msg_field af = {.byte_offset = f->byte_offset, + .field_index = f->field_index, + .type = fd->type}; + struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = af}; + struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = af}; upb_inttable_insert(&m->fields_by_num, &nument.e); upb_strtable_insert(&m->fields_by_name, &strent.e); } diff --git a/upb_msg.h b/upb_msg.h index 407daa1..4d85fcc 100644 --- a/upb_msg.h +++ b/upb_msg.h @@ -45,38 +45,67 @@ extern "C" { #endif +/* Forward declarations from descriptor.h. */ +struct google_protobuf_DescriptorProto; +struct google_protobuf_FieldDescriptorProto; + /* Structure definition. ******************************************************/ +/* Fields that reference other types have pointers to the other type. */ +union upb_msg_field_ref { + struct upb_msg *msg; /* Set if type == MESSAGE */ + struct upb_enum *_enum; /* Set if type == ENUM */ +}; + +/* Structure that describes a single field in a message. */ struct upb_msg_field { + struct google_protobuf_FieldDescriptorProto *descriptor; + uint32_t byte_offset; /* Where to find the data. */ + uint32_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */ + union upb_msg_field_ref ref; +}; + +/* Structure that describes a single .proto message type. */ +struct upb_msg { + struct google_protobuf_DescriptorProto *descriptor; + size_t size; + uint32_t num_fields; + uint32_t set_flags_bytes; + uint32_t num_required_fields; /* Required fields have the lowest set bytemasks.
*/ + struct upb_inttable fields_by_num; + struct upb_strtable fields_by_name; + struct upb_msg_field *fields; +}; + +/* The num->field and name->field maps in upb_msg allow fast lookup of fields + * by number or name. These lookups are in the critical path of parsing and + * field lookup, so they must be as fast as possible. To make these more + * cache-friendly, we put the data in the table by value, but use only an + * abbreviated set of data (i.e. not all the data in upb_msg_field). Notably, + * we don't include the pointer to the field descriptor. But the upb_msg_field + * can be retrieved in its entirety using upb_get_msg_field() below. */ + +struct upb_abbrev_msg_field { uint32_t byte_offset; /* Where to find the data. */ uint32_t field_index:24; /* Indexes upb_msg.fields. Also indicates set bit */ upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */ - union { - struct upb_msg *msg; /* Set if type == MESSAGE */ - struct upb_enum *_enum; /* Set if type == ENUM */ - } ref; + union upb_msg_field_ref ref; }; struct upb_fieldsbynum_entry { struct upb_inttable_entry e; - struct upb_msg_field f; + struct upb_abbrev_msg_field f; }; struct upb_fieldsbyname_entry { struct upb_strtable_entry e; - struct upb_msg_field f; + struct upb_abbrev_msg_field f; }; -struct upb_msg { - struct google_protobuf_DescriptorProto *descriptor; - size_t size; - int num_fields; - int set_flags_bytes; - int num_required_fields; /* Required fields have the lowest set bytemasks. */ - struct upb_inttable fields_by_num; - struct upb_strtable fields_by_name; - struct upb_msg_field *fields; -}; +INLINE struct upb_msg_field *upb_get_msg_field( + struct upb_abbrev_msg_field *f, struct upb_msg *m) { + return &m->fields[f->field_index]; +} /* Initialize and free a upb_msg. Caller retains ownership of d, but the msg * will contain references to it, so it must outlive the msg.
Note that init @@ -87,14 +116,14 @@ void upb_msg_free(struct upb_msg *m); /* While these are written to be as fast as possible, it will still be faster * to cache the results of this lookup if possible. These return NULL if no * such field is found. */ -INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m, - uint32_t number) { +INLINE struct upb_abbrev_msg_field *upb_msg_fieldbynum(struct upb_msg *m, + uint32_t number) { struct upb_fieldsbynum_entry *e = upb_inttable_lookup( &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry)); return e ? &e->f : NULL; } -INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m, - struct upb_string *name) { +INLINE struct upb_abbrev_msg_field *upb_msg_fieldbyname(struct upb_msg *m, + struct upb_string *name) { struct upb_fieldsbyname_entry *e = upb_strtable_lookup(&m->fields_by_name, name); return e ? &e->f : NULL; -- cgit v1.2.3