summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2009-06-27 11:49:41 -0700
committerJoshua Haberman <joshua@reverberate.org>2009-06-27 11:49:41 -0700
commit3a67a1e9f95535adbc67f42771789cd83443a377 (patch)
tree086e67412cf2adc042e1aeaa2fc78d663fbed4b8
parentc7f2a271ae29066744cf09499f744a0c6b89a27e (diff)
upb_msg constructor now more or less works.
-rw-r--r--upb_msg.c50
-rw-r--r--upb_msg.h69
2 files changed, 79 insertions, 40 deletions
diff --git a/upb_msg.c b/upb_msg.c
index 9c03301..0517cf0 100644
--- a/upb_msg.c
+++ b/upb_msg.c
@@ -17,17 +17,18 @@ static int div_round_up(int numerator, int denominator) {
}
static int compare_fields(const void *e1, const void *e2) {
- const google_protobuf_FieldDescriptorProto *f1 = e1, *f2 = e2;
+ const struct upb_msg_field *f1 = e1, *f2 = e2;
+ const google_protobuf_FieldDescriptorProto *fd1 = f1->descriptor;
+ const google_protobuf_FieldDescriptorProto *fd2 = f2->descriptor;
/* Required fields go before non-required. */
- if(f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED &&
- f2->label != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) {
- return -1;
- } else if(f1->label != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED &&
- f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) {
- return 1;
+ bool req1 = fd1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
+ bool req2 = fd2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
+ if(req1 != req2) {
+ return req2 - req1;
} else {
- /* Within required and non-required field lists, list in number order. */
- return f1->number - f2->number;
+ /* Within required and non-required field lists, list in number order.
+ * TODO: consider ordering by data size to reduce padding. */
+ return fd1->number - fd2->number;
}
}
@@ -41,35 +42,44 @@ bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d)
upb_strtable_init(&m->fields_by_name, d->field->len,
sizeof(struct upb_fieldsbyname_entry));
- m->fields = malloc(sizeof(struct upb_msg_field) * d->field->len);
m->num_fields = d->field->len;
m->set_flags_bytes = div_round_up(m->num_fields, 8);
-
/* These are incremented in the loop. */
m->num_required_fields = 0;
m->size = m->set_flags_bytes;
- qsort(m->fields, d->field->len, sizeof(struct upb_msg_field), compare_fields);
+ m->fields = malloc(sizeof(struct upb_msg_field) * m->num_fields);
+ for(unsigned int i = 0; i < m->num_fields; i++) {
+ /* We count on the caller to keep this pointer alive. */
+ m->fields[i].descriptor = d->field->elements[i];
+ }
+ qsort(m->fields, m->num_fields, sizeof(struct upb_msg_field), compare_fields);
size_t max_align = 0;
- for(unsigned int i = 0; i < d->field->len; i++) {
+ for(unsigned int i = 0; i < m->num_fields; i++) {
struct upb_msg_field *f = &m->fields[i];
- google_protobuf_FieldDescriptorProto *fd; /* TODO */
- struct upb_type_info *type_info = &upb_type_info[f->type];
+ google_protobuf_FieldDescriptorProto *fd = f->descriptor;
+ struct upb_type_info *type_info = &upb_type_info[fd->type];
+
+ /* General alignment rules are: each member must be at an address that is a
+ * multiple of that type's alignment. Also, the size of the structure as
+ * a whole must be a multiple of the greatest alignment of any member. */
f->field_index = i;
- f->type = fd->type;
f->byte_offset = ALIGN_UP(m->size, type_info->align);
m->size = f->byte_offset + type_info->size;
max_align = max(max_align, type_info->align);
if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED)
m->num_required_fields++;
- /* Insert into the tables. Note that f->ref will be uninitialized, even in
- * the tables' copies of *f, which is why we must update them separately
+ /* Insert into the tables. Note that af.ref is not yet resolved, even in
+ * the tables' copies of af, which is why we must update them separately
* when the references are resolved. */
- struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f};
- struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f};
+ struct upb_abbrev_msg_field af = {.byte_offset = f->byte_offset,
+ .field_index = f->field_index,
+ .type = fd->type};
+ struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = af};
+ struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = af};
upb_inttable_insert(&m->fields_by_num, &nument.e);
upb_strtable_insert(&m->fields_by_name, &strent.e);
}
diff --git a/upb_msg.h b/upb_msg.h
index 407daa1..4d85fcc 100644
--- a/upb_msg.h
+++ b/upb_msg.h
@@ -45,38 +45,67 @@
extern "C" {
#endif
+/* Forward declarations from descriptor.h. */
+struct google_protobuf_DescriptorProto;
+struct google_protobuf_FieldDescriptorProto;
+
/* Structure definition. ******************************************************/
+/* Fields that reference other types have pointers to the other type. */
+union upb_msg_field_ref {
+ struct upb_msg *msg; /* Set if type == MESSAGE */
+ struct upb_enum *_enum; /* Set if type == ENUM */
+};
+
+/* Structure that describes a single field in a message. */
struct upb_msg_field {
+ struct google_protobuf_FieldDescriptorProto *descriptor;
+ uint32_t byte_offset; /* Where to find the data. */
+ uint32_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */
+ union upb_msg_field_ref ref;
+};
+
+/* Structure that describes a single .proto message type. */
+struct upb_msg {
+ struct google_protobuf_DescriptorProto *descriptor;
+ size_t size;
+ uint32_t num_fields;
+ uint32_t set_flags_bytes;
+ uint32_t num_required_fields; /* Required fields sort first, so they have the lowest set bits. */
+ struct upb_inttable fields_by_num;
+ struct upb_strtable fields_by_name;
+ struct upb_msg_field *fields;
+};
+
+/* The num->field and name->field maps in upb_msg allow fast lookup of fields
+ * by number or name. These lookups are in the critical path of parsing and
+ * field lookup, so they must be as fast as possible. To make these more
+ * cache-friendly, we put the data in the table by value, but use only an
+ * abbreviated set of data (i.e. not all the data in upb_msg_field). Notably,
+ * we don't include the pointer to the field descriptor. But the upb_msg_field
+ * can be retrieved in its entirety using upb_get_msg_field() below. */
+
+struct upb_abbrev_msg_field {
uint32_t byte_offset; /* Where to find the data. */
uint32_t field_index:24; /* Indexes upb_msg.fields. Also indicates set bit */
upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */
- union {
- struct upb_msg *msg; /* Set if type == MESSAGE */
- struct upb_enum *_enum; /* Set if type == ENUM */
- } ref;
+ union upb_msg_field_ref ref;
};
struct upb_fieldsbynum_entry {
struct upb_inttable_entry e;
- struct upb_msg_field f;
+ struct upb_abbrev_msg_field f;
};
struct upb_fieldsbyname_entry {
struct upb_strtable_entry e;
- struct upb_msg_field f;
+ struct upb_abbrev_msg_field f;
};
-struct upb_msg {
- struct google_protobuf_DescriptorProto *descriptor;
- size_t size;
- int num_fields;
- int set_flags_bytes;
- int num_required_fields; /* Required fields have the lowest set bytemasks. */
- struct upb_inttable fields_by_num;
- struct upb_strtable fields_by_name;
- struct upb_msg_field *fields;
-};
+INLINE struct upb_msg_field *upb_get_msg_field(
+ struct upb_abbrev_msg_field *f, struct upb_msg *m) {
+ return &m->fields[f->field_index]; /* field_index indexes m->fields. */
+}
/* Initialize and free a upb_msg. Caller retains ownership of d, but the msg
* will contain references to it, so it must outlive the msg. Note that init
@@ -87,14 +116,14 @@ void upb_msg_free(struct upb_msg *m);
/* While these are written to be as fast as possible, it will still be faster
* to cache the results of this lookup if possible. These return NULL if no
* such field is found. */
-INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
- uint32_t number) {
+INLINE struct upb_abbrev_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
+ uint32_t number) {
struct upb_fieldsbynum_entry *e = upb_inttable_lookup(
&m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry));
return e ? &e->f : NULL;
}
-INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
- struct upb_string *name) {
+INLINE struct upb_abbrev_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
+ struct upb_string *name) {
struct upb_fieldsbyname_entry *e =
upb_strtable_lookup(&m->fields_by_name, name);
return e ? &e->f : NULL;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback