summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2009-06-24 21:30:04 -0700
committerJoshua Haberman <joshua@reverberate.org>2009-06-24 21:30:04 -0700
commit88cc3f2fc88aa233e9357f1c5194494e7de4c0ee (patch)
tree2be82557fb69f26c04c89854ecff6b3f372f1b6a
parentec67a5ded6a3f9c77e265e1beb3efff9fd68aa32 (diff)
Progress implementing upb_msg.
-rw-r--r--upb.h8
-rw-r--r--upb_context.c6
-rw-r--r--upb_context.h2
-rw-r--r--upb_msg.c80
-rw-r--r--upb_msg.h57
-rw-r--r--upb_parse.c42
6 files changed, 157 insertions, 38 deletions
diff --git a/upb.h b/upb.h
index 457e9ee..bb1563a 100644
--- a/upb.h
+++ b/upb.h
@@ -67,6 +67,14 @@ union upb_wire_value {
* represent exceptional circumstances. */
typedef uint8_t upb_field_type_t;
+/* Layout and wire-format facts about a single field type. */
+struct upb_type_info {
+ uint8_t align; /* Alignment of the in-memory representation, in bytes. */
+ uint8_t size; /* Size of the in-memory representation, in bytes. */
+ uint8_t expected_wire_type; /* Wire type a field of this type is expected to use. */
+};
+
+/* Table of per-type info, indexed by field type (the
+ * GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_* constants). */
+extern struct upb_type_info upb_type_info[];
+
/* A value as described in a .proto file, except delimited, which is handled
* separately. */
union upb_value {
diff --git a/upb_context.c b/upb_context.c
index 23c2bdf..db20d89 100644
--- a/upb_context.c
+++ b/upb_context.c
@@ -4,7 +4,6 @@
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
-#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "descriptor.h"
@@ -17,12 +16,13 @@ int memrchr(char *data, char c, size_t len)
return off;
}
-void upb_context_init(struct upb_context *c)
+bool upb_context_init(struct upb_context *c)
{
upb_strtable_init(&c->symtab, 16, sizeof(struct upb_symtab_entry));
/* Add all the types in descriptor.proto so we can parse descriptors. */
if(!upb_context_addfd(c, &google_protobuf_filedescriptor, UPB_ONREDEF_ERROR))
- assert(false);
+ return false; /* Indicates that upb is buggy or corrupt. */
+ return true;
}
void upb_context_free(struct upb_context *c)
diff --git a/upb_context.h b/upb_context.h
index 3ec1566..d3e6904 100644
--- a/upb_context.h
+++ b/upb_context.h
@@ -42,7 +42,7 @@ struct upb_context {
/* Initializes and frees a upb_context, respectively. Newly initialized
* contexts will always have the types in descriptor.proto defined. */
-void upb_context_init(struct upb_context *c);
+bool upb_context_init(struct upb_context *c);
void upb_context_free(struct upb_context *c);
/* Looking up symbols. ********************************************************/
diff --git a/upb_msg.c b/upb_msg.c
index 48b0472..9c03301 100644
--- a/upb_msg.c
+++ b/upb_msg.c
@@ -3,4 +3,84 @@
*
*/
+#include <stdlib.h>
+#include "descriptor.h"
#include "upb_msg.h"
+
+/* Rounds p up to the nearest multiple of the alignment t.  t must be zero or
+ * a power of two; an alignment of zero leaves p unchanged.  The arguments may
+ * be evaluated more than once, so they must be free of side effects. */
+#define ALIGN_UP(p, t) \
+  ((t) == 0 ? (size_t)(p) \
+            : (((size_t)(p) + ((size_t)(t) - 1)) & ~((size_t)(t) - 1)))
+
+/* Returns the larger of two unsigned 32-bit values. */
+static uint32_t max(uint32_t a, uint32_t b) {
+  if(a > b) return a;
+  return b;
+}
+
+/* Divides numerator by denominator, rounding the quotient up.  A numerator
+ * that is zero or negative yields 0.
+ * cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
+static int div_round_up(int numerator, int denominator) {
+  if(numerator <= 0) return 0;
+  return (numerator - 1) / denominator + 1;
+}
+
+/* qsort()-style comparator over field descriptors.  Required fields order
+ * before non-required ones; within each group, fields order by ascending
+ * field number. */
+static int compare_fields(const void *e1, const void *e2) {
+  const google_protobuf_FieldDescriptorProto *lhs = e1;
+  const google_protobuf_FieldDescriptorProto *rhs = e2;
+  const int lhs_required =
+      lhs->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
+  const int rhs_required =
+      rhs->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
+
+  /* Exactly one of the two is required: that one sorts first. */
+  if(lhs_required != rhs_required) return lhs_required ? -1 : 1;
+
+  /* Same group: fall back to number order. */
+  return lhs->number - rhs->number;
+}
+
+/* qsort() adapter for an array of descriptor pointers: dereferences each
+ * element and defers to compare_fields() for the actual ordering. */
+static int compare_field_ptrs(const void *e1, const void *e2) {
+  return compare_fields(*(void *const *)e1, *(void *const *)e2);
+}
+
+/* Initializes m from the message descriptor d.
+ *
+ * Fields are laid out in sorted order (required fields first, then by field
+ * number).  The "set" flag bytes come first in the message; each field's
+ * data follows at an offset aligned for its type, and the total size is
+ * rounded up to the largest alignment used.  Every field is also entered
+ * into the by-number and by-name lookup tables.
+ *
+ * Returns false, leaving no allocations behind, if d has no fields set or if
+ * memory for the field array cannot be allocated.  Returns true on success.
+ * upb_msg_field.ref is NOT resolved here. */
+bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d)
+{
+  /* TODO: more complete validation. */
+  if(!d->set_flags.has.field) return false;
+
+  const unsigned int num_fields = d->field->len;
+
+  /* Sort a private array of descriptor pointers rather than the (still
+   * uninitialized) upb_msg_field array, so the descriptor itself is left
+   * untouched and every field below has a real descriptor to read from. */
+  google_protobuf_FieldDescriptorProto **sorted =
+      malloc(sizeof(*sorted) * num_fields);
+  m->fields = malloc(sizeof(*m->fields) * num_fields);
+  if(num_fields > 0 && (!sorted || !m->fields)) {
+    free(sorted);
+    free(m->fields);
+    m->fields = NULL;
+    return false;
+  }
+
+  /* NOTE(review): assumes the repeated-field array exposes its entries as
+   * `elements` (an array of descriptor pointers) -- confirm against
+   * descriptor.h. */
+  for(unsigned int i = 0; i < num_fields; i++)
+    sorted[i] = d->field->elements[i];
+  if(num_fields > 1)
+    qsort(sorted, num_fields, sizeof(*sorted), compare_field_ptrs);
+
+  upb_inttable_init(&m->fields_by_num, num_fields,
+                    sizeof(struct upb_fieldsbynum_entry));
+  upb_strtable_init(&m->fields_by_name, num_fields,
+                    sizeof(struct upb_fieldsbyname_entry));
+
+  m->num_fields = num_fields;
+  m->set_flags_bytes = div_round_up(m->num_fields, 8);
+
+  /* These are incremented in the loop. */
+  m->num_required_fields = 0;
+  m->size = m->set_flags_bytes;
+
+  size_t max_align = 0;
+
+  for(unsigned int i = 0; i < num_fields; i++) {
+    struct upb_msg_field *f = &m->fields[i];
+    google_protobuf_FieldDescriptorProto *fd = sorted[i];
+    /* Look the type up from the descriptor: f->type is not set yet. */
+    struct upb_type_info *type_info = &upb_type_info[fd->type];
+    f->field_index = i;
+    f->type = fd->type;
+    f->byte_offset = ALIGN_UP(m->size, type_info->align);
+    m->size = f->byte_offset + type_info->size;
+    max_align = max(max_align, type_info->align);
+    if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED)
+      m->num_required_fields++;
+
+    /* Insert into the tables.  Note that f->ref will be uninitialized, even in
+     * the tables' copies of *f, which is why we must update them separately
+     * when the references are resolved. */
+    struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f};
+    struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f};
+    upb_inttable_insert(&m->fields_by_num, &nument.e);
+    upb_strtable_insert(&m->fields_by_name, &strent.e);
+  }
+
+  free(sorted);
+
+  m->size = ALIGN_UP(m->size, max_align);
+  return true;
+}
+
+/* Releases the resources held by m: the field array and both field lookup
+ * tables.  m itself is not freed. */
+void upb_msg_free(struct upb_msg *m)
+{
+  free(m->fields);
+  upb_strtable_free(&m->fields_by_name);
+  upb_inttable_free(&m->fields_by_num);
+}
diff --git a/upb_msg.h b/upb_msg.h
index 6bbb3c3..54ca82d 100644
--- a/upb_msg.h
+++ b/upb_msg.h
@@ -48,17 +48,25 @@ extern "C" {
/* Structure definition. ******************************************************/
struct upb_msg_field {
- uint32_t byte_offset; /* Where to find the data. */
- uint16_t isset_byte_offset; /* The byte where the "set" bit lives. */
- uint8_t isset_byte_mask;
- upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */
- struct google_protobuf_FieldDescriptorProto *descriptor;
+ uint32_t byte_offset; /* Where to find the data. */
+ uint32_t field_index:24; /* Indexes upb_msg.fields. Also indicates set bit */
+ upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */
union {
- struct upb_msg *msg;
- struct upb_enum *_enum;
+ struct upb_msg *msg; /* Set if type == MESSAGE */
+ struct upb_enum *_enum; /* Set if type == ENUM */
} ref;
};
+/* Entry type for the upb_msg.fields_by_num table: the int-table entry header
+ * (keyed by field number) followed by a copy of the field's upb_msg_field. */
+struct upb_fieldsbynum_entry {
+ struct upb_inttable_entry e;
+ struct upb_msg_field f;
+};
+
+/* Entry type for the upb_msg.fields_by_name table: the string-table entry
+ * header (keyed by field name) followed by a copy of the field's
+ * upb_msg_field. */
+struct upb_fieldsbyname_entry {
+ struct upb_strtable_entry e;
+ struct upb_msg_field f;
+};
+
struct upb_msg {
struct google_protobuf_DescriptorProto *descriptor;
size_t size;
@@ -70,16 +78,27 @@ struct upb_msg {
struct upb_msg_field *fields;
};
-/* Initialize and free a upb_msg. Note that init does not resolve
- * upb_msg_field.ref -- that is left to the caller. */
-void upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d);
+/* Initialize and free a upb_msg. Caller retains ownership of d, but the msg
+ * will contain references to it, so it must outlive the msg. Note that init
+ * does not resolve upb_msg_field.ref -- that is left to the caller. */
+bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d);
void upb_msg_free(struct upb_msg *m);
/* While these are written to be as fast as possible, it will still be faster
* to cache the results of this lookup if possible. These return NULL if no
* such field is found. */
-struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m, char *name);
-struct upb_msg_field *upb_msg_fieldbynumber(struct upb_msg *m, uint32_t number);
+/* Looks up a field of m by its field number.  Returns a pointer to the field
+ * stored in the by-number table, or NULL if no field has that number. */
+INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
+                                                uint32_t number) {
+  struct upb_fieldsbynum_entry *entry;
+  entry = upb_inttable_lookup(&m->fields_by_num, number, sizeof(*entry));
+  if(!entry) return NULL;
+  return &entry->f;
+}
+/* Looks up a field of m by name.  Returns a pointer to the field stored in the
+ * by-name table, or NULL if no field has that name. */
+INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
+                                                 struct upb_string *name) {
+  struct upb_fieldsbyname_entry *entry;
+  entry = upb_strtable_lookup(&m->fields_by_name, name);
+  if(!entry) return NULL;
+  return &entry->f;
+}
/* Variable-length data (strings and arrays).**********************************/
@@ -186,23 +205,31 @@ UPB_DEFINE_ALL_ACCESSORS(struct upb_string*, string, INLINE)
UPB_DEFINE_ALL_ACCESSORS(void*, substruct, INLINE)
UPB_DEFINE_ACCESSORS(struct upb_array*, array, INLINE)
+/* Returns the index of the byte that holds the "set" bit for the field with
+ * the given index (eight fields share each byte). */
+INLINE size_t upb_isset_offset(uint32_t field_index) {
+  const uint32_t byte_index = field_index >> 3;
+  return byte_index;
+}
+
+/* Returns the single-bit mask selecting, within its "set" byte, the bit for
+ * the field with the given index. */
+INLINE size_t upb_isset_mask(uint32_t field_index) {
+  const uint32_t bit = field_index & 7;
+  return 1 << bit;
+}
+
/* Functions for reading and writing the "set" flags in the pbstruct. Note
* that these do not perform any memory management associated with any dynamic
* memory these fields may be referencing; that is the client's responsibility.
* These *only* set and test the flags. */
INLINE void upb_msg_set(void *s, struct upb_msg_field *f)
{
- ((char*)s)[f->isset_byte_offset] |= f->isset_byte_mask;
+ ((char*)s)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index);
}
INLINE void upb_msg_unset(void *s, struct upb_msg_field *f)
{
- ((char*)s)[f->isset_byte_offset] &= ~f->isset_byte_mask;
+ ((char*)s)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index);
}
INLINE bool upb_msg_is_set(void *s, struct upb_msg_field *f)
{
- return ((char*)s)[f->isset_byte_offset] & f->isset_byte_mask;
+ return ((char*)s)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index);
}
INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
diff --git a/upb_parse.c b/upb_parse.c
index 088091b..2b9c875 100644
--- a/upb_parse.c
+++ b/upb_parse.c
@@ -7,6 +7,7 @@
#include "upb_parse.h"
#include <assert.h>
+#include <stddef.h>
#include <string.h>
#include "descriptor.h"
@@ -168,25 +169,28 @@ T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; }
#undef GET
#undef T
-upb_wire_type_t upb_expected_wire_types[] = {
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE] = UPB_WIRE_TYPE_64BIT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT] = UPB_WIRE_TYPE_32BIT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64] = UPB_WIRE_TYPE_VARINT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64] = UPB_WIRE_TYPE_VARINT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32] = UPB_WIRE_TYPE_VARINT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = UPB_WIRE_TYPE_64BIT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = UPB_WIRE_TYPE_32BIT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = UPB_WIRE_TYPE_VARINT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = UPB_WIRE_TYPE_DELIMITED,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = UPB_WIRE_TYPE_DELIMITED,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = -1, /* TODO */
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = UPB_WIRE_TYPE_DELIMITED,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = UPB_WIRE_TYPE_VARINT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = UPB_WIRE_TYPE_VARINT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32] = UPB_WIRE_TYPE_32BIT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64] = UPB_WIRE_TYPE_64BIT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = UPB_WIRE_TYPE_VARINT,
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = UPB_WIRE_TYPE_VARINT,
+/* Yields the alignment of type t: the offset at which the compiler places a t
+ * that directly follows a single char inside a struct. */
+#define alignof(t) offsetof(struct { char pad; t member; }, member)
+
+/* Alignment, size, and expected wire type for each field type, indexed by the
+ * GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_* constants.
+ *
+ * May want to move this to upb.c if enough other things warrant it. */
+struct upb_type_info upb_type_info[] = {
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE] = {alignof(double), sizeof(double), UPB_WIRE_TYPE_64BIT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT] = {alignof(float), sizeof(float), UPB_WIRE_TYPE_32BIT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_64BIT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_32BIT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = {alignof(bool), sizeof(bool), UPB_WIRE_TYPE_VARINT},
+ /* Strings and bytes are both laid out as a struct upb_string. */
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = {alignof(struct upb_string), sizeof(struct upb_string), UPB_WIRE_TYPE_DELIMITED},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = {alignof(struct upb_string), sizeof(struct upb_string), UPB_WIRE_TYPE_DELIMITED},
+ /* Groups have no real entry yet; all three values are zero placeholders. */
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = {0,0,0},
+ /* Sub-messages are sized and aligned as a pointer. */
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_DELIMITED},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32]= {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_32BIT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64]= {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_64BIT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT},
};
upb_status_t upb_parse_tag(void **buf, struct upb_tag *tag)
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback