summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Joshua Haberman <joshua@reverberate.org> 2009-06-28 09:31:46 -0700
committer Joshua Haberman <joshua@reverberate.org> 2009-06-28 09:31:46 -0700
commit 2da3b081c47fe8fe7476da228323d2040e876f56 (patch)
tree f402f516d30be67dd3713477862c5248b8a882a2
parent 3a67a1e9f95535adbc67f42771789cd83443a377 (diff)
More work on upb_context.
-rw-r--r-- upb.h 16
-rw-r--r-- upb_context.c 180
-rw-r--r-- upb_context.h 27
-rw-r--r-- upb_enum.h 60
-rw-r--r-- upb_msg.h 20
-rw-r--r-- upb_table.h 9
6 files changed, 256 insertions, 56 deletions
diff --git a/upb.h b/upb.h
index bb1563a..5197395 100644
--- a/upb.h
+++ b/upb.h
@@ -29,6 +29,9 @@ extern "C" {
/* The maximum that any submessages can be nested. Matches proto2's limit. */
#define UPB_MAX_NESTING 64
+/* The maximum number of fields that any one .proto type can have. A field's
+ * index is stored as a uint16_t (field_index in upb_msg.h), which provides
+ * exactly 1<<16 distinct values. */
+#define UPB_MAX_FIELDS (1<<16)
+
/* Represents a string or bytes. */
struct upb_string {
/* We expect the data to be 8-bit clean (uint8_t), but char* is such an
@@ -97,6 +100,19 @@ struct upb_tag {
upb_wire_type_t wire_type;
};
+enum upb_symbol_type {  /* Kind of definition that a symbol table entry names. */
+ UPB_SYM_MESSAGE,  /* The entry's ref.msg is the valid member. */
+ UPB_SYM_ENUM,  /* The entry's ref._enum is the valid member. */
+ UPB_SYM_SERVICE,  /* Not yet inserted by upb_context_addfd() (see its TODO). */
+ UPB_SYM_EXTENSION  /* Not yet inserted by upb_context_addfd() (see its TODO). */
+};
+
+union upb_symbol_ref {  /* Pointer to a definition; which member is valid is
+                         * given by an accompanying enum upb_symbol_type. */
+ struct upb_msg *msg;  /* Used with UPB_SYM_MESSAGE. */
+ struct upb_enum *_enum;  /* Used with UPB_SYM_ENUM. */
+ struct upb_svc *svc;  /* NOTE(review): presumably for UPB_SYM_SERVICE; nothing
+                        * in upb_context.c sets it yet -- confirm. */
+};
+
/* Status codes used as a return value. */
typedef enum upb_status {
UPB_STATUS_OK = 0,
diff --git a/upb_context.c b/upb_context.c
index db20d89..642270a 100644
--- a/upb_context.c
+++ b/upb_context.c
@@ -8,8 +8,10 @@
#include <string.h>
#include "descriptor.h"
#include "upb_context.h"
+#include "upb_enum.h"
+#include "upb_msg.h"
-int memrchr(char *data, char c, size_t len)
+static int memrchr(char *data, char c, size_t len)
{
int off = len-1;
while(off > 0 && data[off] != c) --off;
@@ -20,7 +22,7 @@ bool upb_context_init(struct upb_context *c)
{
upb_strtable_init(&c->symtab, 16, sizeof(struct upb_symtab_entry));
/* Add all the types in descriptor.proto so we can parse descriptors. */
- if(!upb_context_addfd(c, &google_protobuf_filedescriptor, UPB_ONREDEF_ERROR))
+ if(!upb_context_addfd(c, &google_protobuf_filedescriptor))
return false; /* Indicates that upb is buggy or corrupt. */
return true;
}
@@ -36,9 +38,9 @@ struct upb_symtab_entry *upb_context_lookup(struct upb_context *c,
return upb_strtable_lookup(&c->symtab, symbol);
}
-struct upb_symtab_entry *upb_context_resolve(struct upb_context *c,
- struct upb_string *base,
- struct upb_string *symbol)
+static struct upb_symtab_entry *resolve(struct upb_strtable *t,
+ struct upb_string *base,
+ struct upb_string *symbol)
{
if(base->byte_len + symbol->byte_len + 1 >= UPB_SYM_MAX_LENGTH ||
symbol->byte_len == 0) return NULL;
@@ -47,7 +49,7 @@ struct upb_symtab_entry *upb_context_resolve(struct upb_context *c,
/* Symbols starting with '.' are absolute, so we do a single lookup. */
struct upb_string sym_str = {.data = symbol->data+1,
.byte_len = symbol->byte_len-1};
- return upb_context_lookup(c, &sym_str);
+ return upb_strtable_lookup(t, &sym_str);
} else {
/* Remove components from base until we find an entry or run out. */
char sym[UPB_SYM_MAX_LENGTH+1];
@@ -60,7 +62,7 @@ struct upb_symtab_entry *upb_context_resolve(struct upb_context *c,
memcpy(sym + baselen + 1, symbol->data, symbol->byte_len);
sym_str.byte_len = baselen + symbol->byte_len + 1;
- struct upb_symtab_entry *e = upb_context_lookup(c, &sym_str);
+ struct upb_symtab_entry *e = upb_strtable_lookup(t, &sym_str);
if (e) return e;
else if(baselen == 0) return NULL; /* No more scopes to try. */
@@ -69,23 +71,161 @@ struct upb_symtab_entry *upb_context_resolve(struct upb_context *c,
}
}
+/* Resolves sym relative to base, consulting t1 first and falling back to t2
+ * only when t1 has no match. Returns the entry's reference if a symbol was
+ * found and it has type expected_type; otherwise returns a reference whose
+ * pointer is NULL. This is a file-local helper, so it has internal linkage
+ * like resolve(). */
+static union upb_symbol_ref resolve2(struct upb_strtable *t1,
+                                     struct upb_strtable *t2,
+                                     struct upb_string *base,
+                                     struct upb_string *sym,
+                                     enum upb_symbol_type expected_type)
+{
+  union upb_symbol_ref nullref = {.msg = NULL};
+  struct upb_symtab_entry *e = resolve(t1, base, sym);
+  if(e == NULL) e = resolve(t2, base, sym);
+
+  /* Not found at all, or found but of the wrong kind. */
+  if(e == NULL || e->type != expected_type) return nullref;
+  return e->ref;
+}
+
+
+/* Public wrapper around resolve() that searches the context's own symbol
+ * table. Returns whatever resolve() returns for that table. */
+struct upb_symtab_entry *upb_context_resolve(struct upb_context *c,
+                                             struct upb_string *base,
+                                             struct upb_string *symbol)
+{
+  struct upb_strtable *table = &c->symtab;
+  return resolve(table, base, symbol);
+}
+
+/* join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
+ * join("", "Baz") -> "Baz"
+ * Caller owns the returned string and must free it. The result is not
+ * NUL-terminated. If the allocation fails, the returned string has
+ * data == NULL and byte_len == 0. */
+static struct upb_string join(struct upb_string *base, struct upb_string *name) {
+  /* Bytes that precede the name: the base plus one separator, or nothing. */
+  size_t prefix_len = base->byte_len;
+  if(prefix_len > 0) prefix_len++;  /* For the separator. */
+  size_t len = prefix_len + name->byte_len;
+
+  struct upb_string joined = {.byte_len=len, .data=malloc(len)};
+  if(joined.data == NULL) {
+    /* Never memcpy() into a NULL buffer; report an empty string instead. */
+    joined.byte_len = 0;
+    return joined;
+  }
+  if(base->byte_len > 0) {
+    /* joined = base + separator + name */
+    memcpy(joined.data, base->data, base->byte_len);
+    joined.data[base->byte_len] = UPB_CONTEXT_SEPARATOR;
+  }
+  memcpy(joined.data + prefix_len, name->data, name->byte_len);
+  return joined;
+}
+
+/* Adds a UPB_SYM_ENUM entry for ed to t, keyed by ed's name qualified with
+ * base. Returns false, leaving t unchanged, if ed has no name, if the
+ * qualified name is already present in t, or if memory cannot be allocated. */
+static bool insert_enum(struct upb_strtable *t,
+                        google_protobuf_EnumDescriptorProto *ed,
+                        struct upb_string *base)
+{
+  if(!ed->set_flags.has.name) return false;
+
+  /* We own this and must free it on destruct. */
+  struct upb_string fqname = join(base, ed->name);
+  if(fqname.data == NULL) return false;  /* Allocation failed. */
+
+  /* Redefinition within a FileDescriptorProto is not allowed. */
+  if(upb_strtable_lookup(t, &fqname)) {
+    free(fqname.data);
+    return false;
+  }
+
+  struct upb_symtab_entry e;
+  e.e.key = fqname;
+  e.type = UPB_SYM_ENUM;
+  e.ref._enum = malloc(sizeof(*e.ref._enum));
+  if(e.ref._enum == NULL) {
+    /* upb_enum_init() writes through this pointer, so it must not be NULL. */
+    free(fqname.data);
+    return false;
+  }
+  upb_enum_init(e.ref._enum, ed);
+  upb_strtable_insert(t, &e.e);
+
+  return true;
+}
+
+/* Adds a UPB_SYM_MESSAGE entry for d to t, keyed by d's name qualified with
+ * base, then recursively adds d's nested messages and enums using d's
+ * qualified name as their base. Returns false if d (or anything nested in it)
+ * has no name, redefines a name already in t, or cannot be allocated. On a
+ * nested failure the entries inserted so far remain in t, so the caller is
+ * expected to discard t. */
+static bool insert_message(struct upb_strtable *t,
+                           google_protobuf_DescriptorProto *d,
+                           struct upb_string *base)
+{
+  if(!d->set_flags.has.name) return false;
+
+  /* We own this and must free it on destruct. */
+  struct upb_string fqname = join(base, d->name);
+  if(fqname.data == NULL) return false;  /* Allocation failed. */
+
+  /* Redefinition within a FileDescriptorProto is not allowed. The table is
+   * keyed by fully-qualified name, so that is what must be looked up (the
+   * bare d->name would miss duplicates inside a package or parent message). */
+  if(upb_strtable_lookup(t, &fqname)) {
+    free(fqname.data);
+    return false;
+  }
+
+  struct upb_symtab_entry e;
+  e.e.key = fqname;
+  e.type = UPB_SYM_MESSAGE;
+  e.ref.msg = malloc(sizeof(*e.ref.msg));
+  if(e.ref.msg == NULL) {
+    /* Do not hand a NULL message to upb_msg_init(). */
+    free(fqname.data);
+    return false;
+  }
+  upb_msg_init(e.ref.msg, d);
+  upb_strtable_insert(t, &e.e);
+
+  /* Add nested messages and enums. */
+  if(d->set_flags.has.nested_type)
+    for(unsigned int i = 0; i < d->nested_type->len; i++)
+      if(!insert_message(t, d->nested_type->elements[i], &fqname))
+        return false;
+
+  if(d->set_flags.has.enum_type)
+    for(unsigned int i = 0; i < d->enum_type->len; i++)
+      if(!insert_enum(t, d->enum_type->elements[i], &fqname))
+        return false;
+
+  return true;
+}
+
bool upb_context_addfd(struct upb_context *c,
- google_protobuf_FileDescriptorProto *fd,
- int onredef)
+                       google_protobuf_FileDescriptorProto *fd)
{
+  /* Adds every message and enum defined in fd to c, or nothing at all.
+   * Returns false without touching c->symtab if fd defines a name twice,
+   * redefines a name already in c, or has a message/enum field whose type
+   * cannot be resolved. */
+  struct upb_string package = {.byte_len=0};
+  if(fd->set_flags.has.package) package = *fd->package;
+
+  /* We want the entire add operation to be atomic, so we initially insert into
+   * this temporary map of symbols. Once we have verified that there are no
+   * errors (all symbols can be resolved and no illegal redefinitions occurred)
+   * only then do we insert into the context's table. */
+  struct upb_strtable tmp;
+  int symcount = (fd->set_flags.has.message_type ? fd->message_type->len : 0) +
+                 (fd->set_flags.has.enum_type ? fd->enum_type->len : 0) +
+                 (fd->set_flags.has.service ? fd->service->len : 0);
+  upb_strtable_init(&tmp, symcount, sizeof(struct upb_symtab_entry));
+
/* TODO: properly handle redefinitions and unresolvable symbols. */
- if(fd->set_flags.has.message_type) {
- for(unsigned int i = 0; i < fd->message_type->len; i++) {
- struct google_protobuf_DescriptorProto *d = fd->message_type->elements[i];
- if(!d->set_flags.has.name) return false;
- struct upb_symtab_entry e;
- e.e.key = *d->name;
- e.type = UPB_SYM_MESSAGE;
- e.p.msg = malloc(sizeof(*e.p.msg));
- upb_msg_init(e.p.msg, d);
- upb_strtable_insert(&c->symtab, &e.e);
+  if(fd->set_flags.has.message_type)
+    for(unsigned int i = 0; i < fd->message_type->len; i++)
+      if(!insert_message(&tmp, fd->message_type->elements[i], &package))
+        goto error;
+
+  if(fd->set_flags.has.enum_type)
+    for(unsigned int i = 0; i < fd->enum_type->len; i++)
+      if(!insert_enum(&tmp, fd->enum_type->elements[i], &package))
+        goto error;
+
+  /* TODO: handle extensions and services. */
+
+  /* Attempt to resolve all references. */
+  struct upb_symtab_entry *e;
+  for(e = upb_strtable_begin(&tmp); e; e = upb_strtable_next(&tmp, &e->e)) {
+    if(upb_strtable_lookup(&c->symtab, &e->e.key))
+      goto error; /* Redefinition prohibited. */
+    if(e->type == UPB_SYM_MESSAGE) {
+      struct upb_msg *m = e->ref.msg;
+      for(unsigned int i = 0; i < m->num_fields; i++) {
+        struct upb_msg_field *f = &m->fields[i];
+        /* Named "field" so that the fd parameter is not shadowed. */
+        google_protobuf_FieldDescriptorProto *field = f->descriptor;
+        enum upb_symbol_type expected;
+        if(field->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE)
+          expected = UPB_SYM_MESSAGE;
+        else if(field->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM)
+          expected = UPB_SYM_ENUM;
+        else
+          continue; /* No resolving necessary. */
+
+        /* The symbol to resolve is the name of the field's *type*, not the
+         * name of the field itself. The enclosing message's qualified name
+         * is the scope from which relative names are resolved. */
+        if(!field->set_flags.has.type_name) goto error;
+        union upb_symbol_ref ref = resolve2(&c->symtab, &tmp, &e->e.key,
+                                            field->type_name, expected);
+        if(!ref.msg) goto error;
+        upb_msg_ref(m, f, &ref);
+      }
}
}
- /* TODO: handle enums, extensions, and services. */
+
+  /* All references were successfully resolved -- add to the symbol table. */
+  for(e = upb_strtable_begin(&tmp); e; e = upb_strtable_next(&tmp, &e->e))
+    upb_strtable_insert(&c->symtab, &e->e);
+
+  /* upb_strtable_insert() is handed stack-allocated entries elsewhere in this
+   * file, so it copies them; c->symtab now holds the names and definitions
+   * and only the temporary table's own storage needs to be released. */
+  upb_strtable_free(&tmp);
return true;
+
+error:
+  /* NOTE: this releases only the temporary table itself. The qualified names
+   * and upb_msg/upb_enum objects allocated by insert_message()/insert_enum()
+   * are still leaked on this path -- TODO: walk tmp and free them. */
+  upb_strtable_free(&tmp);
+  return false;
}
diff --git a/upb_context.h b/upb_context.h
index d3e6904..3b84889 100644
--- a/upb_context.h
+++ b/upb_context.h
@@ -19,21 +19,10 @@
extern "C" {
#endif
-enum upb_symbol_type {
- UPB_SYM_MESSAGE,
- UPB_SYM_ENUM,
- UPB_SYM_SERVICE,
- UPB_SYM_EXTENSION
-};
-
struct upb_symtab_entry {
struct upb_strtable_entry e;
enum upb_symbol_type type;
- union {
- struct upb_msg *msg;
- struct upb_enum *_enum;
- struct upb_svc *svc;
- } p;
+ union upb_symbol_ref ref;  /* Definition named by e.key; valid member is selected by type. */
};
struct upb_context {
@@ -73,23 +62,15 @@ struct upb_symtab_entry *upb_context_lookup(struct upb_context *c,
/* Adding symbols. ************************************************************/
-/* Enum controlling what happens if a symbol is redefined. */
-enum upb_onredef {
- UPB_ONREDEF_REPLACE, /* Replace existing definition (must be same type). */
- UPB_ONREDEF_KEEP, /* Keep existing definition, ignore new one. */
- UPB_ONREDEF_ERROR /* Error on redefinition. */
-};
-
/* Adds the definitions in the given file descriptor to this context. All
* types that are referenced from fd must have previously been defined (or be
- * defined in fd). onredef controls the behavior in the case that fd attempts
- * to define a type that is already defined.
+ * defined in fd). fd may not attempt to define any names that are already
+ * defined in this context.
*
* Caller retains ownership of fd, but the context will contain references to
* it, so it must outlive the context. */
bool upb_context_addfd(struct upb_context *c,
- google_protobuf_FileDescriptorProto *fd,
- int onredef);
+ google_protobuf_FileDescriptorProto *fd);
/* Adds the serialized FileDescriptorSet proto contained in fdss to the context,
* and adds symbol table entries for all the objects defined therein. onredef
diff --git a/upb_enum.h b/upb_enum.h
new file mode 100644
index 0000000..2c4010b
--- /dev/null
+++ b/upb_enum.h
@@ -0,0 +1,60 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
+ *
+ * upb_enum is a simple object that allows run-time reflection over the values
+ * defined within an enum. */
+
+#ifndef UPB_ENUM_H_
+#define UPB_ENUM_H_
+
+#include <stdint.h>
+#include "upb_table.h"
+
+/* Forward declaration from descriptor.h. */
+struct google_protobuf_EnumDescriptorProto;
+struct google_protobuf_EnumValueDescriptorProto;
+
+struct upb_enum {
+ struct google_protobuf_EnumDescriptorProto *descriptor;  /* Not owned; must outlive this object. */
+ struct upb_strtable nametoint;  /* Value name -> struct upb_enum_ntoi_entry. */
+ struct upb_inttable inttoname;  /* Value number -> struct upb_enum_iton_entry. */
+};
+
+struct upb_enum_ntoi_entry {
+ struct upb_strtable_entry e;  /* Key is a copy of the value's name string. */
+ uint32_t value;  /* The value's number. */
+};
+
+struct upb_enum_iton_entry {
+ struct upb_inttable_entry e;  /* Key is the value's number. */
+ struct upb_string *string;  /* The value's name; points at the descriptor's string. */
+};
+
+/* upb_enum_init() fills in e from ed, indexing every enum value both by name
+ * and by number; upb_enum_free() releases those two indexes. Caller retains
+ * ownership of ed, but it must outlive e. */
+INLINE void upb_enum_init(struct upb_enum *e,
+                          struct google_protobuf_EnumDescriptorProto *ed)
+{
+  int count = 0;
+  if(ed->set_flags.has.value) count = ed->value->len;
+
+  e->descriptor = ed;
+  upb_strtable_init(&e->nametoint, count, sizeof(struct upb_enum_ntoi_entry));
+  upb_inttable_init(&e->inttoname, count, sizeof(struct upb_enum_iton_entry));
+
+  int idx = 0;
+  while(idx < count) {
+    google_protobuf_EnumValueDescriptorProto *val = ed->value->elements[idx++];
+    /* One entry per direction: name -> number and number -> name. */
+    struct upb_enum_ntoi_entry by_name = {.e = {.key = *val->name},
+                                          .value = val->number};
+    struct upb_enum_iton_entry by_number = {.e = {.key = val->number},
+                                            .string = val->name};
+    upb_strtable_insert(&e->nametoint, &by_name.e);
+    upb_inttable_insert(&e->inttoname, &by_number.e);
+  }
+}
+
+INLINE void upb_enum_free(struct upb_enum *e)
+{
+  upb_strtable_free(&e->nametoint);
+  upb_inttable_free(&e->inttoname);
+}
+
+#endif /* UPB_ENUM_H_ */
diff --git a/upb_msg.h b/upb_msg.h
index 4d85fcc..f7b07f3 100644
--- a/upb_msg.h
+++ b/upb_msg.h
@@ -30,8 +30,8 @@
* management semantics can be used with the format as defined here.
*/
-#ifndef PBSTRUCT_H_
-#define PBSTRUCT_H_
+#ifndef UPB_MSG_H_
+#define UPB_MSG_H_
#include <stdbool.h>
#include <stddef.h>
@@ -51,18 +51,12 @@ struct google_protobuf_FieldDescriptorProto;
/* Structure definition. ******************************************************/
-/* Fields that reference other types have pointers to the other type. */
-union upb_msg_field_ref {
- struct upb_msg *msg; /* Set if type == MESSAGE */
- struct upb_enum *_enum; /* Set if type == ENUM */
-};
-
/* Structure that describes a single field in a message. */
struct upb_msg_field {
struct google_protobuf_FieldDescriptorProto *descriptor;
uint32_t byte_offset; /* Where to find the data. */
- uint32_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */
- union upb_msg_field_ref ref;
+ uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */
+ union upb_symbol_ref ref;  /* Referenced type: msg if type == MESSAGE, _enum if type == ENUM. */
};
/* Structure that describes a single .proto message type. */
@@ -87,9 +81,9 @@ struct upb_msg {
struct upb_abbrev_msg_field {
uint32_t byte_offset; /* Where to find the data. */
- uint32_t field_index:24; /* Indexes upb_msg.fields. Also indicates set bit */
+ uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */
upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */
- union upb_msg_field_ref ref;
+ union upb_symbol_ref ref;  /* Referenced type: msg if type == MESSAGE, _enum if type == ENUM. */
};
struct upb_fieldsbynum_entry {
@@ -282,4 +276,4 @@ INLINE void upb_msg_clear(void *s, struct upb_msg *m)
} /* extern "C" */
#endif
-#endif /* PBSTRUCT_H_ */
+#endif /* UPB_MSG_H_ */
diff --git a/upb_table.h b/upb_table.h
index 4452fc9..0c4df93 100644
--- a/upb_table.h
+++ b/upb_table.h
@@ -102,6 +102,15 @@ INLINE void *upb_inttable_lookup(struct upb_inttable *t,
void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key);
+/* Provides iteration over the table. The order in which the entries are
+ * returned is undefined. Insertions invalidate iterators. The _next
+ * functions return NULL when the end has been reached.
+ * NOTE(review): upb_context_addfd() loops with
+ * `for(e = begin(t); e; e = next(t, &e->e))`, which presumes that _begin also
+ * returns NULL for an empty table -- confirm against the implementation. */
+void *upb_inttable_begin(struct upb_inttable *t);
+void *upb_inttable_next(struct upb_inttable *t, struct upb_inttable_entry *cur);
+
+void *upb_strtable_begin(struct upb_strtable *t);
+void *upb_strtable_next(struct upb_strtable *t, struct upb_strtable_entry *cur);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback