From 2da3b081c47fe8fe7476da228323d2040e876f56 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 28 Jun 2009 09:31:46 -0700 Subject: More work on upb_context. --- upb.h | 16 ++++++ upb_context.c | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++------- upb_context.h | 27 ++------- upb_enum.h | 60 ++++++++++++++++++++ upb_msg.h | 20 +++---- upb_table.h | 9 +++ 6 files changed, 256 insertions(+), 56 deletions(-) create mode 100644 upb_enum.h diff --git a/upb.h b/upb.h index bb1563a..5197395 100644 --- a/upb.h +++ b/upb.h @@ -29,6 +29,9 @@ extern "C" { /* The maximum that any submessages can be nested. Matches proto2's limit. */ #define UPB_MAX_NESTING 64 +/* The maximum number of fields that any one .proto type can have. */ +#define UPB_MAX_FIELDS (1<<16) + /* Represents a string or bytes. */ struct upb_string { /* We expect the data to be 8-bit clean (uint8_t), but char* is such an @@ -97,6 +100,19 @@ struct upb_tag { upb_wire_type_t wire_type; }; +enum upb_symbol_type { + UPB_SYM_MESSAGE, + UPB_SYM_ENUM, + UPB_SYM_SERVICE, + UPB_SYM_EXTENSION +}; + +union upb_symbol_ref { + struct upb_msg *msg; + struct upb_enum *_enum; + struct upb_svc *svc; +}; + /* Status codes used as a return value. */ typedef enum upb_status { UPB_STATUS_OK = 0, diff --git a/upb_context.c b/upb_context.c index db20d89..642270a 100644 --- a/upb_context.c +++ b/upb_context.c @@ -8,8 +8,10 @@ #include #include "descriptor.h" #include "upb_context.h" +#include "upb_enum.h" +#include "upb_msg.h" -int memrchr(char *data, char c, size_t len) +static int memrchr(char *data, char c, size_t len) { int off = len-1; while(off > 0 && data[off] != c) --off; @@ -20,7 +22,7 @@ bool upb_context_init(struct upb_context *c) { upb_strtable_init(&c->symtab, 16, sizeof(struct upb_symtab_entry)); /* Add all the types in descriptor.proto so we can parse descriptors. */ - if(!upb_context_addfd(c, &google_protobuf_filedescriptor, UPB_ONREDEF_ERROR)) + if(!upb_context_addfd(c, &google_protobuf_filedescriptor)) return false; /* Indicates that upb is buggy or corrupt. */ return true; } @@ -36,9 +38,9 @@ struct upb_symtab_entry *upb_context_lookup(struct upb_context *c, return upb_strtable_lookup(&c->symtab, symbol); } -struct upb_symtab_entry *upb_context_resolve(struct upb_context *c, - struct upb_string *base, - struct upb_string *symbol) +static struct upb_symtab_entry *resolve(struct upb_strtable *t, + struct upb_string *base, + struct upb_string *symbol) { if(base->byte_len + symbol->byte_len + 1 >= UPB_SYM_MAX_LENGTH || symbol->byte_len == 0) return NULL; @@ -47,7 +49,7 @@ struct upb_symtab_entry *upb_context_resolve(struct upb_context *c, /* Symbols starting with '.' are absolute, so we do a single lookup. */ struct upb_string sym_str = {.data = symbol->data+1, .byte_len = symbol->byte_len-1}; - return upb_context_lookup(c, &sym_str); + return upb_strtable_lookup(t, &sym_str); } else { /* Remove components from base until we find an entry or run out. */ char sym[UPB_SYM_MAX_LENGTH+1]; @@ -60,7 +62,7 @@ struct upb_symtab_entry *upb_context_resolve(struct upb_context *c, memcpy(sym + baselen + 1, symbol->data, symbol->byte_len); sym_str.byte_len = baselen + symbol->byte_len + 1; - struct upb_symtab_entry *e = upb_context_lookup(c, &sym_str); + struct upb_symtab_entry *e = upb_strtable_lookup(t, &sym_str); if (e) return e; else if(baselen == 0) return NULL; /* No more scopes to try. */ @@ -69,23 +71,161 @@ struct upb_symtab_entry *upb_context_resolve(struct upb_context *c, } } +union upb_symbol_ref resolve2(struct upb_strtable *t1, struct upb_strtable *t2, + struct upb_string *base, struct upb_string *sym, + enum upb_symbol_type expected_type) { + union upb_symbol_ref nullref = {.msg = NULL}; + struct upb_symtab_entry *e = resolve(t1, base, sym); + if(e == NULL) e = resolve(t2, base, sym); + + if(e && e->type == expected_type) return e->ref; + else return nullref; +} + + +struct upb_symtab_entry *upb_context_resolve(struct upb_context *c, + struct upb_string *base, + struct upb_string *symbol) { + return resolve(&c->symtab, base, symbol); +} + +/* join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" + * join("", "Baz") -> "Baz" + * Caller owns the returned string and must free it. */ +static struct upb_string join(struct upb_string *base, struct upb_string *name) { + size_t len = base->byte_len + name->byte_len; + if(base->byte_len > 0) len++; /* For the separator. */ + struct upb_string joined = {.byte_len=len, .data=malloc(len)}; + if(base->byte_len > 0) { + /* nested_base = base + '.' + d->name */ + memcpy(joined.data, base->data, base->byte_len); + joined.data[base->byte_len] = UPB_CONTEXT_SEPARATOR; + memcpy(&joined.data[base->byte_len+1], name->data, name->byte_len); + } else { + memcpy(joined.data, name->data, name->byte_len); + } + return joined; +} + +static bool insert_enum(struct upb_strtable *t, + google_protobuf_EnumDescriptorProto *ed, + struct upb_string *base) +{ + if(!ed->set_flags.has.name) return false; + + /* We own this and must free it on destruct. */ + struct upb_string fqname = join(base, ed->name); + + /* Redefinition within a FileDescriptorProto is not allowed. */ + if(upb_strtable_lookup(t, &fqname)) { + free(fqname.data); + return false; + } + + struct upb_symtab_entry e; + e.e.key = fqname; + e.type = UPB_SYM_ENUM; + e.ref._enum = malloc(sizeof(*e.ref._enum)); + upb_enum_init(e.ref._enum, ed); + upb_strtable_insert(t, &e.e); + + return true; +} + +static bool insert_message(struct upb_strtable *t, + google_protobuf_DescriptorProto *d, + struct upb_string *base) +{ + if(!d->set_flags.has.name) return false; + + /* We own this and must free it on destruct. */ + struct upb_string fqname = join(base, d->name); + + /* Redefinition within a FileDescriptorProto is not allowed. */ + if(upb_strtable_lookup(t, d->name)) { + free(fqname.data); + return false; + } + + struct upb_symtab_entry e; + e.e.key = fqname; + e.type = UPB_SYM_MESSAGE; + e.ref.msg = malloc(sizeof(*e.ref.msg)); + upb_msg_init(e.ref.msg, d); + upb_strtable_insert(t, &e.e); + + /* Add nested messages and enums. */ + if(d->set_flags.has.nested_type) + for(unsigned int i = 0; i < d->nested_type->len; i++) + if(!insert_message(t, d->nested_type->elements[i], &fqname)) + return false; + + if(d->set_flags.has.enum_type) + for(unsigned int i = 0; i < d->enum_type->len; i++) + if(!insert_enum(t, d->enum_type->elements[i], &fqname)) + return false; + + return true; +} + bool upb_context_addfd(struct upb_context *c, - google_protobuf_FileDescriptorProto *fd, - int onredef) + google_protobuf_FileDescriptorProto *fd) { + struct upb_string package = {.byte_len=0}; + if(fd->set_flags.has.package) package = *fd->package; + + /* We want the entire add operation to be atomic, so we initially insert into + * this temporary map of symbols. Once we have verified that there are no + * errors (all symbols can be resolved and no illegal redefinitions occurred) + * only then do we insert into the context's table. */ + struct upb_strtable tmp; + int symcount = (fd->set_flags.has.message_type ? fd->message_type->len : 0) + + (fd->set_flags.has.enum_type ? fd->enum_type->len : 0) + + (fd->set_flags.has.service ? fd->service->len : 0); + upb_strtable_init(&tmp, symcount, sizeof(struct upb_symtab_entry)); + /* TODO: properly handle redefinitions and unresolvable symbols. */ - if(fd->set_flags.has.message_type) { - for(unsigned int i = 0; i < fd->message_type->len; i++) { - struct google_protobuf_DescriptorProto *d = fd->message_type->elements[i]; - if(!d->set_flags.has.name) return false; - struct upb_symtab_entry e; - e.e.key = *d->name; - e.type = UPB_SYM_MESSAGE; - e.p.msg = malloc(sizeof(*e.p.msg)); - upb_msg_init(e.p.msg, d); - upb_strtable_insert(&c->symtab, &e.e); + if(fd->set_flags.has.message_type) + for(unsigned int i = 0; i < fd->message_type->len; i++) + if(!insert_message(&tmp, fd->message_type->elements[i], &package)) + goto error; + + if(fd->set_flags.has.enum_type) + for(unsigned int i = 0; i < fd->enum_type->len; i++) + if(!insert_enum(&tmp, fd->enum_type->elements[i], &package)) + goto error; + + /* TODO: handle extensions and services. */ + + /* Attempt to resolve all references. */ + struct upb_symtab_entry *e; + for(e = upb_strtable_begin(&tmp); e; e = upb_strtable_next(&tmp, &e->e)) { + if(upb_strtable_lookup(&c->symtab, &e->e.key)) + goto error; /* Redefinition prohibited. */ + if(e->type == UPB_SYM_MESSAGE) { + struct upb_msg *m = e->ref.msg; + for(unsigned int i = 0; i < m->num_fields; i++) { + struct upb_msg_field *f = &m->fields[i]; + google_protobuf_FieldDescriptorProto *fd = f->descriptor; + union upb_symbol_ref ref; + if(fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) + ref = resolve2(&c->symtab, &tmp, &e->e.key, fd->name, UPB_SYM_MESSAGE); + else if(fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM) + ref = resolve2(&c->symtab, &tmp, &e->e.key, fd->name, UPB_SYM_ENUM); + else + continue; /* No resolving necessary. */ + if(!ref.msg) goto error; + upb_msg_ref(m, f, &ref); + } } } - /* TODO: handle enums, extensions, and services. */ + + /* All references were successfully resolved -- add to the symbol table. */ + for(e = upb_strtable_begin(&tmp); e; e = upb_strtable_next(&tmp, &e->e)) + upb_strtable_insert(&c->symtab, &e->e); + return true; + +error: + return false; } diff --git a/upb_context.h b/upb_context.h index d3e6904..3b84889 100644 --- a/upb_context.h +++ b/upb_context.h @@ -19,21 +19,10 @@ extern "C" { #endif -enum upb_symbol_type { - UPB_SYM_MESSAGE, - UPB_SYM_ENUM, - UPB_SYM_SERVICE, - UPB_SYM_EXTENSION -}; - struct upb_symtab_entry { struct upb_strtable_entry e; enum upb_symbol_type type; - union { - struct upb_msg *msg; - struct upb_enum *_enum; - struct upb_svc *svc; - } p; + union upb_symbol_ref ref; }; struct upb_context { @@ -73,23 +62,15 @@ struct upb_symtab_entry *upb_context_lookup(struct upb_context *c, /* Adding symbols. ************************************************************/ -/* Enum controlling what happens if a symbol is redefined. */ -enum upb_onredef { - UPB_ONREDEF_REPLACE, /* Replace existing definition (must be same type). */ - UPB_ONREDEF_KEEP, /* Keep existing definition, ignore new one. */ - UPB_ONREDEF_ERROR /* Error on redefinition. */ -}; - /* Adds the definitions in the given file descriptor to this context. All * types that are referenced from fd must have previously been defined (or be - * defined in fd). onredef controls the behavior in the case that fd attempts - * to define a type that is already defined. + * defined in fd). fd may not attempt to define any names that are already + * defined in this context. * * Caller retains ownership of fd, but the context will contain references to * it, so it must outlive the context. */ bool upb_context_addfd(struct upb_context *c, - google_protobuf_FileDescriptorProto *fd, - int onredef); + google_protobuf_FileDescriptorProto *fd); /* Adds the serialized FileDescriptorSet proto contained in fdss to the context, * and adds symbol table entries for all the objects defined therein. onredef diff --git a/upb_enum.h b/upb_enum.h new file mode 100644 index 0000000..2c4010b --- /dev/null +++ b/upb_enum.h @@ -0,0 +1,60 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * upb_enum is a simple object that allows run-time reflection over the values + * defined within an enum. */ + +#ifndef UPB_ENUM_H_ +#define UPB_ENUM_H_ + +#include +#include "upb_table.h" + +/* Forward declaration from descriptor.h. */ +struct google_protobuf_EnumDescriptorProto; +struct google_protobuf_EnumValueDescriptorProto; + +struct upb_enum { + struct google_protobuf_EnumDescriptorProto *descriptor; + struct upb_strtable nametoint; + struct upb_inttable inttoname; +}; + +struct upb_enum_ntoi_entry { + struct upb_strtable_entry e; + uint32_t value; +}; + +struct upb_enum_iton_entry { + struct upb_inttable_entry e; + struct upb_string *string; +}; + +/* Initializes and frees an enum, respectively. Caller retains ownership of + * ed, but it must outlive e. */ +INLINE void upb_enum_init(struct upb_enum *e, + struct google_protobuf_EnumDescriptorProto *ed) { + int num_values = ed->set_flags.has.value ? ed->value->len : 0; + e->descriptor = ed; + upb_strtable_init(&e->nametoint, num_values, sizeof(struct upb_enum_ntoi_entry)); + upb_inttable_init(&e->inttoname, num_values, sizeof(struct upb_enum_iton_entry)); + + for(int i = 0; i < num_values; i++) { + google_protobuf_EnumValueDescriptorProto *value = ed->value->elements[i]; + struct upb_enum_ntoi_entry ntoi_entry = {.e = {.key = *value->name}, + .value = value->number}; + struct upb_enum_iton_entry iton_entry = {.e = {.key = value->number}, + .string = value->name}; + upb_strtable_insert(&e->nametoint, &ntoi_entry.e); + upb_inttable_insert(&e->inttoname, &iton_entry.e); + } +} + +INLINE void upb_enum_free(struct upb_enum *e) { + upb_strtable_free(&e->nametoint); + upb_inttable_free(&e->inttoname); +} + +#endif /* UPB_ENUM_H_ */ diff --git a/upb_msg.h b/upb_msg.h index 4d85fcc..f7b07f3 100644 --- a/upb_msg.h +++ b/upb_msg.h @@ -30,8 +30,8 @@ * management semantics can be used with the format as defined here. */ -#ifndef PBSTRUCT_H_ -#define PBSTRUCT_H_ +#ifndef UPB_MSG_H_ +#define UPB_MSG_H_ #include #include @@ -51,18 +51,12 @@ struct google_protobuf_FieldDescriptorProto; /* Structure definition. ******************************************************/ -/* Fields that reference other types have pointers to the other type. */ -union upb_msg_field_ref { - struct upb_msg *msg; /* Set if type == MESSAGE */ - struct upb_enum *_enum; /* Set if type == ENUM */ -}; - /* Structure that describes a single field in a message. */ struct upb_msg_field { struct google_protobuf_FieldDescriptorProto *descriptor; uint32_t byte_offset; /* Where to find the data. */ - uint32_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */ - union upb_msg_field_ref ref; + uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */ + union upb_symbol_ref ref; }; /* Structure that describes a single .proto message type. */ @@ -87,9 +81,9 @@ struct upb_msg { struct upb_abbrev_msg_field { uint32_t byte_offset; /* Where to find the data. */ - uint32_t field_index:24; /* Indexes upb_msg.fields. Also indicates set bit */ + uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */ upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */ - union upb_msg_field_ref ref; + union upb_symbol_ref ref; }; struct upb_fieldsbynum_entry { @@ -282,4 +276,4 @@ INLINE void upb_msg_clear(void *s, struct upb_msg *m) } /* extern "C" */ #endif -#endif /* PBSTRUCT_H_ */ +#endif /* UPB_MSG_H_ */ diff --git a/upb_table.h b/upb_table.h index 4452fc9..0c4df93 100644 --- a/upb_table.h +++ b/upb_table.h @@ -102,6 +102,15 @@ INLINE void *upb_inttable_lookup(struct upb_inttable *t, void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key); +/* Provides iteration over the table. The order in which the entries are + * returned is undefined. Insertions invalidate iterators. The _next + * functions return NULL when the end has been reached. */ +void *upb_inttable_begin(struct upb_inttable *t); +void *upb_inttable_next(struct upb_inttable *t, struct upb_inttable_entry *cur); + +void *upb_strtable_begin(struct upb_strtable *t); +void *upb_strtable_next(struct upb_strtable *t, struct upb_strtable_entry *cur); + #ifdef __cplusplus } /* extern "C" */ #endif -- cgit v1.2.3