From 18291eedc3cb6bf4386698620ad9d02ad367126a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 5 Dec 2009 10:32:53 -0800 Subject: Make defs refcounted, rename upb_context->upbsymtab. There is currently a memory leak when type definitions form cycles. This will need to be dealt with. --- src/upb.h | 11 +- src/upb_context.c | 350 ------------------------------- src/upb_context.h | 89 -------- src/upb_def.c | 616 +++++++++++++++++++++++++++++++++++++++++++++--------- src/upb_def.h | 294 +++++++++++++------------- src/upb_mm.c | 2 +- src/upb_msg.c | 40 ++-- src/upb_msg.h | 2 +- src/upb_parse.c | 8 +- src/upb_text.c | 32 +-- 10 files changed, 701 insertions(+), 743 deletions(-) delete mode 100644 src/upb_context.c delete mode 100644 src/upb_context.h (limited to 'src') diff --git a/src/upb.h b/src/upb.h index 6620bcd..ff7c86e 100644 --- a/src/upb.h +++ b/src/upb.h @@ -60,14 +60,15 @@ typedef uint8_t upb_wire_type_t; typedef uint8_t upb_field_type_t; // For referencing the type constants tersely. -#define UPB_TYPENUM(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type +#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type +#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type INLINE bool upb_issubmsgtype(upb_field_type_t type) { - return type == UPB_TYPENUM(GROUP) || type == UPB_TYPENUM(MESSAGE); + return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); } INLINE bool upb_isstringtype(upb_field_type_t type) { - return type == UPB_TYPENUM(STRING) || type == UPB_TYPENUM(BYTES); + return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); } // Info for a given field type. 
@@ -155,7 +156,7 @@ INLINE union upb_value upb_value_read(union upb_value_ptr ptr, union upb_value val; #define CASE(t, member_name) \ - case UPB_TYPENUM(t): val.member_name = *ptr.member_name; break; + case UPB_TYPE(t): val.member_name = *ptr.member_name; break; switch(ft) { CASE(DOUBLE, _double) @@ -191,7 +192,7 @@ INLINE union upb_value upb_value_read(union upb_value_ptr ptr, INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val, upb_field_type_t ft) { #define CASE(t, member_name) \ - case UPB_TYPENUM(t): *ptr.member_name = val.member_name; break; + case UPB_TYPE(t): *ptr.member_name = val.member_name; break; switch(ft) { CASE(DOUBLE, _double) diff --git a/src/upb_context.c b/src/upb_context.c deleted file mode 100644 index 469f879..0000000 --- a/src/upb_context.c +++ /dev/null @@ -1,350 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include -#include -#include "descriptor.h" -#include "upb_context.h" -#include "upb_def.h" -#include "upb_mm.h" - -struct upb_symtab_entry { - struct upb_strtable_entry e; - struct upb_def *def; -}; - -/* Search for a character in a string, in reverse. */ -static int my_memrchr(char *data, char c, size_t len) -{ - int off = len-1; - while(off > 0 && data[off] != c) --off; - return off; -} - -void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, - google_protobuf_FileDescriptorProto *fd, bool sort, - struct upb_status *status); - -struct upb_context *upb_context_new() -{ - struct upb_context *c = malloc(sizeof(*c)); - upb_atomic_refcount_init(&c->refcount, 1); - upb_rwlock_init(&c->lock); - upb_strtable_init(&c->symtab, 16, sizeof(struct upb_symtab_entry)); - upb_strtable_init(&c->psymtab, 16, sizeof(struct upb_symtab_entry)); - /* Add all the types in descriptor.proto so we can parse descriptors. 
*/ - google_protobuf_FileDescriptorProto *fd = - upb_file_descriptor_set->file->elements[0]; /* We know there is only 1. */ - struct upb_status status = UPB_STATUS_INIT; - addfd(&c->psymtab, &c->symtab, fd, false, &status); - if(!upb_ok(&status)) { - fprintf(stderr, "Failed to initialize upb: %s.\n", status.msg); - assert(false); - return NULL; /* Indicates that upb is buggy or corrupt. */ - } - struct upb_string name = UPB_STRLIT("google.protobuf.FileDescriptorSet"); - struct upb_symtab_entry *e = upb_strtable_lookup(&c->psymtab, &name); - assert(e); - c->fds_msgdef = upb_downcast_msgdef(e->def); - return c; -} - -static void free_symtab(struct upb_strtable *t) -{ - struct upb_symtab_entry *e = upb_strtable_begin(t); - for(; e; e = upb_strtable_next(t, &e->e)) { - upb_def_unref(e->def); - upb_string_unref(e->e.key); - } - upb_strtable_free(t); -} - -static void free_context(struct upb_context *c) -{ - free_symtab(&c->symtab); - free_symtab(&c->psymtab); -} - -void upb_context_unref(struct upb_context *c) -{ - if(upb_atomic_unref(&c->refcount)) { - upb_rwlock_wrlock(&c->lock); - free_context(c); - upb_rwlock_unlock(&c->lock); - upb_rwlock_destroy(&c->lock); - free(c); - } -} - -struct upb_def **upb_context_getandref_defs(struct upb_context *c, int *count) -{ - upb_rwlock_wrlock(&c->lock); - *count = upb_strtable_count(&c->symtab); - struct upb_def **defs = malloc(sizeof(*defs) * (*count)); - struct upb_symtab_entry *e = upb_strtable_begin(&c->symtab); - int i = 0; - for(; e; e = upb_strtable_next(&c->symtab, &e->e), i++) { - assert(e->def); - defs[i] = e->def; - upb_def_ref(defs[i]); - } - assert(*count == i); - upb_rwlock_unlock(&c->lock); - return defs; -} - -struct upb_def *upb_context_lookup(struct upb_context *c, - struct upb_string *sym) -{ - upb_rwlock_rdlock(&c->lock); - struct upb_symtab_entry *e = upb_strtable_lookup(&c->symtab, sym); - upb_rwlock_unlock(&c->lock); - return e ? 
e->def : NULL; -} - -/* Given a symbol and the base symbol inside which it is defined, find the - * symbol's definition in t. */ -static struct upb_symtab_entry *resolve(struct upb_strtable *t, - struct upb_string *base, - struct upb_string *symbol) -{ - if(base->byte_len + symbol->byte_len + 1 >= UPB_SYMBOL_MAXLEN || - symbol->byte_len == 0) return NULL; - - if(symbol->ptr[0] == UPB_SYMBOL_SEPARATOR) { - /* Symbols starting with '.' are absolute, so we do a single lookup. */ - struct upb_string sym_str = {.ptr = symbol->ptr+1, - .byte_len = symbol->byte_len-1}; - return upb_strtable_lookup(t, &sym_str); - } else { - /* Remove components from base until we find an entry or run out. */ - char sym[UPB_SYMBOL_MAXLEN+1]; - struct upb_string sym_str = {.ptr = sym}; - int baselen = base->byte_len; - while(1) { - /* sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + symbol */ - memcpy(sym, base->ptr, baselen); - sym[baselen] = UPB_SYMBOL_SEPARATOR; - memcpy(sym + baselen + 1, symbol->ptr, symbol->byte_len); - sym_str.byte_len = baselen + symbol->byte_len + 1; - - struct upb_symtab_entry *e = upb_strtable_lookup(t, &sym_str); - if (e) return e; - else if(baselen == 0) return NULL; /* No more scopes to try. */ - - baselen = my_memrchr(base->ptr, UPB_SYMBOL_SEPARATOR, baselen); - } - } -} - -/* Tries to resolve a symbol in two different tables. */ -struct upb_def *resolve2(struct upb_strtable *t1, struct upb_strtable *t2, - struct upb_string *base, struct upb_string *sym, - enum upb_def_type expected_type) { - struct upb_symtab_entry *e = resolve(t1, base, sym); - if(e == NULL) e = resolve(t2, base, sym); - if(e && e->def->type == expected_type) return e->def; - return NULL; -} - - -struct upb_def *upb_context_resolve(struct upb_context *c, - struct upb_string *base, - struct upb_string *symbol) { - upb_rwlock_rdlock(&c->lock); - struct upb_symtab_entry *e = resolve(&c->symtab, base, symbol); - upb_rwlock_unlock(&c->lock); - return e ? 
e->def : NULL; -} - -/* Joins strings together, for example: - * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" - * join("", "Baz") -> "Baz" - * Caller owns the returned string and must free it. */ -static struct upb_string *join(struct upb_string *base, struct upb_string *name) { - size_t len = base->byte_len + name->byte_len; - if(base->byte_len > 0) len++; /* For the separator. */ - struct upb_string *joined = upb_string_new(); - upb_string_resize(joined, len); - if(base->byte_len > 0) { - /* nested_base = base + '.' + d->name */ - memcpy(joined->ptr, base->ptr, base->byte_len); - joined->ptr[base->byte_len] = UPB_SYMBOL_SEPARATOR; - memcpy(&joined->ptr[base->byte_len+1], name->ptr, name->byte_len); - } else { - memcpy(joined->ptr, name->ptr, name->byte_len); - } - return joined; -} - -static void insert_enum(struct upb_strtable *t, - google_protobuf_EnumDescriptorProto *ed, - struct upb_string *base, - struct upb_status *status) -{ - if(!ed->set_flags.has.name) { - upb_seterr(status, UPB_STATUS_ERROR, - "enum in context '" UPB_STRFMT "' does not have a name", - UPB_STRARG(base)); - return; - } - - struct upb_string *fqname = join(base, ed->name); - if(upb_strtable_lookup(t, fqname)) { - upb_seterr(status, UPB_STATUS_ERROR, - "attempted to redefine symbol '" UPB_STRFMT "'", - UPB_STRARG(fqname)); - upb_string_unref(fqname); - return; - } - - struct upb_symtab_entry e; - e.e.key = fqname; // Donating our ref to the table. - e.def = (struct upb_def*)upb_enumdef_new(ed, fqname); - upb_strtable_insert(t, &e.e); -} - -static void insert_message(struct upb_strtable *t, - google_protobuf_DescriptorProto *d, - struct upb_string *base, bool sort, - struct upb_status *status) -{ - if(!d->set_flags.has.name) { - upb_seterr(status, UPB_STATUS_ERROR, - "message in context '" UPB_STRFMT "' does not have a name", - UPB_STRARG(base)); - return; - } - - /* We own this and must free it on destruct. 
*/ - struct upb_string *fqname = join(base, d->name); - - if(upb_strtable_lookup(t, fqname)) { - upb_seterr(status, UPB_STATUS_ERROR, - "attempted to redefine symbol '" UPB_STRFMT "'", - UPB_STRARG(fqname)); - upb_string_unref(fqname); - return; - } - - struct upb_symtab_entry e; - e.e.key = fqname; // Donating our ref to the table. - struct upb_fielddef *fielddefs = malloc(sizeof(*fielddefs) * d->field->len); - for (unsigned int i = 0; i < d->field->len; i++) { - google_protobuf_FieldDescriptorProto *fd = d->field->elements[i]; - upb_fielddef_init(&fielddefs[i], fd); - } - if(sort) upb_fielddef_sort(fielddefs, d->field->len); - e.def = (struct upb_def*)upb_msgdef_new(fielddefs, d->field->len, fqname); - upb_strtable_insert(t, &e.e); - - /* Add nested messages and enums. */ - if(d->set_flags.has.nested_type) - for(unsigned int i = 0; i < d->nested_type->len; i++) - insert_message(t, d->nested_type->elements[i], fqname, sort, status); - - if(d->set_flags.has.enum_type) - for(unsigned int i = 0; i < d->enum_type->len; i++) - insert_enum(t, d->enum_type->elements[i], fqname, status); -} - -void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, - google_protobuf_FileDescriptorProto *fd, bool sort, - struct upb_status *status) -{ - struct upb_string pkg = {.byte_len=0}; - if(fd->set_flags.has.package) pkg = *fd->package; - - if(fd->set_flags.has.message_type) - for(unsigned int i = 0; i < fd->message_type->len; i++) - insert_message(addto, fd->message_type->elements[i], &pkg, sort, status); - - if(fd->set_flags.has.enum_type) - for(unsigned int i = 0; i < fd->enum_type->len; i++) - insert_enum(addto, fd->enum_type->elements[i], &pkg, status); - - if(!upb_ok(status)) return; - - /* TODO: handle extensions and services. */ - - /* Attempt to resolve all references. 
*/ - struct upb_symtab_entry *e; - for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { - if(upb_strtable_lookup(existingdefs, e->e.key)) { - upb_seterr(status, UPB_STATUS_ERROR, - "attempted to redefine symbol '" UPB_STRFMT "'", - UPB_STRARG(e->e.key)); - return; - } - if(e->def->type == UPB_DEF_MESSAGE) { - struct upb_msgdef *m = upb_downcast_msgdef(e->def); - for(unsigned int i = 0; i < m->num_fields; i++) { - struct upb_fielddef *f = &m->fields[i]; - if(!upb_issubmsg(f) && f->type != UPB_TYPENUM(ENUM)) { - // No resolving necessary. - continue; - } - struct upb_def *def; - struct upb_string *name = upb_downcast_unresolveddef(f->def)->name; - if(upb_issubmsg(f)) - def = resolve2(existingdefs, addto, e->e.key, name, UPB_DEF_MESSAGE); - else if(f->type == UPB_TYPENUM(ENUM)) - def = resolve2(existingdefs, addto, e->e.key, name, UPB_DEF_ENUM); - if(!def) { - upb_seterr(status, UPB_STATUS_ERROR, - "could not resolve symbol '" UPB_STRFMT "'" - " in context '" UPB_STRFMT "'", - UPB_STRARG(name), UPB_STRARG(e->e.key)); - return; - } - upb_msgdef_resolve(m, f, def); - } - } - } -} - -void upb_context_addfds(struct upb_context *c, - google_protobuf_FileDescriptorSet *fds, - struct upb_status *status) -{ - if(fds->set_flags.has.file) { - /* Insert new symbols into a temporary table until we have verified that - * the descriptor is valid. */ - struct upb_strtable tmp; - upb_strtable_init(&tmp, 0, sizeof(struct upb_symtab_entry)); - upb_rwlock_rdlock(&c->lock); - for(uint32_t i = 0; i < fds->file->len; i++) { - addfd(&tmp, &c->symtab, fds->file->elements[i], true, status); - if(!upb_ok(status)) { - free_symtab(&tmp); - upb_rwlock_unlock(&c->lock); - return; - } - } - upb_rwlock_unlock(&c->lock); - - /* Everything was successfully added, copy from the tmp symtable. 
*/ - struct upb_symtab_entry *e; - { - upb_rwlock_wrlock(&c->lock); - for(e = upb_strtable_begin(&tmp); e; e = upb_strtable_next(&tmp, &e->e)) - upb_strtable_insert(&c->symtab, &e->e); - upb_rwlock_unlock(&c->lock); - } - upb_strtable_free(&tmp); - } - return; -} - -void upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str, - struct upb_status *status) -{ - struct upb_msg *fds = upb_msg_new(c->fds_msgdef); - upb_msg_parsestr(fds, fds_str->ptr, fds_str->byte_len, status); - if(!upb_ok(status)) return; - upb_context_addfds(c, (google_protobuf_FileDescriptorSet*)fds, status); - return; -} diff --git a/src/upb_context.h b/src/upb_context.h deleted file mode 100644 index 177b42e..0000000 --- a/src/upb_context.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * A context represents a namespace of proto definitions, sort of like an - * interpreter's symbol table. It is empty when first constructed. Clients - * add definitions to the context by supplying unserialized or serialized - * descriptors (as defined in descriptor.proto). - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_CONTEXT_H_ -#define UPB_CONTEXT_H_ - -#include "upb.h" -#include "upb_table.h" -#include "upb_atomic.h" - -struct google_protobuf_FileDescriptorProto; - -#ifdef __cplusplus -extern "C" { -#endif - -/* Definitions. ***************************************************************/ - -struct upb_context { - upb_atomic_refcount_t refcount; - upb_rwlock_t lock; // Protects all members except the refcount. - struct upb_msgdef *fds_msgdef; // In psymtab, ptr here for convenience. - - // Our symbol tables; we own refs to the defs therein. - struct upb_strtable symtab; // The context's symbol table. - struct upb_strtable psymtab; // Private symbols, for internal use. -}; - -// Initializes a upb_context. Contexts are not freed explicitly, but unref'd -// when the caller is done with them. 
-struct upb_context *upb_context_new(void); -INLINE void upb_context_ref(struct upb_context *c) { - upb_atomic_ref(&c->refcount); -} -void upb_context_unref(struct upb_context *c); - -/* Looking up symbols. ********************************************************/ - -// Resolves the given symbol using the rules described in descriptor.proto, -// namely: -// -// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like -// scoping rules are used to find the type (i.e. first the nested types -// within this message are searched, then within the parent, on up to the -// root namespace). -// -// Returns NULL if no such symbol has been defined. -struct upb_def *upb_context_resolve(struct upb_context *c, - struct upb_string *base, - struct upb_string *symbol); - -// Find an entry in the symbol table with this exact name. Returns NULL if no -// such symbol name has been defined. -struct upb_def *upb_context_lookup(struct upb_context *c, - struct upb_string *sym); - -// Gets an array of pointers to all currently active defs in this context. The -// caller owns the returned array (which is of length *count) as well as a ref -// to each symbol inside. -struct upb_def **upb_context_getandref_defs(struct upb_context *c, int *count); - -/* Adding symbols. ************************************************************/ - -// Adds the definitions in the given file descriptor to this context. All -// types that are referenced from fd must have previously been defined (or be -// defined in fd). fd may not attempt to define any names that are already -// defined in this context. Caller retains ownership of fd. status indicates -// whether the operation was successful or not, and the error message (if any). -struct google_protobuf_FileDescriptorSet; -void upb_context_addfds(struct upb_context *c, - struct google_protobuf_FileDescriptorSet *fds, - struct upb_status *status); -// Like the above, but also parses the FileDescriptorSet from fds. 
-void upb_context_parsefds(struct upb_context *c, struct upb_string *fds, - struct upb_status *status); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_PARSE_H_ */ diff --git a/src/upb_def.c b/src/upb_def.c index 32675f5..ee9bd21 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -4,8 +4,10 @@ * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. */ -#include "upb_def.h" #include "descriptor.h" +#include "upb_def.h" +#include "upb_mm.h" +#include "upb_msg.h" /* Rounds p up to the next multiple of t. */ #define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t)))) @@ -15,46 +17,33 @@ static int div_round_up(int numerator, int denominator) { return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; } -/* Callback for sorting fields. */ -static int compare_fields(const void *e1, const void *e2) { - const google_protobuf_FieldDescriptorProto *fd1 = *(void**)e1; - const google_protobuf_FieldDescriptorProto *fd2 = *(void**)e2; - /* Required fields go before non-required. */ - bool req1 = fd1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; - bool req2 = fd2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; - if(req1 != req2) { - return req2 - req1; - } else { - /* Within required and non-required field lists, list in number order. - * TODO: consider ordering by data size to reduce padding. */ - return fd1->number - fd2->number; - } -} - -/* Callback for sorting fields. */ -static int compare_fields2(const void *e1, const void *e2) { - const struct upb_fielddef *f1 = e1; - const struct upb_fielddef *f2 = e2; - /* Required fields go before non-required. */ - bool req1 = f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; - bool req2 = f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; - if(req1 != req2) { - return req2 - req1; - } else { - /* Within required and non-required field lists, list in number order. - * TODO: consider ordering by data size to reduce padding. 
*/ - return f1->number - f2->number; - } -} +/* upb_def ********************************************************************/ -void upb_fielddef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num) -{ - qsort(fds, num, sizeof(*fds), compare_fields); -} +static void msgdef_free(struct upb_msgdef *m); +static void enumdef_free(struct upb_enumdef *e); +static void unresolveddef_free(struct upb_unresolveddef *u); -void upb_fielddef_sort(struct upb_fielddef *defs, size_t num) +void _upb_def_free(struct upb_def *def) { - qsort(defs, num, sizeof(*defs), compare_fields2); + switch(def->type) { + case UPB_DEF_MSG: + msgdef_free(upb_downcast_msgdef(def)); + break; + case UPB_DEF_ENUM: + enumdef_free(upb_downcast_enumdef(def)); + break; + case UPB_DEF_SVC: + assert(false); /* Unimplemented. */ + break; + case UPB_DEF_EXT: + assert(false); /* Unimplemented. */ + break; + case UPB_DEF_UNRESOLVED: + unresolveddef_free(upb_downcast_unresolveddef(def)); + break; + default: + assert(false); + } } void upb_def_init(struct upb_def *def, enum upb_def_type type, @@ -69,62 +58,128 @@ void upb_def_uninit(struct upb_def *def) { upb_string_unref(def->fqname); } -void upb_fielddef_init(struct upb_fielddef *f, - struct google_protobuf_FieldDescriptorProto *fd) +/* upb_unresolveddef **********************************************************/ + +struct upb_unresolveddef { + struct upb_def base; + struct upb_string *name; +}; + +static struct upb_unresolveddef *upb_unresolveddef_new(struct upb_string *str) { + struct upb_unresolveddef *def = malloc(sizeof(*def)); + struct upb_string *name = upb_strdup(str); + upb_def_init(&def->base, UPB_DEF_UNRESOLVED, name); + def->name = name; + return def; +} + +static void unresolveddef_free(struct upb_unresolveddef *def) { + upb_def_uninit(&def->base); + upb_string_unref(def->name); + free(def); +} + +/* upb_fielddef ***************************************************************/ + +static void fielddef_init(struct upb_fielddef *f, + 
google_protobuf_FieldDescriptorProto *fd) { f->type = fd->type; f->label = fd->label; f->number = fd->number; f->name = upb_strdup(fd->name); f->def = NULL; + assert(fd->set_flags.has.type_name == upb_hasdef(f)); if(fd->set_flags.has.type_name) { - f->def = (struct upb_def*)upb_unresolveddef_new(fd->type_name); + f->def = UPB_UPCAST(upb_unresolveddef_new(fd->type_name)); } } -void upb_fielddef_uninit(struct upb_fielddef *f) +static struct upb_fielddef *fielddef_new( + google_protobuf_FieldDescriptorProto *fd) { + struct upb_fielddef *f = malloc(sizeof(*f)); + fielddef_init(f, fd); + return f; +} + +static void fielddef_uninit(struct upb_fielddef *f) { upb_string_unref(f->name); - if(upb_fielddef_hasdef(f)) upb_def_unref(f->def); + if(upb_hasdef(f)) { + upb_def_unref(f->def); + } } -struct upb_fielddef *upb_fielddef_dup(struct upb_fielddef *f) +static void fielddef_copy(struct upb_fielddef *dst, struct upb_fielddef *src) { - struct upb_fielddef *new_f = malloc(sizeof(*new_f)); - new_f->type = f->type; - new_f->label = f->label; - new_f->number = f->number; - new_f->name = upb_strdup(f->name); - new_f->type = f->type; - new_f->def = NULL; - if(upb_fielddef_hasdef(f)) { - new_f->def = f->def; - upb_def_ref(new_f->def); + *dst = *src; + dst->name = upb_strdup(src->name); + if(upb_hasdef(src)) { + upb_def_ref(dst->def); + } +} + +// Callback for sorting fields. +static int compare_fields(struct upb_fielddef *f1, struct upb_fielddef *f2) { + // Required fields go before non-required. + bool req1 = f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; + bool req2 = f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; + if(req1 != req2) { + return req2 - req1; + } else { + // Within required and non-required field lists, list in number order. + // TODO: consider ordering by data size to reduce padding. 
*/ + return f1->number - f2->number; } - return new_f; } -struct upb_msgdef *upb_msgdef_new(struct upb_fielddef *fields, int num_fields, - struct upb_string *fqname) +static int compare_fielddefs(const void *e1, const void *e2) { + return compare_fields(*(void**)e1, *(void**)e2); +} + +static int compare_fds(const void *e1, const void *e2) { + struct upb_fielddef f1, f2; + fielddef_init(&f1, *(void**)e1); + fielddef_init(&f2, *(void**)e2); + int ret = compare_fields(&f1, &f2); + fielddef_uninit(&f1); + fielddef_uninit(&f2); + return ret; +} + +void upb_fielddef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num) +{ + qsort(fds, num, sizeof(*fds), compare_fds); +} + +static void fielddef_sort(struct upb_fielddef **defs, size_t num) +{ + qsort(defs, num, sizeof(*defs), compare_fielddefs); +} + +/* upb_msgdef *****************************************************************/ + +static struct upb_msgdef *msgdef_new(struct upb_fielddef **fields, + int num_fields, + struct upb_string *fqname) { struct upb_msgdef *m = malloc(sizeof(*m)); - upb_def_init(&m->def, UPB_DEF_MESSAGE, fqname); - upb_inttable_init(&m->fields_by_num, num_fields, - sizeof(struct upb_fieldsbynum_entry)); - upb_strtable_init(&m->fields_by_name, num_fields, - sizeof(struct upb_fieldsbyname_entry)); + upb_def_init(&m->base, UPB_DEF_MSG, fqname); + upb_inttable_init(&m->itof, num_fields, sizeof(struct upb_itof_ent)); + upb_strtable_init(&m->ntof, num_fields, sizeof(struct upb_ntof_ent)); m->num_fields = num_fields; m->set_flags_bytes = div_round_up(m->num_fields, 8); // These are incremented in the loop. 
m->num_required_fields = 0; m->size = m->set_flags_bytes; - m->fields = fields; + m->fields = malloc(sizeof(struct upb_fielddef) * num_fields); size_t max_align = 0; for(int i = 0; i < num_fields; i++) { struct upb_fielddef *f = &m->fields[i]; - struct upb_type_info *type_info = &upb_type_info[f->type]; + struct upb_type_info *type_info = &upb_type_info[fields[i]->type]; + fielddef_copy(f, fields[i]); // General alignment rules are: each member must be at an address that is a // multiple of that type's alignment. Also, the size of the structure as @@ -133,75 +188,428 @@ struct upb_msgdef *upb_msgdef_new(struct upb_fielddef *fields, int num_fields, f->byte_offset = ALIGN_UP(m->size, type_info->align); m->size = f->byte_offset + type_info->size; max_align = UPB_MAX(max_align, type_info->align); - if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) { + if(f->label == UPB_LABEL(REQUIRED)) { // We currently rely on the fact that required fields are always sorted // to occur before non-required fields. m->num_required_fields++; } - // Insert into the tables. Note that f->ref will be uninitialized, even in - // the tables' copies of *f, which is why we must update them separately - // in upb_msg_setref() below. - struct upb_fieldsbynum_entry nument = {.e = {.key = f->number}, .f = *f}; - struct upb_fieldsbyname_entry strent = {.e = {.key = upb_strdup(f->name)}, .f = *f}; - upb_inttable_insert(&m->fields_by_num, &nument.e); - upb_strtable_insert(&m->fields_by_name, &strent.e); + // Insert into the tables. 
+ struct upb_itof_ent itof_ent = {{f->number, 0}, f}; + struct upb_ntof_ent ntof_ent = {{upb_strdup(f->name), 0}, f}; + upb_inttable_insert(&m->itof, &itof_ent.e); + upb_strtable_insert(&m->ntof, &ntof_ent.e); } if(max_align > 0) m->size = ALIGN_UP(m->size, max_align); return m; } -void _upb_msgdef_free(struct upb_msgdef *m) +static void msgdef_free(struct upb_msgdef *m) { - upb_def_uninit(&m->def); - upb_inttable_free(&m->fields_by_num); - upb_strtable_free(&m->fields_by_name); - for (unsigned int i = 0; i < m->num_fields; i++) - upb_fielddef_uninit(&m->fields[i]); + //upb_def_uninit(&m->base); + upb_inttable_free(&m->itof); + upb_strtable_free(&m->ntof); + for (unsigned int i = 0; i < m->num_fields; i++) { + fielddef_uninit(&m->fields[i]); + } + upb_def_uninit(&m->base); free(m->fields); free(m); } -void upb_msgdef_resolve(struct upb_msgdef *m, struct upb_fielddef *f, - struct upb_def *def) { - struct upb_fieldsbynum_entry *int_e = upb_inttable_fast_lookup( - &m->fields_by_num, f->number, sizeof(struct upb_fieldsbynum_entry)); - struct upb_fieldsbyname_entry *str_e = - upb_strtable_lookup(&m->fields_by_name, f->name); - assert(int_e && str_e); +static void upb_msgdef_resolve(struct upb_msgdef *m, struct upb_fielddef *f, + struct upb_def *def) { + (void)m; + upb_def_unref(f->def); f->def = def; - int_e->f.def = def; - str_e->f.def = def; upb_def_ref(def); } -struct upb_enumdef *upb_enumdef_new( - struct google_protobuf_EnumDescriptorProto *ed, struct upb_string *fqname) +/* upb_enumdef ****************************************************************/ + +struct ntoi_ent { + struct upb_strtable_entry e; + uint32_t value; +}; + +struct iton_ent { + struct upb_inttable_entry e; + struct upb_string *string; +}; + +static struct upb_enumdef *enumdef_new(google_protobuf_EnumDescriptorProto *ed, + struct upb_string *fqname) { struct upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->def, UPB_DEF_ENUM, fqname); + upb_def_init(&e->base, UPB_DEF_ENUM, fqname); int 
num_values = ed->set_flags.has.value ? ed->value->len : 0; - upb_strtable_init(&e->nametoint, num_values, - sizeof(struct upb_enumdef_ntoi_entry)); - upb_inttable_init(&e->inttoname, num_values, - sizeof(struct upb_enumdef_iton_entry)); + upb_strtable_init(&e->ntoi, num_values, sizeof(struct ntoi_ent)); + upb_inttable_init(&e->iton, num_values, sizeof(struct iton_ent)); for(int i = 0; i < num_values; i++) { google_protobuf_EnumValueDescriptorProto *value = ed->value->elements[i]; - struct upb_enumdef_ntoi_entry ntoi_entry = {.e = {.key = upb_strdup(value->name)}, - .value = value->number}; - struct upb_enumdef_iton_entry iton_entry = {.e = {.key = value->number}, - .string = value->name}; - upb_strtable_insert(&e->nametoint, &ntoi_entry.e); - upb_inttable_insert(&e->inttoname, &iton_entry.e); + struct ntoi_ent ntoi_ent = {{upb_strdup(value->name), 0}, value->number}; + struct iton_ent iton_ent = {{value->number, 0}, value->name}; + upb_strtable_insert(&e->ntoi, &ntoi_ent.e); + upb_inttable_insert(&e->iton, &iton_ent.e); } return e; } -void _upb_enumdef_free(struct upb_enumdef *e) { - upb_def_uninit(&e->def); - upb_strtable_free(&e->nametoint); - upb_inttable_free(&e->inttoname); +static void enumdef_free(struct upb_enumdef *e) { + upb_def_uninit(&e->base); + upb_strtable_free(&e->ntoi); + upb_inttable_free(&e->iton); free(e); } + +static void fill_iter(struct upb_enum_iter *iter, struct ntoi_ent *ent) { + iter->state = ent; + iter->name = ent->e.key; + iter->val = ent->value; +} + +void upb_enum_begin(struct upb_enum_iter *iter, struct upb_enumdef *e) { + // We could iterate over either table here; the choice is arbitrary. 
+ struct ntoi_ent *ent = upb_strtable_begin(&e->ntoi); + iter->e = e; + fill_iter(iter, ent); +} + +void upb_enum_next(struct upb_enum_iter *iter) { + struct ntoi_ent *ent = iter->state; + assert(ent); + ent = upb_strtable_next(&iter->e->ntoi, &ent->e); + iter->state = ent; + if(ent) fill_iter(iter, ent); +} + +bool upb_enum_done(struct upb_enum_iter *iter) { + return iter->state == NULL; +} + +/* symtab internal ***********************************************************/ + +struct symtab_ent { + struct upb_strtable_entry e; + struct upb_def *def; +}; + +/* Search for a character in a string, in reverse. */ +static int my_memrchr(char *data, char c, size_t len) +{ + int off = len-1; + while(off > 0 && data[off] != c) --off; + return off; +} + +/* Given a symbol and the base symbol inside which it is defined, find the + * symbol's definition in t. */ +static struct symtab_ent *resolve(struct upb_strtable *t, + struct upb_string *base, + struct upb_string *symbol) +{ + if(base->byte_len + symbol->byte_len + 1 >= UPB_SYMBOL_MAXLEN || + symbol->byte_len == 0) return NULL; + + if(symbol->ptr[0] == UPB_SYMBOL_SEPARATOR) { + /* Symbols starting with '.' are absolute, so we do a single lookup. */ + struct upb_string sym_str = {.ptr = symbol->ptr+1, + .byte_len = symbol->byte_len-1}; + return upb_strtable_lookup(t, &sym_str); + } else { + /* Remove components from base until we find an entry or run out. */ + char sym[UPB_SYMBOL_MAXLEN+1]; + struct upb_string sym_str = {.ptr = sym}; + int baselen = base->byte_len; + while(1) { + /* sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + symbol */ + memcpy(sym, base->ptr, baselen); + sym[baselen] = UPB_SYMBOL_SEPARATOR; + memcpy(sym + baselen + 1, symbol->ptr, symbol->byte_len); + sym_str.byte_len = baselen + symbol->byte_len + 1; + + struct symtab_ent *e = upb_strtable_lookup(t, &sym_str); + if (e) return e; + else if(baselen == 0) return NULL; /* No more scopes to try. 
*/ + + baselen = my_memrchr(base->ptr, UPB_SYMBOL_SEPARATOR, baselen); + } + } +} + +/* Joins strings together, for example: + * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" + * join("", "Baz") -> "Baz" + * Caller owns the returned string and must free it. */ +static struct upb_string *join(struct upb_string *base, struct upb_string *name) { + size_t len = base->byte_len + name->byte_len; + if(base->byte_len > 0) len++; /* For the separator. */ + struct upb_string *joined = upb_string_new(); + upb_string_resize(joined, len); + if(base->byte_len > 0) { + /* nested_base = base + '.' + d->name */ + memcpy(joined->ptr, base->ptr, base->byte_len); + joined->ptr[base->byte_len] = UPB_SYMBOL_SEPARATOR; + memcpy(&joined->ptr[base->byte_len+1], name->ptr, name->byte_len); + } else { + memcpy(joined->ptr, name->ptr, name->byte_len); + } + return joined; +} + +static struct upb_string *try_define(struct upb_strtable *t, + struct upb_string *base, + struct upb_string *name, + struct upb_status *status) +{ + if(!name) { + upb_seterr(status, UPB_STATUS_ERROR, + "symbol in context '" UPB_STRFMT "' does not have a name", + UPB_STRARG(base)); + return NULL; + } + struct upb_string *fqname = join(base, name); + if(upb_strtable_lookup(t, fqname)) { + upb_seterr(status, UPB_STATUS_ERROR, + "attempted to redefine symbol '" UPB_STRFMT "'", + UPB_STRARG(fqname)); + upb_string_unref(fqname); + return NULL; + } + return fqname; +} + +static void insert_enum(struct upb_strtable *t, + google_protobuf_EnumDescriptorProto *ed, + struct upb_string *base, + struct upb_status *status) +{ + struct upb_string *name = ed->set_flags.has.name ? ed->name : NULL; + struct upb_string *fqname = try_define(t, base, name, status); + if(!fqname) return; + + struct symtab_ent e; + e.e.key = fqname; // Donating our ref to the table. 
+ e.def = UPB_UPCAST(enumdef_new(ed, fqname)); + upb_strtable_insert(t, &e.e); +} + +static void insert_message(struct upb_strtable *t, + google_protobuf_DescriptorProto *d, + struct upb_string *base, bool sort, + struct upb_status *status) +{ + struct upb_string *name = d->set_flags.has.name ? d->name : NULL; + struct upb_string *fqname = try_define(t, base, name, status); + if(!fqname) return; + + int num_fields = d->set_flags.has.field ? d->field->len : 0; + struct symtab_ent e; + e.e.key = fqname; // Donating our ref to the table. + struct upb_fielddef **fielddefs = malloc(sizeof(*fielddefs) * num_fields); + for (int i = 0; i < num_fields; i++) { + google_protobuf_FieldDescriptorProto *fd = d->field->elements[i]; + fielddefs[i] = fielddef_new(fd); + } + if(sort) fielddef_sort(fielddefs, d->field->len); + e.def = UPB_UPCAST(msgdef_new(fielddefs, d->field->len, fqname)); + upb_strtable_insert(t, &e.e); + for (int i = 0; i < num_fields; i++) { + fielddef_uninit(fielddefs[i]); + free(fielddefs[i]); + } + + /* Add nested messages and enums. */ + if(d->set_flags.has.nested_type) + for(unsigned int i = 0; i < d->nested_type->len; i++) + insert_message(t, d->nested_type->elements[i], fqname, sort, status); + + if(d->set_flags.has.enum_type) + for(unsigned int i = 0; i < d->enum_type->len; i++) + insert_enum(t, d->enum_type->elements[i], fqname, status); +} + +void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, + google_protobuf_FileDescriptorProto *fd, bool sort, + struct upb_status *status) +{ + struct upb_string *pkg; + // Temporary hack until the static data is integrated into our + // memory-management scheme. 
+ bool should_unref; + if(fd->set_flags.has.package) { + pkg = fd->package; + should_unref = false; + } else { + pkg = upb_string_new(); + should_unref = true; + } + + if(fd->set_flags.has.message_type) + for(unsigned int i = 0; i < fd->message_type->len; i++) + insert_message(addto, fd->message_type->elements[i], pkg, sort, status); + + if(fd->set_flags.has.enum_type) + for(unsigned int i = 0; i < fd->enum_type->len; i++) + insert_enum(addto, fd->enum_type->elements[i], pkg, status); + + if(should_unref) upb_string_unref(pkg); + + if(!upb_ok(status)) return; + + /* TODO: handle extensions and services. */ + + // Attempt to resolve all references. + struct symtab_ent *e; + for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { + if(e->def->type != UPB_DEF_MSG) continue; + struct upb_msgdef *m = upb_downcast_msgdef(e->def); + struct upb_string *base = e->e.key; + for(unsigned int i = 0; i < m->num_fields; i++) { + struct upb_fielddef *f = &m->fields[i]; + if(!upb_hasdef(f)) continue; // No resolving necessary. + struct upb_string *name = upb_downcast_unresolveddef(f->def)->name; + struct symtab_ent *found = resolve(existingdefs, base, name); + if(!found) found = resolve(addto, base, name); + upb_field_type_t expected = upb_issubmsg(f) ? 
UPB_DEF_MSG : UPB_DEF_ENUM; + if(!found) { + upb_seterr(status, UPB_STATUS_ERROR, + "could not resolve symbol '" UPB_STRFMT "'" + " in context '" UPB_STRFMT "'", + UPB_STRARG(name), UPB_STRARG(base)); + return; + } else if(found->def->type != expected) { + upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); + return; + } + upb_msgdef_resolve(m, f, found->def); + } + } +} + +/* upb_symtab *****************************************************************/ + +struct upb_symtab *upb_symtab_new() +{ + struct upb_symtab *s = malloc(sizeof(*s)); + upb_atomic_refcount_init(&s->refcount, 1); + upb_rwlock_init(&s->lock); + upb_strtable_init(&s->symtab, 16, sizeof(struct symtab_ent)); + upb_strtable_init(&s->psymtab, 16, sizeof(struct symtab_ent)); + + // Add descriptor.proto types to private symtable so we can parse descriptors. + google_protobuf_FileDescriptorProto *fd = + upb_file_descriptor_set->file->elements[0]; // We know there is only 1. + struct upb_status status = UPB_STATUS_INIT; + addfd(&s->psymtab, &s->symtab, fd, false, &status); + if(!upb_ok(&status)) { + fprintf(stderr, "Failed to initialize upb: %s.\n", status.msg); + assert(false); + return NULL; /* Indicates that upb is buggy or corrupt. 
*/ + } + struct upb_string name = UPB_STRLIT("google.protobuf.FileDescriptorSet"); + struct symtab_ent *e = upb_strtable_lookup(&s->psymtab, &name); + assert(e); + s->fds_msgdef = upb_downcast_msgdef(e->def); + return s; +} + +static void free_symtab(struct upb_strtable *t) +{ + struct symtab_ent *e = upb_strtable_begin(t); + for(; e; e = upb_strtable_next(t, &e->e)) { + upb_def_unref(e->def); + upb_string_unref(e->e.key); + } + upb_strtable_free(t); +} + +void _upb_symtab_free(struct upb_symtab *s) +{ + free_symtab(&s->symtab); + free_symtab(&s->psymtab); + upb_rwlock_destroy(&s->lock); + free(s); +} + +struct upb_def **upb_symtab_getandref_defs(struct upb_symtab *s, int *count) +{ + upb_rwlock_wrlock(&s->lock); + *count = upb_strtable_count(&s->symtab); + struct upb_def **defs = malloc(sizeof(*defs) * (*count)); + struct symtab_ent *e = upb_strtable_begin(&s->symtab); + int i = 0; + for(; e; e = upb_strtable_next(&s->symtab, &e->e), i++) { + assert(e->def); + defs[i] = e->def; + upb_def_ref(defs[i]); + } + assert(*count == i); + upb_rwlock_unlock(&s->lock); + return defs; +} + +struct upb_def *upb_symtab_lookup(struct upb_symtab *s, + struct upb_string *sym) +{ + upb_rwlock_rdlock(&s->lock); + struct symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); + upb_rwlock_unlock(&s->lock); + return e ? e->def : NULL; +} + + +struct upb_def *upb_symtab_resolve(struct upb_symtab *s, + struct upb_string *base, + struct upb_string *symbol) { + upb_rwlock_rdlock(&s->lock); + struct symtab_ent *e = resolve(&s->symtab, base, symbol); + upb_rwlock_unlock(&s->lock); + return e ? e->def : NULL; +} + +void upb_symtab_addfds(struct upb_symtab *s, + google_protobuf_FileDescriptorSet *fds, + struct upb_status *status) +{ + if(fds->set_flags.has.file) { + /* Insert new symbols into a temporary table until we have verified that + * the descriptor is valid. 
*/ + struct upb_strtable tmp; + upb_strtable_init(&tmp, 0, sizeof(struct symtab_ent)); + upb_rwlock_rdlock(&s->lock); + for(uint32_t i = 0; i < fds->file->len; i++) { + addfd(&tmp, &s->symtab, fds->file->elements[i], true, status); + if(!upb_ok(status)) { + free_symtab(&tmp); + upb_rwlock_unlock(&s->lock); + return; + } + } + upb_rwlock_unlock(&s->lock); + + /* Everything was successfully added, copy from the tmp symtable. */ + struct symtab_ent *e; + { + upb_rwlock_wrlock(&s->lock); + for(e = upb_strtable_begin(&tmp); e; e = upb_strtable_next(&tmp, &e->e)) + upb_strtable_insert(&s->symtab, &e->e); + upb_rwlock_unlock(&s->lock); + } + upb_strtable_free(&tmp); + } + return; +} + +void upb_symtab_add_desc(struct upb_symtab *s, struct upb_string *desc, + struct upb_status *status) +{ + struct upb_msg *fds = upb_msg_new(s->fds_msgdef); + upb_msg_parsestr(fds, desc->ptr, desc->byte_len, status); + if(!upb_ok(status)) return; + upb_symtab_addfds(s, (google_protobuf_FileDescriptorSet*)fds, status); + upb_msg_unref(fds); + return; +} diff --git a/src/upb_def.h b/src/upb_def.h index e58f01f..7c8cf80 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -7,16 +7,17 @@ * - upb_msgdef: describes a "message" construct. * - upb_fielddef: describes a message field. * - upb_enumdef: describes an enum. - * (TODO: descriptions of extensions and services). + * (TODO: definitions of extensions and services). * - * Defs should be obtained from a upb_context object; the APIs for creating - * them directly are internal-only. + * Defs are obtained from a upb_symtab object. A upb_symtab is empty when + * constructed, and definitions can be added by supplying serialized + * descriptors. * - * Defs are immutable and reference-counted. Contexts reference any defs - * that are the currently in their symbol table. If an extension is loaded - * that adds a field to an existing message, a new msgdef is constructed that - * includes the new field and the old msgdef is unref'd. 
The old msgdef will - * still be ref'd by message (if any) that were constructed with that msgdef. + * Defs are immutable and reference-counted. Symbol tables reference any defs + * that are the "current" definitions. If an extension is loaded that adds a + * field to an existing message, a new msgdef is constructed that includes the + * new field and the old msgdef is unref'd. The old msgdef will still be ref'd + * by messages (if any) that were constructed with that msgdef. * * This file contains routines for creating and manipulating the definitions * themselves. To create and manipulate actual messages, see upb_msg.h. @@ -32,16 +33,16 @@ extern "C" { #endif -/* "Base class" for defs; defines common members and functions. **************/ +/* upb_def: base class for defs **********************************************/ // All the different kind of defs we support. These correspond 1:1 with // declarations in a .proto file. enum upb_def_type { - UPB_DEF_MESSAGE, + UPB_DEF_MSG, UPB_DEF_ENUM, - UPB_DEF_SERVICE, - UPB_DEF_EXTENSION, - // Represented by a string, symbol hasn't been resolved yet. + UPB_DEF_SVC, + UPB_DEF_EXT, + // Internal-only, placeholder for a def that hasn't been resolved yet. UPB_DEF_UNRESOLVED }; @@ -52,17 +53,40 @@ struct upb_def { upb_atomic_refcount_t refcount; }; -void upb_def_init(struct upb_def *def, enum upb_def_type type, - struct upb_string *fqname); -void upb_def_uninit(struct upb_def *def); -INLINE void upb_def_ref(struct upb_def *def) { upb_atomic_ref(&def->refcount); } +void _upb_def_free(struct upb_def *def); // Must not be called directly! -/* Field definition. **********************************************************/ +// Call to ref/deref a def. +INLINE void upb_def_ref(struct upb_def *def) { + upb_atomic_ref(&def->refcount); +} +INLINE void upb_def_unref(struct upb_def *def) { + if(upb_atomic_unref(&def->refcount)) _upb_def_free(def); +} + +// Downcasts. They return NULL if the def is not of the requested type.
+#define UPB_DOWNCAST_DEF(lower, upper) \ + struct upb_ ## lower; /* Forward-declare. */ \ + INLINE struct upb_ ## lower *upb_downcast_ ## lower(struct upb_def *def) { \ + if(def->type != UPB_DEF_ ## upper) return NULL; \ + return (struct upb_ ## lower*)def; \ + } +UPB_DOWNCAST_DEF(msgdef, MSG); +UPB_DOWNCAST_DEF(enumdef, ENUM); +UPB_DOWNCAST_DEF(svcdef, SVC); +UPB_DOWNCAST_DEF(extdef, EXT); +UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED); +#undef UPB_DOWNCAST_DEF + +#define UPB_UPCAST(ptr) (&(ptr)->base) + +/* upb_fielddef ***************************************************************/ // A upb_fielddef describes a single field in a message. It isn't a full def // in the sense that it derives from upb_def. It cannot stand on its own; it // is either a field of a upb_msgdef or contained inside a upb_extensiondef. +// It is also reference-counted. struct upb_fielddef { + upb_atomic_refcount_t refcount; upb_field_type_t type; upb_label_t label; upb_field_number_t number; @@ -85,11 +109,11 @@ INLINE bool upb_isstring(struct upb_fielddef *f) { return upb_isstringtype(f->type); } INLINE bool upb_isarray(struct upb_fielddef *f) { - return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED; + return f->label == UPB_LABEL(REPEATED); } // Does the type of this field imply that it should contain an associated def? -INLINE bool upb_fielddef_hasdef(struct upb_fielddef *f) { - return upb_issubmsg(f) || f->type == UPB_TYPENUM(ENUM); +INLINE bool upb_hasdef(struct upb_fielddef *f) { + return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); } INLINE bool upb_field_ismm(struct upb_fielddef *f) { @@ -115,31 +139,21 @@ INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_fielddef *f) { else return -1; } -struct google_protobuf_FieldDescriptorProto; - -// Interfaces for constructing/destroying fielddefs. These are internal-only. - -// Initializes a upb_fielddef from a FieldDescriptorProto. The caller must -// have previously allocated the upb_fielddef. 
-void upb_fielddef_init(struct upb_fielddef *f, - struct google_protobuf_FieldDescriptorProto *fd); -struct upb_fielddef *upb_fielddef_dup(struct upb_fielddef *f); -void upb_fielddef_uninit(struct upb_fielddef *f); - -// Sort the given fielddefs in-place, according to what we think is an optimal +// Internal-only interface for the upb compiler. +// Sorts the given fielddefs in-place, according to what we think is an optimal // ordering of fields. This can change from upb release to upb release. -void upb_fielddef_sort(struct upb_fielddef *defs, size_t num); +struct google_protobuf_FieldDescriptorProto; void upb_fielddef_sortfds(struct google_protobuf_FieldDescriptorProto **fds, size_t num); -/* Message definition. ********************************************************/ +/* upb_msgdef *****************************************************************/ struct google_protobuf_EnumDescriptorProto; struct google_protobuf_DescriptorProto; // Structure that describes a single .proto message type. struct upb_msgdef { - struct upb_def def; + struct upb_def base; struct upb_msg *default_msg; // Message with all default values set. size_t size; uint32_t num_fields; @@ -148,150 +162,124 @@ struct upb_msgdef { struct upb_fielddef *fields; // We have exclusive ownership of these. // Tables for looking up fields by number and name. - struct upb_inttable fields_by_num; - struct upb_strtable fields_by_name; + struct upb_inttable itof; // int to field + struct upb_strtable ntof; // name to field }; -// The num->field and name->field maps in upb_msgdef allow fast lookup of fields -// by number or name. These lookups are in the critical path of parsing and -// field lookup, so they must be as fast as possible. -struct upb_fieldsbynum_entry { +// Hash table entries for looking up fields by name or number. 
+struct upb_itof_ent { struct upb_inttable_entry e; - struct upb_fielddef f; + struct upb_fielddef *f; }; -struct upb_fieldsbyname_entry { +struct upb_ntof_ent { struct upb_strtable_entry e; - struct upb_fielddef f; + struct upb_fielddef *f; }; // Looks up a field by name or number. While these are written to be as fast // as possible, it will still be faster to cache the results of this lookup if // possible. These return NULL if no such field is found. -INLINE struct upb_fielddef *upb_msg_fieldbynum(struct upb_msgdef *m, - uint32_t number) { - struct upb_fieldsbynum_entry *e = (struct upb_fieldsbynum_entry*) - upb_inttable_fast_lookup( - &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry)); - return e ? &e->f : NULL; +INLINE struct upb_fielddef *upb_msg_itof(struct upb_msgdef *m, uint32_t num) { + struct upb_itof_ent *e; + e = (struct upb_itof_ent*)upb_inttable_fast_lookup( + &m->itof, num, sizeof(struct upb_itof_ent)); + return e ? e->f : NULL; } -INLINE struct upb_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m, - struct upb_string *name) { - struct upb_fieldsbyname_entry *e = (struct upb_fieldsbyname_entry*) - upb_strtable_lookup( - &m->fields_by_name, name); - return e ? &e->f : NULL; +INLINE struct upb_fielddef *upb_msg_ntof(struct upb_msgdef *m, + struct upb_string *name) { + struct upb_ntof_ent *e; + e = (struct upb_ntof_ent*) upb_strtable_lookup(&m->ntof, name); + return e ? e->f : NULL; } -// Internal-only functions for constructing a msgdef. Caller retains ownership -// of d and fqname. Ownership of fields passes to the msgdef. -// -// Note that init does not resolve upb_fielddef.ref; the caller should do that -// post-initialization by calling upb_msgdef_resolve() below. 
-struct upb_msgdef *upb_msgdef_new(struct upb_fielddef *fields, int num_fields, - struct upb_string *fqname); -void _upb_msgdef_free(struct upb_msgdef *m); -INLINE void upb_msgdef_ref(struct upb_msgdef *m) { - upb_def_ref(&m->def); -} -INLINE void upb_msgdef_unref(struct upb_msgdef *m) { - if(upb_atomic_unref(&m->def.refcount)) _upb_msgdef_free(m); -} - -// Clients use this function on a previously initialized upb_msgdef to resolve -// the "ref" field in the upb_fielddef. Since messages can refer to each -// other in mutually-recursive ways, this step must be separated from -// initialization. -void upb_msgdef_resolve(struct upb_msgdef *m, struct upb_fielddef *f, - struct upb_def *def); - -// Downcasts. They are checked only if asserts are enabled. -INLINE struct upb_msgdef *upb_downcast_msgdef(struct upb_def *def) { - assert(def->type == UPB_DEF_MESSAGE); - return (struct upb_msgdef*)def; -} - -/* Enum defintion. ************************************************************/ +/* upb_enumdef ****************************************************************/ struct upb_enumdef { - struct upb_def def; - struct upb_strtable nametoint; - struct upb_inttable inttoname; + struct upb_def base; + struct upb_strtable ntoi; + struct upb_inttable iton; }; -struct upb_enumdef_ntoi_entry { - struct upb_strtable_entry e; - uint32_t value; -}; - -struct upb_enumdef_iton_entry { - struct upb_inttable_entry e; - struct upb_string *string; +typedef int32_t upb_enumval_t; + +// Lookups from name to integer and vice-versa. +bool upb_enumdef_ntoi(struct upb_enumdef *e, struct upb_string *name, + upb_enumval_t *num); +struct upb_string *upb_enumdef_iton(struct upb_enumdef *e, upb_enumval_t num); + +// Iteration over name/value pairs. The order is undefined. +// struct upb_enum_iter i; +// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { +// // ... +// } +struct upb_enum_iter { + struct upb_enumdef *e; + void *state; // Internal iteration state.
+ struct upb_string *name; + upb_enumval_t val; }; +void upb_enum_begin(struct upb_enum_iter *iter, struct upb_enumdef *e); +void upb_enum_next(struct upb_enum_iter *iter); +bool upb_enum_done(struct upb_enum_iter *iter); -// Internal-only functions for creating/destroying an enumdef. Caller retains -// ownership of ed. The enumdef is initialized with one ref. -struct upb_enumdef *upb_enumdef_new( - struct google_protobuf_EnumDescriptorProto *ed, struct upb_string *fqname); -void _upb_enumdef_free(struct upb_enumdef *e); -INLINE void upb_enumdef_ref(struct upb_enumdef *e) { upb_def_ref(&e->def); } -INLINE void upb_enumdef_unref(struct upb_enumdef *e) { - if(upb_atomic_unref(&e->def.refcount)) _upb_enumdef_free(e); -} -INLINE struct upb_enumdef *upb_downcast_enumdef(struct upb_def *def) { - assert(def->type == UPB_DEF_ENUM); - return (struct upb_enumdef*)def; -} +/* upb_symtab *****************************************************************/ -/* Unresolved definition. *****************************************************/ +// A SymbolTable is where upb_defs live. It is empty when first constructed. +// Clients add definitions to the symtab by supplying unserialized or +// serialized descriptors (as defined in descriptor.proto). +struct upb_symtab { + upb_atomic_refcount_t refcount; + upb_rwlock_t lock; // Protects all members except the refcount. + struct upb_msgdef *fds_msgdef; // In psymtab, ptr here for convenience. -// This is a placeholder definition that contains only the name of the type -// that should eventually be referenced. Once symbols are resolved, this -// definition is replaced with a real definition. -struct upb_unresolveddef { - struct upb_def def; - struct upb_string *name; // Not fully-qualified. + // Our symbol tables; we own refs to the defs therein. + struct upb_strtable symtab; // The main symbol table. + struct upb_strtable psymtab; // Private symbols, for internal use. 
}; -INLINE struct upb_unresolveddef *upb_unresolveddef_new(struct upb_string *name) { - struct upb_unresolveddef *d = (struct upb_unresolveddef*)malloc(sizeof(*d)); - upb_def_init(&d->def, UPB_DEF_UNRESOLVED, name); - d->name = name; - upb_string_ref(name); - return d; -} -INLINE void _upb_unresolveddef_free(struct upb_unresolveddef *def) { - upb_def_uninit(&def->def); - upb_string_unref(def->name); +// Initializes a upb_symtab. Symbol tables are not freed explicitly, but unref'd +// when the caller is done with them. +struct upb_symtab *upb_symtab_new(void); +void _upb_symtab_free(struct upb_symtab *s); // Must not be called directly! + +INLINE void upb_symtab_ref(struct upb_symtab *s) { + upb_atomic_ref(&s->refcount); } -INLINE struct upb_unresolveddef *upb_downcast_unresolveddef(struct upb_def *def) { - assert(def->type == UPB_DEF_UNRESOLVED); - return (struct upb_unresolveddef*)def; +INLINE void upb_symtab_unref(struct upb_symtab *s) { + if(upb_atomic_unref(&s->refcount)) _upb_symtab_free(s); } -INLINE void upb_def_unref(struct upb_def *def) { - if(upb_atomic_unref(&def->refcount)) { - switch(def->type) { - case UPB_DEF_MESSAGE: - _upb_msgdef_free((struct upb_msgdef*)def); - break; - case UPB_DEF_ENUM: - _upb_enumdef_free((struct upb_enumdef*)def); - break; - case UPB_DEF_SERVICE: - assert(false); /* Unimplemented. */ - break; - case UPB_DEF_EXTENSION: - assert(false); /* Unimplemented. */ - break; - case UPB_DEF_UNRESOLVED: - _upb_unresolveddef_free((struct upb_unresolveddef*)def); - break; - default: - assert(false); - } - } -} +// Resolves the given symbol using the rules described in descriptor.proto, +// namely: +// +// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like +// scoping rules are used to find the type (i.e. first the nested types +// within this message are searched, then within the parent, on up to the +// root namespace). +// +// Returns NULL if no such symbol has been defined.
+struct upb_def *upb_symtab_resolve(struct upb_symtab *s, + struct upb_string *base, + struct upb_string *symbol); + +// Find an entry in the symbol table with this exact name. Returns NULL if no +// such symbol name has been defined. +struct upb_def *upb_symtab_lookup(struct upb_symtab *s, + struct upb_string *sym); + +// Gets an array of pointers to all currently active defs in this symtab. The +// caller owns the returned array (which is of length *count) as well as a ref +// to each symbol inside. +struct upb_def **upb_symtab_getandref_defs(struct upb_symtab *s, int *count); + +// Adds the definitions in the given serialized descriptor to this symtab. All +// types that are referenced from desc must have previously been defined (or be +// defined in desc). desc may not attempt to define any names that are already +// defined in this symtab. Caller retains ownership of desc. status indicates +// whether the operation was successful or not, and the error message (if any). +void upb_symtab_add_desc(struct upb_symtab *s, struct upb_string *desc, + struct upb_status *status); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb_mm.c b/src/upb_mm.c index 60809a5..6f0f766 100644 --- a/src/upb_mm.c +++ b/src/upb_mm.c @@ -24,7 +24,7 @@ void upb_msg_destroy(struct upb_msg *msg) { if(!upb_msg_isset(msg, f) || !upb_field_ismm(f)) continue; upb_mm_destroy(upb_msg_getptr(msg, f), upb_field_ptrtype(f)); } - upb_msgdef_unref(msg->def); + upb_def_unref(UPB_UPCAST(msg->def)); free(msg); } diff --git a/src/upb_msg.c b/src/upb_msg.c index 0106d02..b5879d1 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -177,11 +177,11 @@ static size_t get_valuesize(struct upb_msgsizes *sizes, union upb_value_ptr p, { switch(f->type) { default: assert(false); return 0; /* Internal corruption. 
*/ - case UPB_TYPENUM(MESSAGE): { + case UPB_TYPE(MESSAGE): { size_t submsg_size = get_msgsize(sizes, *p.msg); return upb_get_INT32_size(submsg_size) + submsg_size; } - case UPB_TYPENUM(GROUP): { + case UPB_TYPE(GROUP): { size_t endgrp_tag_size = upb_get_tag_size(f->number); return endgrp_tag_size + get_msgsize(sizes, *p.msg); } @@ -320,8 +320,8 @@ size_t upb_msg_serialize(struct upb_msg_serialize_state *s, struct upb_fielddef *f = &m->fields[i]; //union upb_value_ptr p = upb_msg_getptr(msg, f); buf = serialize_tag(buf, end, f, status); - if(f->type == UPB_TYPENUM(MESSAGE)) { - } else if(f->type == UPB_TYPENUM(GROUP)) { + if(f->type == UPB_TYPE(MESSAGE)) { + } else if(f->type == UPB_TYPE(GROUP)) { } else if(upb_isstring(f)) { } else { //upb_serialize_value(buf, end, f->type, p, status); @@ -339,29 +339,29 @@ bool upb_value_eql(union upb_value_ptr p1, union upb_value_ptr p2, { #define CMP(type) return *p1.type == *p2.type; switch(type) { - case UPB_TYPENUM(DOUBLE): + case UPB_TYPE(DOUBLE): CMP(_double) - case UPB_TYPENUM(FLOAT): + case UPB_TYPE(FLOAT): CMP(_float) - case UPB_TYPENUM(INT64): - case UPB_TYPENUM(SFIXED64): - case UPB_TYPENUM(SINT64): + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): CMP(int64) - case UPB_TYPENUM(UINT64): - case UPB_TYPENUM(FIXED64): + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): CMP(uint64) - case UPB_TYPENUM(INT32): - case UPB_TYPENUM(SFIXED32): - case UPB_TYPENUM(SINT32): + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): CMP(int32) - case UPB_TYPENUM(UINT32): - case UPB_TYPENUM(FIXED32): - case UPB_TYPENUM(ENUM): + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + case UPB_TYPE(ENUM): CMP(uint32); - case UPB_TYPENUM(BOOL): + case UPB_TYPE(BOOL): CMP(_bool); - case UPB_TYPENUM(STRING): - case UPB_TYPENUM(BYTES): + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): return upb_streql(*p1.str, *p2.str); default: return false; } diff --git a/src/upb_msg.h b/src/upb_msg.h index 
42f9bb2..adee884 100644 --- a/src/upb_msg.h +++ b/src/upb_msg.h @@ -59,7 +59,7 @@ INLINE struct upb_msg *upb_msg_new(struct upb_msgdef *md) { memset(msg, 0, size); upb_mmhead_init(&msg->mmhead); msg->def = md; - upb_msgdef_ref(md); + upb_def_ref(UPB_UPCAST(md)); return msg; } diff --git a/src/upb_parse.c b/src/upb_parse.c index eed8ec8..8f2c2ff 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -276,7 +276,7 @@ uint8_t *upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft, union upb_value_ptr v, struct upb_status *status) { #define CASE(t, member_name) \ - case UPB_TYPENUM(t): return upb_get_ ## t(buf, end, v.member_name, status); + case UPB_TYPE(t): return upb_get_ ## t(buf, end, v.member_name, status); switch(ft) { CASE(DOUBLE, _double) @@ -448,13 +448,13 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, void *_buf, size_t len, continue; } - struct upb_fielddef *f = upb_msg_fieldbynum(msgdef, tag.field_number); + struct upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) { int32_t delim_len; buf = upb_get_INT32(buf, end, &delim_len, status); CHECK_STATUS(); uint8_t *delim_end = buf + delim_len; - if(f && f->type == UPB_TYPENUM(MESSAGE)) { + if(f && f->type == UPB_TYPE(MESSAGE)) { submsg_end = push(p, start, delim_end - start, f, status); msgdef = p->top->msgdef; } else { @@ -469,7 +469,7 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, void *_buf, size_t len, } else { //if(!f || !upb_check_type(tag.wire_type, f->type)) { // buf = skip_wire_value(buf, end, tag.wire_type, status); - if (f->type == UPB_TYPENUM(GROUP)) { + if (f->type == UPB_TYPE(GROUP)) { submsg_end = push(p, start, 0, f, status); msgdef = p->top->msgdef; } else { diff --git a/src/upb_text.c b/src/upb_text.c index 133552c..1631016 100644 --- a/src/upb_text.c +++ b/src/upb_text.c @@ -15,29 +15,29 @@ void upb_text_printval(upb_field_type_t type, union upb_value val, FILE *file) { #define CASE(fmtstr, member) fprintf(file, fmtstr, 
val.member); break; switch(type) { - case UPB_TYPENUM(DOUBLE): + case UPB_TYPE(DOUBLE): CASE("%0.f", _double); - case UPB_TYPENUM(FLOAT): + case UPB_TYPE(FLOAT): CASE("%0.f", _float) - case UPB_TYPENUM(INT64): - case UPB_TYPENUM(SFIXED64): - case UPB_TYPENUM(SINT64): + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): CASE("%" PRId64, int64) - case UPB_TYPENUM(UINT64): - case UPB_TYPENUM(FIXED64): + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): CASE("%" PRIu64, uint64) - case UPB_TYPENUM(INT32): - case UPB_TYPENUM(SFIXED32): - case UPB_TYPENUM(SINT32): + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): CASE("%" PRId32, int32) - case UPB_TYPENUM(UINT32): - case UPB_TYPENUM(FIXED32): - case UPB_TYPENUM(ENUM): + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + case UPB_TYPE(ENUM): CASE("%" PRIu32, uint32); - case UPB_TYPENUM(BOOL): + case UPB_TYPE(BOOL): CASE("%hhu", _bool); - case UPB_TYPENUM(STRING): - case UPB_TYPENUM(BYTES): + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): /* TODO: escaping. */ fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break; } -- cgit v1.2.3