From a95ab58e79c50b0927eae2b834d3de20a8effc36 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 28 Nov 2009 15:38:29 -0800 Subject: Overhaul defs to derive from a common base. --- src/upb_context.c | 175 +++++++++++++++++++++++++----------------------------- src/upb_context.h | 96 ++++++++++++------------------ src/upb_def.c | 170 +++++++++++++++++++++++++++++++++------------------- src/upb_def.h | 175 +++++++++++++++++++++++++++++++++--------------------- src/upb_mm.c | 2 +- src/upb_msg.c | 2 +- src/upb_parse.c | 2 +- src/upb_string.h | 5 ++ src/upb_table.c | 20 +++---- src/upb_table.h | 12 ++-- 10 files changed, 357 insertions(+), 302 deletions(-) (limited to 'src') diff --git a/src/upb_context.c b/src/upb_context.c index cea82cd..469f879 100644 --- a/src/upb_context.c +++ b/src/upb_context.c @@ -11,6 +11,11 @@ #include "upb_def.h" #include "upb_mm.h" +struct upb_symtab_entry { + struct upb_strtable_entry e; + struct upb_def *def; +}; + /* Search for a character in a string, in reverse. */ static int my_memrchr(char *data, char c, size_t len) { @@ -21,7 +26,6 @@ static int my_memrchr(char *data, char c, size_t len) void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, google_protobuf_FileDescriptorProto *fd, bool sort, - struct upb_context *context, struct upb_status *status); struct upb_context *upb_context_new() @@ -35,7 +39,7 @@ struct upb_context *upb_context_new() google_protobuf_FileDescriptorProto *fd = upb_file_descriptor_set->file->elements[0]; /* We know there is only 1. */ struct upb_status status = UPB_STATUS_INIT; - addfd(&c->psymtab, &c->symtab, fd, false, c, &status); + addfd(&c->psymtab, &c->symtab, fd, false, &status); if(!upb_ok(&status)) { fprintf(stderr, "Failed to initialize upb: %s.\n", status.msg); assert(false); @@ -44,10 +48,7 @@ struct upb_context *upb_context_new() struct upb_string name = UPB_STRLIT("google.protobuf.FileDescriptorSet"); struct upb_symtab_entry *e = upb_strtable_lookup(&c->psymtab, &name); assert(e); - c->fds_msgdef = e->ref.msg; - c->fds_size = 16; - c->fds_len = 0; - c->fds = malloc(sizeof(*c->fds)); + c->fds_msgdef = upb_downcast_msgdef(e->def); return c; } @@ -56,7 +57,7 @@ static void free_symtab(struct upb_strtable *t) struct upb_symtab_entry *e = upb_strtable_begin(t); for(; e; e = upb_strtable_next(t, &e->e)) { upb_def_unref(e->def); - free(e->e.key.ptr); + upb_string_unref(e->e.key); } upb_strtable_free(t); } @@ -64,10 +65,7 @@ static void free_symtab(struct upb_strtable *t) static void free_context(struct upb_context *c) { free_symtab(&c->symtab); - for(size_t i = 0; i < c->fds_len; i++) - upb_msg_unref((struct upb_msg*)c->fds[i]); free_symtab(&c->psymtab); - free(c->fds); } void upb_context_unref(struct upb_context *c) @@ -81,24 +79,30 @@ void upb_context_unref(struct upb_context *c) } } -bool upb_context_lookup(struct upb_context *c, struct upb_string *symbol, - struct upb_symtab_entry *out_entry) +struct upb_def **upb_context_getandref_defs(struct upb_context *c, int *count) { - upb_rwlock_rdlock(&c->lock); - struct upb_symtab_entry *e = upb_strtable_lookup(&c->symtab, symbol); - if(e) *out_entry = *e; + upb_rwlock_wrlock(&c->lock); + *count = upb_strtable_count(&c->symtab); + struct upb_def **defs = malloc(sizeof(*defs) * (*count)); + struct upb_symtab_entry *e = upb_strtable_begin(&c->symtab); + int i = 0; + for(; e; e = upb_strtable_next(&c->symtab, &e->e), i++) { + assert(e->def); + defs[i] = e->def; + upb_def_ref(defs[i]); + } + assert(*count == i); upb_rwlock_unlock(&c->lock); - return e != NULL; + return defs; } -void upb_context_enumerate(struct upb_context *c, upb_context_enumerator_t cb, - void *udata) +struct upb_def *upb_context_lookup(struct upb_context *c, + struct upb_string *sym) { upb_rwlock_rdlock(&c->lock); - struct upb_symtab_entry *e = upb_strtable_begin(&c->symtab); - for(; e; e = upb_strtable_next(&c->symtab, &e->e)) - cb(udata, e); + struct upb_symtab_entry *e = upb_strtable_lookup(&c->symtab, sym); upb_rwlock_unlock(&c->lock); + return e ? e->def : NULL; } /* Given a symbol and the base symbol inside which it is defined, find the @@ -137,43 +141,41 @@ static struct upb_symtab_entry *resolve(struct upb_strtable *t, } /* Tries to resolve a symbol in two different tables. */ -union upb_symbol_ref resolve2(struct upb_strtable *t1, struct upb_strtable *t2, - struct upb_string *base, struct upb_string *sym, - enum upb_symbol_type expected_type) { - union upb_symbol_ref nullref = {.msg = NULL}; +struct upb_def *resolve2(struct upb_strtable *t1, struct upb_strtable *t2, + struct upb_string *base, struct upb_string *sym, + enum upb_def_type expected_type) { struct upb_symtab_entry *e = resolve(t1, base, sym); if(e == NULL) e = resolve(t2, base, sym); - - if(e && e->type == expected_type) return e->ref; - else return nullref; + if(e && e->def->type == expected_type) return e->def; + return NULL; } -bool upb_context_resolve(struct upb_context *c, struct upb_string *base, - struct upb_string *symbol, - struct upb_symtab_entry *out_entry) { +struct upb_def *upb_context_resolve(struct upb_context *c, + struct upb_string *base, + struct upb_string *symbol) { upb_rwlock_rdlock(&c->lock); struct upb_symtab_entry *e = resolve(&c->symtab, base, symbol); - if(e) *out_entry = *e; upb_rwlock_unlock(&c->lock); - return e != NULL; + return e ? e->def : NULL; } /* Joins strings together, for example: * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" * join("", "Baz") -> "Baz" * Caller owns the returned string and must free it. */ -static struct upb_string join(struct upb_string *base, struct upb_string *name) { +static struct upb_string *join(struct upb_string *base, struct upb_string *name) { size_t len = base->byte_len + name->byte_len; if(base->byte_len > 0) len++; /* For the separator. */ - struct upb_string joined = {.byte_len=len, .ptr=malloc(len)}; + struct upb_string *joined = upb_string_new(); + upb_string_resize(joined, len); if(base->byte_len > 0) { /* nested_base = base + '.' + d->name */ - memcpy(joined.ptr, base->ptr, base->byte_len); - joined.ptr[base->byte_len] = UPB_SYMBOL_SEPARATOR; - memcpy(&joined.ptr[base->byte_len+1], name->ptr, name->byte_len); + memcpy(joined->ptr, base->ptr, base->byte_len); + joined->ptr[base->byte_len] = UPB_SYMBOL_SEPARATOR; + memcpy(&joined->ptr[base->byte_len+1], name->ptr, name->byte_len); } else { - memcpy(joined.ptr, name->ptr, name->byte_len); + memcpy(joined->ptr, name->ptr, name->byte_len); } return joined; } @@ -181,7 +183,6 @@ static struct upb_string join(struct upb_string *base, struct upb_string *name) static void insert_enum(struct upb_strtable *t, google_protobuf_EnumDescriptorProto *ed, struct upb_string *base, - struct upb_context *c, struct upb_status *status) { if(!ed->set_flags.has.name) { @@ -191,29 +192,24 @@ static void insert_enum(struct upb_strtable *t, return; } - /* We own this and must free it on destruct. */ - struct upb_string fqname = join(base, ed->name); - - if(upb_strtable_lookup(t, &fqname)) { + struct upb_string *fqname = join(base, ed->name); + if(upb_strtable_lookup(t, fqname)) { upb_seterr(status, UPB_STATUS_ERROR, "attempted to redefine symbol '" UPB_STRFMT "'", - UPB_STRARG(&fqname)); - free(fqname.ptr); + UPB_STRARG(fqname)); + upb_string_unref(fqname); return; } struct upb_symtab_entry e; - e.e.key = fqname; - e.type = UPB_SYM_ENUM; - e.ref._enum = malloc(sizeof(*e.ref._enum)); - upb_enumdef_init(e.ref._enum, ed, c); + e.e.key = fqname; // Donating our ref to the table. + e.def = (struct upb_def*)upb_enumdef_new(ed, fqname); upb_strtable_insert(t, &e.e); } static void insert_message(struct upb_strtable *t, google_protobuf_DescriptorProto *d, struct upb_string *base, bool sort, - struct upb_context *c, struct upb_status *status) { if(!d->set_flags.has.name) { @@ -224,51 +220,51 @@ static void insert_message(struct upb_strtable *t, } /* We own this and must free it on destruct. */ - struct upb_string fqname = join(base, d->name); + struct upb_string *fqname = join(base, d->name); - if(upb_strtable_lookup(t, &fqname)) { + if(upb_strtable_lookup(t, fqname)) { upb_seterr(status, UPB_STATUS_ERROR, "attempted to redefine symbol '" UPB_STRFMT "'", - UPB_STRARG(&fqname)); - free(fqname.ptr); + UPB_STRARG(fqname)); + upb_string_unref(fqname); return; } struct upb_symtab_entry e; - e.e.key = fqname; - e.type = UPB_SYM_MESSAGE; - e.ref.msg = malloc(sizeof(*e.ref.msg)); - upb_msgdef_init(e.ref.msg, d, &fqname, sort, c, status); - if(!upb_ok(status)) { - free(fqname.ptr); - return; + e.e.key = fqname; // Donating our ref to the table. + struct upb_fielddef *fielddefs = malloc(sizeof(*fielddefs) * d->field->len); + for (unsigned int i = 0; i < d->field->len; i++) { + google_protobuf_FieldDescriptorProto *fd = d->field->elements[i]; + upb_fielddef_init(&fielddefs[i], fd); } + if(sort) upb_fielddef_sort(fielddefs, d->field->len); + e.def = (struct upb_def*)upb_msgdef_new(fielddefs, d->field->len, fqname); upb_strtable_insert(t, &e.e); /* Add nested messages and enums. */ if(d->set_flags.has.nested_type) for(unsigned int i = 0; i < d->nested_type->len; i++) - insert_message(t, d->nested_type->elements[i], &fqname, sort, c, status); + insert_message(t, d->nested_type->elements[i], fqname, sort, status); if(d->set_flags.has.enum_type) for(unsigned int i = 0; i < d->enum_type->len; i++) - insert_enum(t, d->enum_type->elements[i], &fqname, c, status); + insert_enum(t, d->enum_type->elements[i], fqname, status); } void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, google_protobuf_FileDescriptorProto *fd, bool sort, - struct upb_context *c, struct upb_status *status) + struct upb_status *status) { struct upb_string pkg = {.byte_len=0}; if(fd->set_flags.has.package) pkg = *fd->package; if(fd->set_flags.has.message_type) for(unsigned int i = 0; i < fd->message_type->len; i++) - insert_message(addto, fd->message_type->elements[i], &pkg, sort, c, status); + insert_message(addto, fd->message_type->elements[i], &pkg, sort, status); if(fd->set_flags.has.enum_type) for(unsigned int i = 0; i < fd->enum_type->len; i++) - insert_enum(addto, fd->enum_type->elements[i], &pkg, c, status); + insert_enum(addto, fd->enum_type->elements[i], &pkg, status); if(!upb_ok(status)) return; @@ -277,33 +273,34 @@ void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, /* Attempt to resolve all references. */ struct upb_symtab_entry *e; for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { - if(upb_strtable_lookup(existingdefs, &e->e.key)) { + if(upb_strtable_lookup(existingdefs, e->e.key)) { upb_seterr(status, UPB_STATUS_ERROR, "attempted to redefine symbol '" UPB_STRFMT "'", - UPB_STRARG(&e->e.key)); + UPB_STRARG(e->e.key)); return; } - if(e->type == UPB_SYM_MESSAGE) { - struct upb_msgdef *m = e->ref.msg; + if(e->def->type == UPB_DEF_MESSAGE) { + struct upb_msgdef *m = upb_downcast_msgdef(e->def); for(unsigned int i = 0; i < m->num_fields; i++) { struct upb_fielddef *f = &m->fields[i]; - union upb_symbol_ref ref; - if(f->type == UPB_TYPENUM(MESSAGE) || f->type == UPB_TYPENUM(GROUP)) - ref = resolve2(existingdefs, addto, &e->e.key, f->ref.str, - UPB_SYM_MESSAGE); + if(!upb_issubmsg(f) && f->type != UPB_TYPENUM(ENUM)) { + // No resolving necessary. + continue; + } + struct upb_def *def; + struct upb_string *name = upb_downcast_unresolveddef(f->def)->name; + if(upb_issubmsg(f)) + def = resolve2(existingdefs, addto, e->e.key, name, UPB_DEF_MESSAGE); else if(f->type == UPB_TYPENUM(ENUM)) - ref = resolve2(existingdefs, addto, &e->e.key, f->ref.str, - UPB_SYM_ENUM); - else - continue; /* No resolving necessary. */ - if(!ref.msg) { + def = resolve2(existingdefs, addto, e->e.key, name, UPB_DEF_ENUM); + if(!def) { upb_seterr(status, UPB_STATUS_ERROR, "could not resolve symbol '" UPB_STRFMT "'" " in context '" UPB_STRFMT "'", - UPB_STRARG(f->ref.str), UPB_STRARG(&e->e.key)); + UPB_STRARG(name), UPB_STRARG(e->e.key)); return; } - upb_msgdef_setref(m, f, ref); + upb_msgdef_resolve(m, f, def); } } } @@ -320,7 +317,7 @@ void upb_context_addfds(struct upb_context *c, upb_strtable_init(&tmp, 0, sizeof(struct upb_symtab_entry)); upb_rwlock_rdlock(&c->lock); for(uint32_t i = 0; i < fds->file->len; i++) { - addfd(&tmp, &c->symtab, fds->file->elements[i], true, c, status); + addfd(&tmp, &c->symtab, fds->file->elements[i], true, status); if(!upb_ok(status)) { free_symtab(&tmp); upb_rwlock_unlock(&c->lock); @@ -349,17 +346,5 @@ void upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str, upb_msg_parsestr(fds, fds_str->ptr, fds_str->byte_len, status); if(!upb_ok(status)) return; upb_context_addfds(c, (google_protobuf_FileDescriptorSet*)fds, status); - if(!upb_ok(status)) return; - - { - /* We own fds now, need to keep a ref so we can free it later. */ - upb_rwlock_wrlock(&c->lock); - if(c->fds_size == c->fds_len) { - c->fds_size *= 2; - c->fds = realloc(c->fds, c->fds_size); - } - c->fds[c->fds_len++] = (google_protobuf_FileDescriptorSet*)fds; - upb_rwlock_unlock(&c->lock); - } return; } diff --git a/src/upb_context.h b/src/upb_context.h index b20f169..177b42e 100644 --- a/src/upb_context.h +++ b/src/upb_context.h @@ -24,26 +24,18 @@ extern "C" { /* Definitions. ***************************************************************/ -struct upb_symtab_entry { - struct upb_strtable_entry e; - struct upb_def *def; /* We own one ref. */ -}; - struct upb_context { upb_atomic_refcount_t refcount; - upb_rwlock_t lock; - struct upb_strtable symtab; /* The context's symbol table. */ - struct upb_strtable psymtab; /* Private symbols, for internal use. */ - struct upb_msgdef *fds_msgdef; /* In psymtab, ptr here for convenience. */ - - /* A list of the FileDescriptorProtos we own (from having parsed them - * ourselves) and must free on destruction. */ - size_t fds_size, fds_len; - struct google_protobuf_FileDescriptorSet **fds; + upb_rwlock_t lock; // Protects all members except the refcount. + struct upb_msgdef *fds_msgdef; // In psymtab, ptr here for convenience. + + // Our symbol tables; we own refs to the defs therein. + struct upb_strtable symtab; // The context's symbol table. + struct upb_strtable psymtab; // Private symbols, for internal use. }; -/* Initializes a upb_context. Contexts are not freed explicitly, but unref'd - * when the caller is done with them. */ +// Initializes a upb_context. Contexts are not freed explicitly, but unref'd +// when the caller is done with them. struct upb_context *upb_context_new(void); INLINE void upb_context_ref(struct upb_context *c) { upb_atomic_ref(&c->refcount); @@ -52,55 +44,41 @@ void upb_context_unref(struct upb_context *c); /* Looking up symbols. ********************************************************/ -/* Resolves the given symbol using the rules described in descriptor.proto, - * namely: - * - * If the name starts with a '.', it is fully-qualified. Otherwise, C++-like - * scoping rules are used to find the type (i.e. first the nested types - * within this message are searched, then within the parent, on up to the - * root namespace). - * - * Returns NULL if the symbol has not been defined. */ -bool upb_context_resolve(struct upb_context *c, struct upb_string *base, - struct upb_string *symbol, - struct upb_symtab_entry *out_entry); - -/* Find an entry in the symbol table with this exact name. Returns NULL if no - * such symbol name exists. */ -bool upb_context_lookup(struct upb_context *c, struct upb_string *symbol, - struct upb_symtab_entry *out_entry); - -/* For enumerating over the entries in the symbol table. The enumerator - * callback will be called once for every symtab entry. - * - * The callback *must not* block or take any significant amount of time, since - * the upb_context's lock is held while it is being called! */ -typedef void (*upb_context_enumerator_t)( - void *udata, struct upb_symtab_entry *entry); -void upb_context_enumerate(struct upb_context *c, upb_context_enumerator_t, - void *udata); +// Resolves the given symbol using the rules described in descriptor.proto, +// namely: +// +// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like +// scoping rules are used to find the type (i.e. first the nested types +// within this message are searched, then within the parent, on up to the +// root namespace). +// +// Returns NULL if no such symbol has been defined. +struct upb_def *upb_context_resolve(struct upb_context *c, + struct upb_string *base, + struct upb_string *symbol); + +// Find an entry in the symbol table with this exact name. Returns NULL if no +// such symbol name has been defined. +struct upb_def *upb_context_lookup(struct upb_context *c, + struct upb_string *sym); + +// Gets an array of pointers to all currently active defs in this context. The +// caller owns the returned array (which is of length *count) as well as a ref +// to each symbol inside. +struct upb_def **upb_context_getandref_defs(struct upb_context *c, int *count); /* Adding symbols. ************************************************************/ -/* Adds the definitions in the given file descriptor to this context. All - * types that are referenced from fd must have previously been defined (or be - * defined in fd). fd may not attempt to define any names that are already - * defined in this context. - * - * Caller retains ownership of fd, but the context will contain references to - * it, so it must outlive the context. - * - * upb_context_addfd only returns true or false; it does not give any hint - * about what happened in the case of failure. This is because the descriptor - * is expected to have been validated at the time it was parsed/generated. */ -void upb_context_addfds(struct upb_context *c, - struct google_protobuf_FileDescriptorSet *fds, - struct upb_status *status); - +// Adds the definitions in the given file descriptor to this context. All +// types that are referenced from fd must have previously been defined (or be +// defined in fd). fd may not attempt to define any names that are already +// defined in this context. Caller retains ownership of fd. status indicates +// whether the operation was successful or not, and the error message (if any). +struct google_protobuf_FileDescriptorSet; void upb_context_addfds(struct upb_context *c, struct google_protobuf_FileDescriptorSet *fds, struct upb_status *status); - +// Like the above, but also parses the FileDescriptorSet from fds. void upb_context_parsefds(struct upb_context *c, struct upb_string *fds, struct upb_status *status); diff --git a/src/upb_def.c b/src/upb_def.c index 0d56459..32675f5 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -31,111 +31,157 @@ static int compare_fields(const void *e1, const void *e2) { } } -void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num) +/* Callback for sorting fields. */ +static int compare_fields2(const void *e1, const void *e2) { + const struct upb_fielddef *f1 = e1; + const struct upb_fielddef *f2 = e2; + /* Required fields go before non-required. */ + bool req1 = f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; + bool req2 = f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; + if(req1 != req2) { + return req2 - req1; + } else { + /* Within required and non-required field lists, list in number order. + * TODO: consider ordering by data size to reduce padding. */ + return f1->number - f2->number; + } +} + +void upb_fielddef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num) +{ + qsort(fds, num, sizeof(*fds), compare_fields); +} + +void upb_fielddef_sort(struct upb_fielddef *defs, size_t num) +{ + qsort(defs, num, sizeof(*defs), compare_fields2); +} + +void upb_def_init(struct upb_def *def, enum upb_def_type type, + struct upb_string *fqname) { + def->type = type; + def->fqname = fqname; + upb_string_ref(fqname); + upb_atomic_refcount_init(&def->refcount, 1); +} + +void upb_def_uninit(struct upb_def *def) { + upb_string_unref(def->fqname); +} + +void upb_fielddef_init(struct upb_fielddef *f, + struct google_protobuf_FieldDescriptorProto *fd) { - qsort(fds, num, sizeof(void*), compare_fields); + f->type = fd->type; + f->label = fd->label; + f->number = fd->number; + f->name = upb_strdup(fd->name); + f->def = NULL; + if(fd->set_flags.has.type_name) { + f->def = (struct upb_def*)upb_unresolveddef_new(fd->type_name); + } } -void upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d, - struct upb_string *fqname, bool sort, struct upb_context *c, - struct upb_status *status) +void upb_fielddef_uninit(struct upb_fielddef *f) { - (void)status; // Nothing that can fail at the moment. - int num_fields = d->set_flags.has.field ? d->field->len : 0; + upb_string_unref(f->name); + if(upb_fielddef_hasdef(f)) upb_def_unref(f->def); +} + +struct upb_fielddef *upb_fielddef_dup(struct upb_fielddef *f) +{ + struct upb_fielddef *new_f = malloc(sizeof(*new_f)); + new_f->type = f->type; + new_f->label = f->label; + new_f->number = f->number; + new_f->name = upb_strdup(f->name); + new_f->type = f->type; + new_f->def = NULL; + if(upb_fielddef_hasdef(f)) { + new_f->def = f->def; + upb_def_ref(new_f->def); + } + return new_f; +} + +struct upb_msgdef *upb_msgdef_new(struct upb_fielddef *fields, int num_fields, + struct upb_string *fqname) +{ + struct upb_msgdef *m = malloc(sizeof(*m)); + upb_def_init(&m->def, UPB_DEF_MESSAGE, fqname); upb_inttable_init(&m->fields_by_num, num_fields, sizeof(struct upb_fieldsbynum_entry)); upb_strtable_init(&m->fields_by_name, num_fields, sizeof(struct upb_fieldsbyname_entry)); - upb_atomic_refcount_init(&m->refcount, 1); - m->fqname = upb_strdup(fqname); - m->context = c; m->num_fields = num_fields; m->set_flags_bytes = div_round_up(m->num_fields, 8); - /* These are incremented in the loop. */ + // These are incremented in the loop. m->num_required_fields = 0; m->size = m->set_flags_bytes; - - m->fields = malloc(sizeof(*m->fields) * m->num_fields); - - /* Create a sorted list of the fields. */ - google_protobuf_FieldDescriptorProto **fds = - malloc(sizeof(*fds) * m->num_fields); - for(unsigned int i = 0; i < m->num_fields; i++) { - /* We count on the caller to keep this pointer alive. */ - fds[i] = d->field->elements[i]; - } - if(sort) upb_msgdef_sortfds(fds, m->num_fields); + m->fields = fields; size_t max_align = 0; - for(unsigned int i = 0; i < m->num_fields; i++) { + for(int i = 0; i < num_fields; i++) { struct upb_fielddef *f = &m->fields[i]; - google_protobuf_FieldDescriptorProto *fd = fds[i]; - struct upb_type_info *type_info = &upb_type_info[fd->type]; + struct upb_type_info *type_info = &upb_type_info[f->type]; - /* General alignment rules are: each member must be at an address that is a - * multiple of that type's alignment. Also, the size of the structure as - * a whole must be a multiple of the greatest alignment of any member. */ + // General alignment rules are: each member must be at an address that is a + // multiple of that type's alignment. Also, the size of the structure as + // a whole must be a multiple of the greatest alignment of any member. */ f->field_index = i; f->byte_offset = ALIGN_UP(m->size, type_info->align); - f->type = fd->type; - f->label = fd->label; - f->number = fd->number; - f->name = upb_strdup(fd->name); - f->ref.str = fd->type_name; m->size = f->byte_offset + type_info->size; max_align = UPB_MAX(max_align, type_info->align); - if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) + if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) { + // We currently rely on the fact that required fields are always sorted + // to occur before non-required fields. m->num_required_fields++; + } - /* Insert into the tables. Note that f->ref will be uninitialized, even in - * the tables' copies of *f, which is why we must update them separately - * in upb_msg_setref() below. */ - struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f}; - struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f}; + // Insert into the tables. Note that f->ref will be uninitialized, even in + // the tables' copies of *f, which is why we must update them separately + // in upb_msg_setref() below. + struct upb_fieldsbynum_entry nument = {.e = {.key = f->number}, .f = *f}; + struct upb_fieldsbyname_entry strent = {.e = {.key = upb_strdup(f->name)}, .f = *f}; upb_inttable_insert(&m->fields_by_num, &nument.e); upb_strtable_insert(&m->fields_by_name, &strent.e); } - if(max_align > 0) - m->size = ALIGN_UP(m->size, max_align); - free(fds); + if(max_align > 0) m->size = ALIGN_UP(m->size, max_align); + return m; } void _upb_msgdef_free(struct upb_msgdef *m) { + upb_def_uninit(&m->def); upb_inttable_free(&m->fields_by_num); upb_strtable_free(&m->fields_by_name); - upb_string_unref(m->fqname); - for (unsigned int i = 0; i < m->num_fields; i++) { - struct upb_fielddef *f = &m->fields[i]; - upb_string_unref(f->name); - if (upb_issubmsg(f) || f->type == UPB_TYPENUM(ENUM)) - upb_def_unref(f->ref, f->type); - } + for (unsigned int i = 0; i < m->num_fields; i++) + upb_fielddef_uninit(&m->fields[i]); free(m->fields); free(m); } -void upb_msgdef_setref(struct upb_msgdef *m, struct upb_fielddef *f, - union upb_def_ptr ref) { +void upb_msgdef_resolve(struct upb_msgdef *m, struct upb_fielddef *f, + struct upb_def *def) { struct upb_fieldsbynum_entry *int_e = upb_inttable_fast_lookup( &m->fields_by_num, f->number, sizeof(struct upb_fieldsbynum_entry)); struct upb_fieldsbyname_entry *str_e = upb_strtable_lookup(&m->fields_by_name, f->name); assert(int_e && str_e); - f->ref = ref; - int_e->f.ref = ref; - str_e->f.ref = ref; - upb_def_ref(ref, f->type); + f->def = def; + int_e->f.def = def; + str_e->f.def = def; + upb_def_ref(def); } -void upb_enumdef_init(struct upb_enumdef *e, - struct google_protobuf_EnumDescriptorProto *ed, - struct upb_context *c) { +struct upb_enumdef *upb_enumdef_new( + struct google_protobuf_EnumDescriptorProto *ed, struct upb_string *fqname) +{ + struct upb_enumdef *e = malloc(sizeof(*e)); + upb_def_init(&e->def, UPB_DEF_ENUM, fqname); int num_values = ed->set_flags.has.value ? ed->value->len : 0; - e->context = c; - upb_atomic_refcount_init(&e->refcount, 1); upb_strtable_init(&e->nametoint, num_values, sizeof(struct upb_enumdef_ntoi_entry)); upb_inttable_init(&e->inttoname, num_values, @@ -143,16 +189,18 @@ void upb_enumdef_init(struct upb_enumdef *e, for(int i = 0; i < num_values; i++) { google_protobuf_EnumValueDescriptorProto *value = ed->value->elements[i]; - struct upb_enumdef_ntoi_entry ntoi_entry = {.e = {.key = *value->name}, + struct upb_enumdef_ntoi_entry ntoi_entry = {.e = {.key = upb_strdup(value->name)}, .value = value->number}; struct upb_enumdef_iton_entry iton_entry = {.e = {.key = value->number}, .string = value->name}; upb_strtable_insert(&e->nametoint, &ntoi_entry.e); upb_inttable_insert(&e->inttoname, &iton_entry.e); } + return e; } void _upb_enumdef_free(struct upb_enumdef *e) { + upb_def_uninit(&e->def); upb_strtable_free(&e->nametoint); upb_inttable_free(&e->inttoname); free(e); diff --git a/src/upb_def.h b/src/upb_def.h index 6a6622a..e58f01f 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -9,6 +9,9 @@ * - upb_enumdef: describes an enum. * (TODO: descriptions of extensions and services). * + * Defs should be obtained from a upb_context object; the APIs for creating + * them directly are internal-only. + * * Defs are immutable and reference-counted. Contexts reference any defs * that are the currently in their symbol table. If an extension is loaded * that adds a field to an existing message, a new msgdef is constructed that @@ -44,13 +47,15 @@ enum upb_def_type { // Common members. struct upb_def { + struct upb_string *fqname; // Fully qualified. enum upb_def_type type; upb_atomic_refcount_t refcount; - struct upb_string *fqname; /* Fully-qualified. */ -} +}; void upb_def_init(struct upb_def *def, enum upb_def_type type, struct upb_string *fqname); +void upb_def_uninit(struct upb_def *def); +INLINE void upb_def_ref(struct upb_def *def) { upb_atomic_ref(&def->refcount); } /* Field definition. **********************************************************/ @@ -72,6 +77,7 @@ struct upb_fielddef { struct upb_def *def; }; +// A variety of tests about the type of a field. INLINE bool upb_issubmsg(struct upb_fielddef *f) { return upb_issubmsgtype(f->type); } @@ -81,6 +87,10 @@ INLINE bool upb_isstring(struct upb_fielddef *f) { INLINE bool upb_isarray(struct upb_fielddef *f) { return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED; } +// Does the type of this field imply that it should contain an associated def? +INLINE bool upb_fielddef_hasdef(struct upb_fielddef *f) { + return upb_issubmsg(f) || f->type == UPB_TYPENUM(ENUM); +} INLINE bool upb_field_ismm(struct upb_fielddef *f) { return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); @@ -105,18 +115,22 @@ INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_fielddef *f) { else return -1; } -// Interfaces for constructing/destroying fielddefs. These are internal-only. struct google_protobuf_FieldDescriptorProto; +// Interfaces for constructing/destroying fielddefs. These are internal-only. + // Initializes a upb_fielddef from a FieldDescriptorProto. The caller must // have previously allocated the upb_fielddef. -void upb_fielddef_init(struct google_protobuf_FieldDescriptorProto *fd, - struct upb_fielddef *f); +void upb_fielddef_init(struct upb_fielddef *f, + struct google_protobuf_FieldDescriptorProto *fd); +struct upb_fielddef *upb_fielddef_dup(struct upb_fielddef *f); void upb_fielddef_uninit(struct upb_fielddef *f); // Sort the given fielddefs in-place, according to what we think is an optimal // ordering of fields. This can change from upb release to upb release. void upb_fielddef_sort(struct upb_fielddef *defs, size_t num); +void upb_fielddef_sortfds(struct google_protobuf_FieldDescriptorProto **fds, + size_t num); /* Message definition. ********************************************************/ @@ -155,22 +169,52 @@ struct upb_fieldsbyname_entry { // possible. These return NULL if no such field is found. INLINE struct upb_fielddef *upb_msg_fieldbynum(struct upb_msgdef *m, uint32_t number) { - struct upb_fieldsbynum_entry *e = upb_inttable_fast_lookup( - &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry)); + struct upb_fieldsbynum_entry *e = (struct upb_fieldsbynum_entry*) + upb_inttable_fast_lookup( + &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry)); return e ? &e->f : NULL; } INLINE struct upb_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m, struct upb_string *name) { - struct upb_fieldsbyname_entry *e = upb_strtable_lookup( - &m->fields_by_name, name); + struct upb_fieldsbyname_entry *e = (struct upb_fieldsbyname_entry*) + upb_strtable_lookup( + &m->fields_by_name, name); return e ? &e->f : NULL; } +// Internal-only functions for constructing a msgdef. Caller retains ownership +// of d and fqname. Ownership of fields passes to the msgdef. +// +// Note that init does not resolve upb_fielddef.ref; the caller should do that +// post-initialization by calling upb_msgdef_resolve() below. +struct upb_msgdef *upb_msgdef_new(struct upb_fielddef *fields, int num_fields, + struct upb_string *fqname); +void _upb_msgdef_free(struct upb_msgdef *m); +INLINE void upb_msgdef_ref(struct upb_msgdef *m) { + upb_def_ref(&m->def); +} +INLINE void upb_msgdef_unref(struct upb_msgdef *m) { + if(upb_atomic_unref(&m->def.refcount)) _upb_msgdef_free(m); +} + +// Clients use this function on a previously initialized upb_msgdef to resolve +// the "ref" field in the upb_fielddef. Since messages can refer to each +// other in mutually-recursive ways, this step must be separated from +// initialization. +void upb_msgdef_resolve(struct upb_msgdef *m, struct upb_fielddef *f, + struct upb_def *def); + +// Downcasts. They are checked only if asserts are enabled. +INLINE struct upb_msgdef *upb_downcast_msgdef(struct upb_def *def) { + assert(def->type == UPB_DEF_MESSAGE); + return (struct upb_msgdef*)def; +} + /* Enum defintion. ************************************************************/ struct upb_enumdef { - upb_atomic_refcount_t refcount; + struct upb_def def; struct upb_strtable nametoint; struct upb_inttable inttoname; }; @@ -185,72 +229,67 @@ struct upb_enumdef_iton_entry { struct upb_string *string; }; -/* Internal functions. ********************************************************/ - -/* Initializes/frees a upb_msgdef. Usually this will be called by upb_context, - * and clients will not have to construct one directly. - * - * Caller retains ownership of d and fqname. Note that init does not resolve - * upb_fielddef.ref the caller should do that post-initialization by - * calling upb_msg_ref() below. - * - * fqname indicates the fully-qualified name of this message. - * - * sort indicates whether or not it is safe to reorder the fields from the order - * they appear in d. This should be false if code has been compiled against a - * header for this type that expects the given order. */ -void upb_msgdef_init(struct upb_msgdef *m, - struct google_protobuf_DescriptorProto *d, - struct upb_string *fqname, bool sort, - struct upb_status *status); -void _upb_msgdef_free(struct upb_msgdef *m); -INLINE void upb_msgdef_ref(struct upb_msgdef *m) { - upb_atomic_ref(&m->refcount); +// Internal-only functions for creating/destroying an enumdef. Caller retains +// ownership of ed. The enumdef is initialized with one ref. +struct upb_enumdef *upb_enumdef_new( + struct google_protobuf_EnumDescriptorProto *ed, struct upb_string *fqname); +void _upb_enumdef_free(struct upb_enumdef *e); +INLINE void upb_enumdef_ref(struct upb_enumdef *e) { upb_def_ref(&e->def); } +INLINE void upb_enumdef_unref(struct upb_enumdef *e) { + if(upb_atomic_unref(&e->def.refcount)) _upb_enumdef_free(e); } -INLINE void upb_msgdef_unref(struct upb_msgdef *m) { - if(upb_atomic_unref(&m->refcount)) _upb_msgdef_free(m); +INLINE struct upb_enumdef *upb_downcast_enumdef(struct upb_def *def) { + assert(def->type == UPB_DEF_ENUM); + return (struct upb_enumdef*)def; } -/* Clients use this function on a previously initialized upb_msgdef to resolve - * the "ref" field in the upb_fielddef. Since messages can refer to each - * other in mutually-recursive ways, this step must be separated from - * initialization. */ -void upb_msgdef_setref(struct upb_msgdef *m, struct upb_fielddef *f, - union upb_symbol_ref ref); - -/* Initializes and frees an enum, respectively. Caller retains ownership of - * ed. The enumdef is initialized with one ref. */ -void upb_enumdef_init(struct upb_enumdef *e, - struct google_protobuf_EnumDescriptorProto *ed); -void _upb_enumdef_free(struct upb_enumdef *e); -INLINE void upb_enumdef_ref(struct upb_enumdef *e) { - upb_atomic_ref(&e->refcount); +/* Unresolved definition. *****************************************************/ + +// This is a placeholder definition that contains only the name of the type +// that should eventually be referenced. Once symbols are resolved, this +// definition is replaced with a real definition. +struct upb_unresolveddef { + struct upb_def def; + struct upb_string *name; // Not fully-qualified. +}; + +INLINE struct upb_unresolveddef *upb_unresolveddef_new(struct upb_string *name) { + struct upb_unresolveddef *d = (struct upb_unresolveddef*)malloc(sizeof(*d)); + upb_def_init(&d->def, UPB_DEF_UNRESOLVED, name); + d->name = name; + upb_string_ref(name); + return d; } -INLINE void upb_enumdef_unref(struct upb_enumdef *e) { - if(upb_atomic_unref(&e->refcount)) _upb_enumdef_free(e); +INLINE void _upb_unresolveddef_free(struct upb_unresolveddef *def) { + upb_def_uninit(&def->def); + upb_string_unref(def->name); +} +INLINE struct upb_unresolveddef *upb_downcast_unresolveddef(struct upb_def *def) { + assert(def->type == UPB_DEF_UNRESOLVED); + return (struct upb_unresolveddef*)def; } -INLINE void upb_def_ref(struct upb_def *def) { upb_atomic_ref(&def->refcount); } INLINE void upb_def_unref(struct upb_def *def) { if(upb_atomic_unref(&def->refcount)) { - switch(def->type) { - case UPB_DEF_MESSAGE: - _upb_msgdef_free((struct upb_msgdef*)def); - break; - case UPB_DEF_ENUM: - _upb_emumdef_free((struct upb_enumdef*)def); - break; - case UPB_DEF_SERVICE: - assert(false); /* Unimplemented. */ - break; - case UPB_DEF_EXTENSION, - _upb_extensiondef_free((struct upb_extensiondef*)def); - break; - case UPB_DEF_UNRESOLVED - upb_string_unref((struct upb_string*)def); - break; - default: - assert(false); + switch(def->type) { + case UPB_DEF_MESSAGE: + _upb_msgdef_free((struct upb_msgdef*)def); + break; + case UPB_DEF_ENUM: + _upb_enumdef_free((struct upb_enumdef*)def); + break; + case UPB_DEF_SERVICE: + assert(false); /* Unimplemented. */ + break; + case UPB_DEF_EXTENSION: + assert(false); /* Unimplemented. */ + break; + case UPB_DEF_UNRESOLVED: + _upb_unresolveddef_free((struct upb_unresolveddef*)def); + break; + default: + assert(false); + } } } diff --git a/src/upb_mm.c b/src/upb_mm.c index cad1158..60809a5 100644 --- a/src/upb_mm.c +++ b/src/upb_mm.c @@ -47,7 +47,7 @@ static union upb_mmptr upb_mm_newptr(upb_mm_ptrtype type, { union upb_mmptr p = {NULL}; switch(type) { - case UPB_MM_MSG_REF: p.msg = upb_msg_new(f->ref.msg); + case UPB_MM_MSG_REF: p.msg = upb_msg_new(upb_downcast_msgdef(f->def)); case UPB_MM_STR_REF: p.str = upb_string_new(); case UPB_MM_ARR_REF: p.arr = upb_array_new(f); default: assert(false); break; diff --git a/src/upb_msg.c b/src/upb_msg.c index 5f96980..0106d02 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -100,7 +100,7 @@ static void start_cb(void *udata, struct upb_fielddef *f) if(!*p.msg || !upb_mmhead_only(&((*p.msg)->mmhead))) { if(*p.msg) upb_msg_unref(*p.msg); - *p.msg = upb_msg_new(f->ref.msg); + *p.msg = upb_msg_new(upb_downcast_msgdef(f->def)); } upb_msg_clear(*p.msg); upb_msg_set(oldmsg, f); diff --git a/src/upb_parse.c b/src/upb_parse.c index 3abaedf..eed8ec8 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -387,7 +387,7 @@ static uint8_t *push(struct upb_cbparser *p, uint8_t *start, } struct upb_cbparser_frame *frame = p->top; frame->end_offset = p->completed_offset + submsg_len; - frame->msgdef = f->ref.msg; + frame->msgdef = upb_downcast_msgdef(f->def); if(p->start_cb) p->start_cb(p->udata, f); return get_msgend(p, start); diff --git a/src/upb_string.h b/src/upb_string.h index 505ac5b..c1caddc 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -58,6 +58,11 @@ INLINE void upb_string_unref(struct upb_string *str) if(upb_mmhead_unref(&str->mmhead)) upb_string_destroy(str); } +INLINE void upb_string_ref(struct upb_string *str) +{ + upb_mmhead_ref(&str->mmhead); +} + /* Resizes the string to size, reallocating if necessary. Does not preserve * existing data. */ INLINE void upb_string_resize(struct upb_string *str, uint32_t size) diff --git a/src/upb_table.c b/src/upb_table.c index 036d175..2c4c824 100644 --- a/src/upb_table.c +++ b/src/upb_table.c @@ -61,7 +61,7 @@ void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key) struct upb_strtable_entry *e; do { e = strent(t, bucket); - if(upb_streql(&e->key, key)) return e; + if(e->key && upb_streql(e->key, key)) return e; } while((bucket = e->next) != UPB_END_OF_CHAIN); return NULL; } @@ -141,7 +141,7 @@ static uint32_t empty_strbucket(struct upb_strtable *table) /* TODO: does it matter that this is biased towards the front of the table? */ for(uint32_t i = 1; i <= upb_strtable_size(table); i++) { struct upb_strtable_entry *e = strent(table, i); - if(e->key.byte_len == 0) return i; + if(e->key == NULL) return i; } assert(false); return 0; @@ -149,12 +149,12 @@ static uint32_t empty_strbucket(struct upb_strtable *table) static void strinsert(struct upb_strtable *t, struct upb_strtable_entry *e) { - assert(upb_strtable_lookup(t, &e->key) == NULL); + assert(upb_strtable_lookup(t, e->key) == NULL); t->t.count++; - uint32_t bucket = strtable_bucket(t, &e->key); + uint32_t bucket = strtable_bucket(t, e->key); struct upb_strtable_entry *table_e = strent(t, bucket); - if(table_e->key.byte_len != 0) { /* Collision. */ - if(bucket == strtable_bucket(t, &table_e->key)) { + if(table_e->key != NULL) { /* Collision. */ + if(bucket == strtable_bucket(t, table_e->key)) { /* Existing element is in its main posisiton. Find an empty slot to * place our new element and append it to this key's chain. */ uint32_t empty_bucket = empty_strbucket(t); @@ -166,11 +166,11 @@ static void strinsert(struct upb_strtable *t, struct upb_strtable_entry *e) /* Existing element is not in its main position. Move it to an empty * slot and put our element in its main position. */ uint32_t empty_bucket = empty_strbucket(t); - uint32_t evictee_bucket = strtable_bucket(t, &table_e->key); + uint32_t evictee_bucket = strtable_bucket(t, table_e->key); memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ struct upb_strtable_entry *evictee_e = strent(t, evictee_bucket); while(1) { - assert(evictee_e->key.byte_len != 0); + assert(evictee_e->key != NULL); assert(evictee_e->next != UPB_END_OF_CHAIN); if(evictee_e->next == bucket) { evictee_e->next = empty_bucket; @@ -183,7 +183,7 @@ static void strinsert(struct upb_strtable *t, struct upb_strtable_entry *e) } memcpy(table_e, e, t->t.entry_size); table_e->next = UPB_END_OF_CHAIN; - assert(upb_strtable_lookup(t, &e->key) == table_e); + assert(upb_strtable_lookup(t, e->key) == table_e); } void upb_strtable_insert(struct upb_strtable *t, struct upb_strtable_entry *e) @@ -223,7 +223,7 @@ void *upb_strtable_next(struct upb_strtable *t, struct upb_strtable_entry *cur) do { cur = (void*)((char*)cur + t->t.entry_size); if(cur == end) return NULL; - } while(cur->key.byte_len == 0); + } while(cur->key == NULL); return cur; } diff --git a/src/upb_table.h b/src/upb_table.h index 3855e3e..2202684 100644 --- a/src/upb_table.h +++ b/src/upb_table.h @@ -34,13 +34,13 @@ struct upb_inttable_entry { uint32_t next; /* Internal chaining. */ }; -/* TODO: consider storing the hash in the entry. This would avoid the need to - * rehash on table resizes, but more importantly could possibly improve lookup - * performance by letting us compare hashes before comparing lengths or the - * strings themselves. */ +// TODO: consider storing the hash in the entry. This would avoid the need to +// rehash on table resizes, but more importantly could possibly improve lookup +// performance by letting us compare hashes before comparing lengths or the +// strings themselves. struct upb_strtable_entry { - struct upb_string key; - uint32_t next; /* Internal chaining. */ + struct upb_string *key; // We own one ref. + uint32_t next; // Internal chaining. }; struct upb_table { -- cgit v1.2.3