From 6bf58a7328fb5241e2f66ef39c60e4483acfb19d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 26 Jun 2010 14:52:41 -0700 Subject: Incremental progress on upb_def. --- src/upb_def.c | 511 ++++++++++++++++++++++++---------------------------------- 1 file changed, 209 insertions(+), 302 deletions(-) (limited to 'src/upb_def.c') diff --git a/src/upb_def.c b/src/upb_def.c index 9f34b42..39ed546 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -5,10 +5,8 @@ */ #include -#include -#include "descriptor.h" +#include "descriptor_const.h" #include "upb_def.h" -#include "upb_data.h" /* Rounds p up to the next multiple of t. */ #define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t)))) @@ -23,10 +21,10 @@ typedef struct { upb_def **defs; uint32_t len; uint32_t size; -}; +} upb_deflist; static void upb_deflist_init(upb_deflist *l) { - l->size = 8 + l->size = 8; l->defs = malloc(l->size); l->len = 0; } @@ -34,11 +32,11 @@ static void upb_deflist_init(upb_deflist *l) { static void upb_deflist_uninit(upb_deflist *l) { free(l->defs); } static void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->defs_len == l->defs_size) { - l->defs_size *= 2; - l->defs = realloc(l->defs, l->defs_size); + if(l->len == l->size) { + l->size *= 2; + l->defs = realloc(l->defs, l->size); } - l->defs[l->defs_len++] = d; + l->defs[l->len++] = d; } /* upb_def ********************************************************************/ @@ -171,12 +169,11 @@ void _upb_def_cyclic_ref(upb_def *def) { cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); } -static void upb_def_init(upb_def *def, enum upb_def_type type, - upb_strptr fqname) { +static void upb_def_init(upb_def *def, upb_def_type type, upb_string *fqname) { def->type = type; def->is_cyclic = 0; // We detect this later, after resolving refs. def->search_depth = 0; - def->fqname = NULL; + def->fqname = fqname; upb_atomic_refcount_init(&def->refcount, 1); } @@ -188,19 +185,15 @@ static void upb_def_uninit(upb_def *def) { typedef struct _upb_unresolveddef { upb_def base; - upb_strptr name; } upb_unresolveddef; -static upb_unresolveddef *upb_unresolveddef_new(upb_strptr str) { +static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { upb_unresolveddef *def = malloc(sizeof(*def)); - upb_strptr name = upb_string_getref(str, UPB_REF_THREADUNSAFE_READONLY); - upb_def_init(&def->base, UPB_DEF_UNRESOLVED, name); - def->name = name; + upb_def_init(&def->base, UPB_DEF_UNRESOLVED, str); return def; } static void unresolveddef_free(struct _upb_unresolveddef *def) { - upb_string_unref(def->name); upb_def_uninit(&def->base); free(def); } @@ -215,7 +208,7 @@ static upb_fielddef *fielddef_new(upb_src *src) upb_src_startmsg(src); upb_fielddef *parsed_f; while((parsed_f = upb_src_getdef(src))) { - switch(parsed_f->field_number) { + switch(parsed_f->number) { case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: CHECK(upb_src_getval(src, &f->type)); case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: @@ -243,92 +236,50 @@ static void fielddef_free(upb_fielddef *f) { free(f); } -static void fielddef_copy(upb_fielddef *dst, upb_fielddef *src) -{ - *dst = *src; - dst->name = upb_string_getref(src->name, UPB_REF_FROZEN); - if(upb_hasdef(src)) { - upb_def_ref(dst->def); - dst->owned = true; - } -} - -// Callback for sorting fields. -static int compare_fields(upb_fielddef *f1, upb_fielddef *f2) { - // Required fields go before non-required. - bool req1 = f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; - bool req2 = f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; - if(req1 != req2) { - return req2 - req1; - } else { - // Within required and non-required field lists, list in number order. - // TODO: consider ordering by data size to reduce padding. */ - return f1->number - f2->number; - } -} - -static int compare_fielddefs(const void *e1, const void *e2) { - return compare_fields(*(void**)e1, *(void**)e2); -} - -static void fielddef_sort(upb_fielddef **defs, size_t num) -{ - qsort(defs, num, sizeof(*defs), compare_fielddefs); -} - /* upb_msgdef *****************************************************************/ -static upb_msgdef *msgdef_new(upb_fielddef **fields, int num_fields, - upb_strptr fqname, upb_status *status) +// Processes a google.protobuf.DescriptorProto, adding defs to "deflist." +static void upb_addmsg(upb_src *src, upb_deflist *deflist, upb_status *status) { - if(num_fields > UPB_MAX_FIELDS) { - upb_seterr(status, UPB_STATUS_ERROR, - "Tried to create a msgdef with more than %d fields", num_fields); - free(fields); - return NULL; - } upb_msgdef *m = malloc(sizeof(*m)); - upb_def_init(&m->base, UPB_DEF_MSG, fqname); + upb_def_init(&m->base, UPB_DEF_MSG); upb_atomic_refcount_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, num_fields, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, num_fields, sizeof(upb_ntof_ent)); - - m->num_fields = num_fields; - m->set_flags_bytes = div_round_up(m->num_fields, 8); - // These are incremented in the loop. - m->num_required_fields = 0; - m->size = m->set_flags_bytes + 4; // 4 for the refcount. + m->num_fields = 0; m->fields = malloc(sizeof(upb_fielddef) * num_fields); + int32_t start_count = defs->len; - size_t max_align = 0; - for(int i = 0; i < num_fields; i++) { - upb_fielddef *f = &m->fields[i]; - upb_type_info *type_info = &upb_types[fields[i]->type]; - fielddef_copy(f, fields[i]); - - // General alignment rules are: each member must be at an address that is a - // multiple of that type's alignment. Also, the size of the structure as - // a whole must be a multiple of the greatest alignment of any member. */ - f->field_index = i; - size_t offset = ALIGN_UP(m->size, type_info->align); - f->byte_offset = offset - 4; // Offsets are relative to the refcount. - m->size = offset + type_info->size; - max_align = UPB_MAX(max_align, type_info->align); - if(f->label == UPB_LABEL(REQUIRED)) { - // We currently rely on the fact that required fields are always sorted - // to occur before non-required fields. - m->num_required_fields++; + CHECK(upb_src_startmsg(src)); + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: + upb_string_unref(m->fqname); + CHECK(upb_src_getval(src, &m->fqname)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_FIELD_NUM: + CHECK(upb_addfield(src, m)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NESTED_TYPE_NUM: + CHECK(upb_addmsg(src, deflist)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_NUM: + CHECK(upb_addenum(src, deflist)); + break; + default: + // TODO: extensions. + upb_src_skipval(src); } - - // Insert into the tables. - upb_itof_ent itof_ent = {{f->number, 0}, f}; - upb_ntof_ent ntof_ent = {{f->name, 0}, f}; - upb_inttable_insert(&m->itof, &itof_ent.e); - upb_strtable_insert(&m->ntof, &ntof_ent.e); } - - if(max_align > 0) m->size = ALIGN_UP(m->size, max_align); - return m; + CHECK(upb_src_eof(src) && upb_src_endmsg(src)); + if(!m->fqname) { + upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); + return false; + } + upb_qualify(defs, m->fqname, start_count); + upb_deflist_push(m); + return true; } static void msgdef_free(upb_msgdef *m) @@ -363,6 +314,50 @@ typedef struct { upb_strptr string; } iton_ent; +static void upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) +{ + upb_src_startmsg(src); + int32_t number = -1; + upb_string *name = NULL; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: + upb_src_getval(src, &number); + break; + case GOOGLE_PROTOBUF_ENUMVALUDESCRIPTORPROTO_NAME_FIELDNUM: + upb_src_getval(src, &name); + break; + default: + upb_src_skipval(src); + } + } + upb_src_endmsg(src); + ntoi_ent ntoi_ent = {{value->name, 0}, value->number}; + iton_ent iton_ent = {{value->number, 0}, value->name}; + upb_strtable_insert(&e->ntoi, &ntoi_ent.e); + upb_inttable_insert(&e->iton, &iton_ent.e); +} + +static void upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) +{ + upb_enumdef *e = malloc(sizeof(*e)); + upb_def_init(&e->base, UPB_DEF_ENUM, fqname); + upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + CHECK(upb_addenum_val(src, e, status)); + break; + default: + upb_src_skipval(src); + break; + } + } + upb_deflist_push(e); +} + static void enumdef_free(upb_enumdef *e) { upb_strtable_free(&e->ntoi); upb_inttable_free(&e->iton); @@ -397,10 +392,37 @@ bool upb_enum_done(upb_enum_iter *iter) { /* symtab internal ***********************************************************/ -typedef struct { - upb_strtable_entry e; - upb_def *def; -} upb_symtab_ent; +// Processes a google.protobuf.FileDescriptorProto, adding the defs to "defs". +static void upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) +{ + upb_string *package = NULL; + int32_t start_count = defs->len; + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: + upb_string_unref(package); + CHECK(upb_src_getval(src, &package)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_NUM: + CHECK(upb_startmsg(src)); + CHECK(upb_addmsg(src, defs)); + CHECK(upb_endmsg(src)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_NUM: + CHECK(upb_startmsg(src)); + CHECK(upb_addenum(src, defs)); + CHECK(upb_endmsg(src)); + break; + default: + // TODO: services and extensions. + upb_src_skipval(src); + } + } + CHECK(upb_src_eof(src)); + upb_qualify(deflist, package, start_count); + upb_string_unref(package); +} /* Search for a character in a string, in reverse. */ static int my_memrchr(char *data, char c, size_t len) @@ -410,6 +432,11 @@ static int my_memrchr(char *data, char c, size_t len) return off; } +typedef struct { + upb_strtable_entry e; + upb_def *def; +} upb_symtab_ent; + /* Given a symbol and the base symbol inside which it is defined, find the * symbol's definition in t. */ static symtab_ent *resolve(upb_strtable *t, upb_strptr base, upb_strptr symbol) @@ -459,206 +486,15 @@ static upb_string *upb_join(upb_string *base, upb_string *name) { return joined; } -static void upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) -{ - upb_src_startmsg(src); - int32_t number = -1; - upb_string *name = NULL; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - upb_src_getval(src, &number); - break; - case GOOGLE_PROTOBUF_ENUMVALUDESCRIPTORPROTO_NAME_FIELDNUM: - upb_src_getval(src, &name); - break; - default: - upb_src_skipval(src); - } - } - upb_src_endmsg(src); - ntoi_ent ntoi_ent = {{value->name, 0}, value->number}; - iton_ent iton_ent = {{value->number, 0}, value->name}; - upb_strtable_insert(&e->ntoi, &ntoi_ent.e); - upb_inttable_insert(&e->iton, &iton_ent.e); -} - -static void upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) -{ - upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM, fqname); - upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); - CHECK(upb_src_startmsg(src)); - - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: - CHECK(upb_addenum_val(src, e, status)); - break; - default: - upb_src_skipval(src); - break; - } - } - upb_deflist_push(e); -} - -// Processes a google.protobuf.DescriptorProto, adding defs to "deflist." -static void upb_addmsg(upb_src *src, upb_deflist *deflist, upb_status *status) -{ - upb_msgdef *m = malloc(sizeof(*m)); - upb_def_init(&m->base, UPB_DEF_MSG); - upb_atomic_refcount_init(&m->cycle_refcount, 0); - upb_inttable_init(&m->itof, num_fields, sizeof(upb_itof_ent)); - upb_strtable_init(&m->ntof, num_fields, sizeof(upb_ntof_ent)); - m->num_fields = 0; - m->fields = malloc(sizeof(upb_fielddef) * num_fields); - int32_t start_count = defs->len; - - CHECK(upb_src_startmsg(src)); - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(m->fqname); - CHECK(upb_src_getval(src, &m->fqname)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_FIELD_NUM: - CHECK(upb_addfield(src, m)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NESTED_TYPE_NUM: - CHECK(upb_addmsg(src, deflist)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_NUM: - CHECK(upb_addenum(src, deflist)); - break; - default: - // TODO: extensions. - upb_src_skipval(src); - } - } - CHECK(upb_src_eof(src) && upb_src_endmsg(src)); - if(!m->fqname) { - upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - return false; - } - upb_qualify(defs, m->fqname, start_count); - upb_deflist_push(m); - return true; -} - -// Processes a google.protobuf.FileDescriptorProto, adding the defs to "defs". -static void upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) -{ - CHECK(upb_src_startmsg(src)); - upb_string *package = NULL; - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(package); - CHECK(upb_src_getval(src, &package)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_NUM: - CHECK(upb_addmsg(src, defs)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_NUM: - CHECK(upb_addenum(src, defs)); - break; - default: - // TODO: services and extensions. - upb_src_skipval(src); - } - } - CHECK(upb_src_eof(src) && upb_src_endmsg(src)); - - upb_qualify(deflist, package, 0); - upb_string_unref(package); -} - -/* upb_symtab *****************************************************************/ - -upb_symtab *upb_symtab_new() -{ - upb_symtab *s = malloc(sizeof(*s)); - upb_atomic_refcount_init(&s->refcount, 1); - upb_rwlock_init(&s->lock); - upb_strtable_init(&s->symtab, 16, sizeof(symtab_ent)); - return s; -} - -static void free_symtab(upb_strtable *t) -{ - symtab_ent *e; - for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) - upb_def_unref(e->def); - upb_strtable_free(t); -} - -void _upb_symtab_free(upb_symtab *s) -{ - free_symtab(&s->symtab); - free_symtab(&s->psymtab); - upb_rwlock_destroy(&s->lock); - free(s); -} - -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type) -{ - upb_rwlock_rdlock(&s->lock); - int total = upb_strtable_count(&s->symtab); - // We may only use part of this, depending on how many symbols are of the - // correct type. - upb_def **defs = malloc(sizeof(*defs) * total); - symtab_ent *e = upb_strtable_begin(&s->symtab); - int i = 0; - for(; e; e = upb_strtable_next(&s->symtab, &e->e)) { - upb_def *def = e->def; - assert(def); - if(type == UPB_DEF_ANY || def->type == type) - defs[i++] = def; - } - upb_rwlock_unlock(&s->lock); - *count = i; - for(i = 0; i < *count; i++) - upb_def_ref(defs[i]); - return defs; -} - -upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym) -{ - upb_rwlock_rdlock(&s->lock); - symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); - return ret; -} - - -upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol) { - upb_rwlock_rdlock(&s->lock); - symtab_ent *e = resolve(&s->symtab, base, symbol); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); - return ret; -} - +// Performs a pass over the type graph to find all cycles that include m. static bool upb_symtab_findcycles(upb_msgdef *m, int search_depth, upb_status *status) { if(search_depth > UPB_MAX_TYPE_DEPTH) { - // There are many situations in upb where we recurse over the type tree - // (like for example, right now) and an absurdly deep tree could cause us - // to stack overflow on systems with very limited stacks. + // We have found a non-cyclic path from the base of the type tree that + // exceeds the maximum allowed depth. There are many situations in upb + // where we recurse over the type tree (like for example, right now) and an + // absurdly deep tree could cause us to stack overflow on systems with very + // limited stacks. upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at " "depth %d in the type graph, which exceeds the maximum type " "depth of %d.", UPB_UPCAST(m)->fqname, search_depth, @@ -709,9 +545,8 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, { // Build a table, for duplicate detection and name resolution. - - // Attempt to resolve all references. { // Write lock scope. + // Attempt to resolve all references. symtab_ent *e; for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { upb_msgdef *m = upb_dyncast_msgdef(e->def); @@ -742,11 +577,8 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { upb_msgdef *m = upb_dyncast_msgdef(e->def); if(!m) continue; - - // Do an initial pass over the graph to check that there are no cycles - // longer than the maximum length. We also mark all cyclic defs as such, - // and decrement refs on cyclic defs. - find_cycles(m, 0, status); + // The findcycles() call will decrement the external refcount of the + upb_symtab_findcycles(m, 0, status); upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; cycle_ref_or_unref(m, NULL, open_defs, 0, true); } @@ -755,6 +587,81 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, } } +/* upb_symtab *****************************************************************/ + +upb_symtab *upb_symtab_new() +{ + upb_symtab *s = malloc(sizeof(*s)); + upb_atomic_refcount_init(&s->refcount, 1); + upb_rwlock_init(&s->lock); + upb_strtable_init(&s->symtab, 16, sizeof(symtab_ent)); + return s; +} + +static void free_symtab(upb_strtable *t) +{ + symtab_ent *e; + for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) + upb_def_unref(e->def); + upb_strtable_free(t); +} + +void _upb_symtab_free(upb_symtab *s) +{ + free_symtab(&s->symtab); + free_symtab(&s->psymtab); + upb_rwlock_destroy(&s->lock); + free(s); +} + +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type) +{ + upb_rwlock_rdlock(&s->lock); + int total = upb_strtable_count(&s->symtab); + // We may only use part of this, depending on how many symbols are of the + // correct type. + upb_def **defs = malloc(sizeof(*defs) * total); + symtab_ent *e = upb_strtable_begin(&s->symtab); + int i = 0; + for(; e; e = upb_strtable_next(&s->symtab, &e->e)) { + upb_def *def = e->def; + assert(def); + if(type == UPB_DEF_ANY || def->type == type) + defs[i++] = def; + } + upb_rwlock_unlock(&s->lock); + *count = i; + for(i = 0; i < *count; i++) + upb_def_ref(defs[i]); + return defs; +} + +upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym) +{ + upb_rwlock_rdlock(&s->lock); + symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); + upb_def *ret = NULL; + if(e) { + ret = e->def; + upb_def_ref(ret); + } + upb_rwlock_unlock(&s->lock); + return ret; +} + + +upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol) { + upb_rwlock_rdlock(&s->lock); + symtab_ent *e = resolve(&s->symtab, base, symbol); + upb_def *ret = NULL; + if(e) { + ret = e->def; + upb_def_ref(ret); + } + upb_rwlock_unlock(&s->lock); + return ret; +} + void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { } -- cgit v1.2.3