From 559e23c796f973a65d05c76e211835b126ee8ac8 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 17 Jun 2011 10:34:29 -0700 Subject: Major refactoring: abandon upb_msg, add upb_accessors. Next on the chopping block is upb_string. --- src/upb_def.c | 1458 ++++++++++++++++++--------------------------------------- 1 file changed, 461 insertions(+), 997 deletions(-) (limited to 'src/upb_def.c') diff --git a/src/upb_def.c b/src/upb_def.c index 791b885..45e7f73 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -7,30 +7,10 @@ #include #include -#include #include "upb_def.h" -#include "upb_msg.h" #define alignof(t) offsetof(struct { char c; t x; }, x) -static int upb_div_round_up(int numerator, int denominator) { - /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ - return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; -} - -/* Joins strings together, for example: - * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" - * join("", "Baz") -> "Baz" - * Caller owns a ref on the returned string. */ -static upb_string *upb_join(upb_string *base, upb_string *name) { - if (!base || upb_string_len(base) == 0) { - return upb_string_getref(name); - } else { - return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT, - UPB_STRARG(base), UPB_STRARG(name)); - } -} - /* Search for a character in a string, in reverse. */ static int my_memrchr(char *data, char c, size_t len) { @@ -39,181 +19,18 @@ static int my_memrchr(char *data, char c, size_t len) return off; } -/* upb_def ********************************************************************/ - -// Defs are reference counted, but can have cycles when types are -// self-recursive or mutually recursive, so we need to be capable of collecting -// the cycles. In our situation defs are immutable (so cycles cannot be -// created or destroyed post-initialization). We need to be thread-safe but -// want to avoid locks if at all possible and rely only on atomic operations. -// -// Our scheme is as follows. First we give each def a flag indicating whether -// it is part of a cycle or not. Because defs are immutable, this flag will -// never change. For acyclic defs, we can use a naive algorithm and avoid the -// overhead of dealing with cycles. Most defs will be acyclic, and most cycles -// will be very short. -// -// For defs that participate in cycles we keep two reference counts. One -// tracks references that come from outside the cycle (we call these external -// references), and is incremented and decremented like a regular refcount. -// The other is a cycle refcount, and works as follows. Every cycle is -// considered distinct, even if two cycles share members. For example, this -// graph has two distinct cycles: -// -// A-->B-->C -// ^ | | -// +---+---+ -// -// The cycles in this graph are AB and ABC. When A's external refcount -// transitions from 0->1, we say that A takes "cycle references" on both -// cycles. Taking a cycle reference means incrementing the cycle refcount of -// all defs in the cycle. Since A and B are common to both cycles, A and B's -// cycle refcounts will be incremented by two, and C's will be incremented by -// one. Likewise, when A's external refcount transitions from 1->0, we -// decrement A and B's cycle refcounts by two and C's by one. We collect a -// cyclic type when its cycle refcount drops to zero. A precondition for this -// is that the external refcount has dropped to zero also. -// -// This algorithm is relatively cheap, since it only requires extra work when -// the external refcount on a cyclic type transitions from 0->1 or 1->0. - -static void upb_msgdef_free(upb_msgdef *m); -static void upb_enumdef_free(upb_enumdef *e); -static void upb_unresolveddef_free(struct _upb_unresolveddef *u); - -static void upb_def_free(upb_def *def) -{ - switch(def->type) { - case UPB_DEF_MSG: - upb_msgdef_free(upb_downcast_msgdef(def)); - break; - case UPB_DEF_ENUM: - upb_enumdef_free(upb_downcast_enumdef(def)); - break; - case UPB_DEF_SVC: - assert(false); /* Unimplemented. */ - break; - case UPB_DEF_UNRESOLVED: - upb_unresolveddef_free(upb_downcast_unresolveddef(def)); - break; - default: - assert(false); - } -} - -// Depth-first search for all cycles that include cycle_base. Returns the -// number of paths from def that lead to cycle_base, which is equivalent to the -// number of cycles def is in that include cycle_base. -// -// open_defs tracks the set of nodes that are currently being visited in the -// search so we can stop the search if we detect a cycles that do not involve -// cycle_base. We can't color the nodes as we go by writing to a member of the -// def, because another thread could be performing the search concurrently. -static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, - upb_msgdef **open_defs, int num_open_defs, - bool ref) { - bool found = false; - for(int i = 0; i < num_open_defs; i++) { - if(open_defs[i] == m) { - // We encountered a cycle that did not involve cycle_base. - found = true; - break; - } - } - - if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) { - return 0; - } else if(m == cycle_base) { - return 1; - } else { - int path_count = 0; - if(cycle_base == NULL) { - cycle_base = m; - } else { - open_defs[num_open_defs++] = m; - } - upb_msg_iter iter = upb_msg_begin(m); - for(; !upb_msg_done(iter); iter = upb_msg_next(m, iter)) { - upb_fielddef *f = upb_msg_iter_field(iter); - upb_def *def = f->def; - if(upb_issubmsg(f) && def->is_cyclic) { - upb_msgdef *sub_m = upb_downcast_msgdef(def); - path_count += upb_cycle_ref_or_unref(sub_m, cycle_base, open_defs, - num_open_defs, ref); - } - } - if(ref) { - upb_atomic_add(&m->cycle_refcount, path_count); - } else { - if(upb_atomic_add(&m->cycle_refcount, -path_count)) - upb_def_free(UPB_UPCAST(m)); - } - return path_count; - } -} - -void _upb_def_reftozero(upb_def *def) { - if(def->is_cyclic) { - upb_msgdef *m = upb_downcast_msgdef(def); - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - upb_cycle_ref_or_unref(m, NULL, open_defs, 0, false); - } else { - upb_def_free(def); - } -} - -void _upb_def_cyclic_ref(upb_def *def) { - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); -} - -static void upb_def_init(upb_def *def, upb_deftype type) { - def->type = type; - def->is_cyclic = 0; // We detect this later, after resolving refs. - def->search_depth = 0; - def->fqname = NULL; - upb_atomic_init(&def->refcount, 1); -} - -static void upb_def_uninit(upb_def *def) { - upb_string_unref(def->fqname); -} - - -/* upb_defbuilder ************************************************************/ - -// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in -// the format defined in descriptor.proto. The output of a upb_defbuilder is -// a list of upb_def* that possibly contain unresolved references. -// -// We use a separate object (upb_defbuilder) instead of having the defs handle -// the parse themselves because we need to store state that is only necessary -// during the building process itself. -// -// All of the handlers registration in this file must be done using the -// low-level upb_register_typed_* interface, since we might not have a msgdef -// yet (in the case of bootstrapping). This makes it more laborious than it -// will be for real users. - -// upb_deflist: A little dynamic array for storing a growing list of upb_defs. -typedef struct { - upb_def **defs; - uint32_t len; - uint32_t size; -} upb_deflist; - -static void upb_deflist_init(upb_deflist *l) { +void upb_deflist_init(upb_deflist *l) { l->size = 8; l->defs = malloc(l->size * sizeof(void*)); l->len = 0; } -static void upb_deflist_uninit(upb_deflist *l) { +void upb_deflist_uninit(upb_deflist *l) { for(uint32_t i = 0; i < l->len; i++) upb_def_unref(l->defs[i]); free(l->defs); } -static void upb_deflist_push(upb_deflist *l, upb_def *d) { +void upb_deflist_push(upb_deflist *l, upb_def *d) { if(l->len == l->size) { l->size *= 2; l->defs = realloc(l->defs, l->size * sizeof(void*)); @@ -221,179 +38,74 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { l->defs[l->len++] = d; } -static upb_def *upb_deflist_last(upb_deflist *l) { - return l->defs[l->len-1]; -} - -// Qualify the defname for all defs starting with offset "start" with "str". -static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { - for(uint32_t i = start; i < l->len; i++) { - upb_def *def = l->defs[i]; - upb_string *name = def->fqname; - def->fqname = upb_join(str, name); - upb_string_unref(name); - } -} - -// We keep a stack of all the messages scopes we are currently in, as well as -// the top-level file scope. This is necessary to correctly qualify the -// definitions that are contained inside. "name" tracks the name of the -// message or package (a bare name -- not qualified by any enclosing scopes). -typedef struct { - upb_string *name; - // Index of the first def that is under this scope. For msgdefs, the - // msgdef itself is at start-1. - int start; -} upb_defbuilder_frame; - -struct _upb_defbuilder { - upb_deflist defs; - upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; - int stack_len; - upb_status status; - upb_symtab *symtab; - - uint32_t number; - upb_string *name; - bool saw_number; - bool saw_name; - - upb_string *default_string; - - upb_fielddef *f; -}; - -// Forward declares for top-level file descriptors. -static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h); -static upb_mhandlers * upb_enumdef_register_EnumDescriptorProto(upb_handlers *h); - -upb_defbuilder *upb_defbuilder_new(upb_symtab *s) { - upb_defbuilder *b = malloc(sizeof(*b)); - upb_deflist_init(&b->defs); - upb_status_init(&b->status); - b->symtab = s; - b->stack_len = 0; - b->name = NULL; - b->default_string = NULL; - return b; -} - -static void upb_defbuilder_free(upb_defbuilder *b) { - upb_string_unref(b->name); - upb_status_uninit(&b->status); - upb_deflist_uninit(&b->defs); - upb_string_unref(b->default_string); - while (b->stack_len > 0) { - upb_defbuilder_frame *f = &b->stack[--b->stack_len]; - upb_string_unref(f->name); - } - free(b); -} - -static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) { - if (b->stack_len <= 1) return NULL; - int index = b->stack[b->stack_len-1].start - 1; - assert(index >= 0); - return upb_downcast_msgdef(b->defs.defs[index]); -} -static upb_def *upb_defbuilder_last(upb_defbuilder *b) { - return upb_deflist_last(&b->defs); -} - -// Start/end handlers for FileDescriptorProto and DescriptorProto (the two -// entities that have names and can contain sub-definitions. -void upb_defbuilder_startcontainer(upb_defbuilder *b) { - upb_defbuilder_frame *f = &b->stack[b->stack_len++]; - f->start = b->defs.len; - f->name = NULL; -} +/* upb_def ********************************************************************/ -void upb_defbuilder_endcontainer(upb_defbuilder *b) { - upb_defbuilder_frame *f = &b->stack[--b->stack_len]; - upb_deflist_qualify(&b->defs, f->name, f->start); - upb_string_unref(f->name); -} +static void upb_msgdef_free(upb_msgdef *m); +static void upb_enumdef_free(upb_enumdef *e); +static void upb_unresolveddef_free(struct _upb_unresolveddef *u); -void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { - upb_defbuilder_frame *f = &b->stack[b->stack_len-1]; - upb_string_unref(f->name); - f->name = upb_string_getref(str); -} +#ifndef NDEBUG +static bool upb_def_ismutable(upb_def *def) { return def->symtab == NULL; } +#endif -// Handlers for google.protobuf.FileDescriptorProto. -static upb_flow_t upb_defbuilder_FileDescriptorProto_startmsg(void *_b) { - upb_defbuilder *b = _b; - upb_defbuilder_startcontainer(b); - return UPB_CONTINUE; +static void upb_def_free(upb_def *def) { + switch (def->type) { + case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break; + case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break; + case UPB_DEF_UNRESOLVED: + upb_unresolveddef_free(upb_downcast_unresolveddef(def)); break; + default: + assert(false); + } } -static void upb_defbuilder_FileDescriptorProto_endmsg(void *_b, - upb_status *status) { - (void)status; - upb_defbuilder *b = _b; - upb_defbuilder_endcontainer(b); +upb_def *upb_def_dup(upb_def *def) { + switch (def->type) { + case UPB_DEF_MSG: return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef(def))); + case UPB_DEF_ENUM: return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef(def))); + default: assert(false); return NULL; + } } -static upb_flow_t upb_defbuilder_FileDescriptorProto_package(void *_b, - upb_value fval, - upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - upb_defbuilder_setscopename(b, upb_value_getstr(val)); - return UPB_CONTINUE; +// Prior to being in a symtab, the def's refcount controls the lifetime of the +// def itself. If the refcount falls to zero, the def is deleted. Once the +// def belongs to a symtab, the def is owned by the symtab and its refcount +// determines whether the def owns a ref on the symtab or not. +void upb_def_ref(upb_def *def) { + if (upb_atomic_ref(&def->refcount) && def->symtab) + upb_symtab_ref(def->symtab); } -static upb_mhandlers *upb_defbuilder_register_FileDescriptorProto( - upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_defbuilder_FileDescriptorProto_startmsg); - upb_mhandlers_setendmsg(m, &upb_defbuilder_FileDescriptorProto_endmsg); - -#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM -#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE - upb_fhandlers *f = - upb_mhandlers_newfhandlers(m, FNUM(PACKAGE), FTYPE(PACKAGE), false); - upb_fhandlers_setvalue(f, &upb_defbuilder_FileDescriptorProto_package); - - upb_mhandlers_newfhandlers_subm(m, FNUM(MESSAGE_TYPE), FTYPE(MESSAGE_TYPE), true, - upb_msgdef_register_DescriptorProto(h)); - upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, - upb_enumdef_register_EnumDescriptorProto(h)); - // TODO: services, extensions - return m; +static void upb_def_movetosymtab(upb_def *d, upb_symtab *s) { + assert(upb_atomic_read(&d->refcount) > 0); + d->symtab = s; + if (!upb_atomic_unref(&d->refcount)) upb_symtab_ref(s); + upb_msgdef *m = upb_dyncast_msgdef(d); + if (m) upb_inttable_compact(&m->itof); } -#undef FNUM -#undef FTYPE -// Handlers for google.protobuf.FileDescriptorSet. -static bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, - bool allow_redef, upb_status *status); - -static void upb_defbuilder_FileDescriptorSet_onendmsg(void *_b, - upb_status *status) { - upb_defbuilder *b = _b; - if (upb_ok(status)) - upb_symtab_add_defs(b->symtab, b->defs.defs, b->defs.len, false, status); - upb_defbuilder_free(b); +void upb_def_unref(upb_def *def) { + if (!def) return; + if (upb_atomic_unref(&def->refcount)) { + if (def->symtab) { + upb_symtab_unref(def->symtab); + // Def might be deleted now. + } else { + upb_def_free(def); + } + } } -static upb_mhandlers *upb_defbuilder_register_FileDescriptorSet(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setendmsg(m, upb_defbuilder_FileDescriptorSet_onendmsg); - -#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM -#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE - upb_mhandlers_newfhandlers_subm(m, FNUM(FILE), FTYPE(FILE), true, - upb_defbuilder_register_FileDescriptorProto(h)); - return m; +static void upb_def_init(upb_def *def, upb_deftype_t type) { + def->type = type; + def->fqname = NULL; + def->symtab = NULL; + upb_atomic_init(&def->refcount, 1); } -#undef FNUM -#undef FTYPE -upb_mhandlers *upb_defbuilder_reghandlers(upb_handlers *h) { - h->should_jit = false; - return upb_defbuilder_register_FileDescriptorSet(h); +static void upb_def_uninit(upb_def *def) { + upb_string_unref(def->fqname); } @@ -428,6 +140,14 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { /* upb_enumdef ****************************************************************/ +upb_enumdef *upb_enumdef_new() { + upb_enumdef *e = malloc(sizeof(*e)); + upb_def_init(&e->base, UPB_DEF_ENUM); + upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); + return e; +} + static void upb_enumdef_free(upb_enumdef *e) { upb_enum_iter i; for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { @@ -440,129 +160,29 @@ static void upb_enumdef_free(upb_enumdef *e) { free(e); } -// google.protobuf.EnumValueDescriptorProto. -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { - upb_defbuilder *b = _b; - b->saw_number = false; - b->saw_name = false; - return UPB_CONTINUE; -} - -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_b, - upb_value fval, - upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - upb_string_unref(b->name); - b->name = upb_string_getref(upb_value_getstr(val)); - b->saw_name = true; - return UPB_CONTINUE; -} - -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_number(void *_b, - upb_value fval, - upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - b->number = upb_value_getint32(val); - b->saw_number = true; - return UPB_CONTINUE; -} - -static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b, - upb_status *status) { - upb_defbuilder *b = _b; - if(!b->saw_number || !b->saw_name) { - upb_seterr(status, UPB_ERROR, "Enum value missing name or number."); - return; - } - upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); - if (upb_inttable_count(&e->iton) == 0) { - // The default value of an enum (in the absence of an explicit default) is - // its first listed value. - e->default_value = b->number; - } - upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; - upb_iton_ent iton_ent = {0, b->name}; - upb_strtable_insert(&e->ntoi, &ntoi_ent.e); - upb_inttable_insert(&e->iton, b->number, &iton_ent); - // We don't unref "name" because we pass our ref to the iton entry of the - // table. strtables can ref their keys, but the inttable doesn't know that - // the value is a string. - b->name = NULL; -} - -static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto( - upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumValueDescriptorProto_startmsg); - upb_mhandlers_setendmsg(m, &upb_enumdef_EnumValueDescriptorProto_endmsg); - -#define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM -#define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f; - f = upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); - upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_name); - - f = upb_mhandlers_newfhandlers(m, FNUM(NUMBER), FTYPE(NUMBER), false); - upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_number); - return m; -} -#undef FNUM -#undef FTYPE - -// google.protobuf.EnumDescriptorProto. -static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { - upb_defbuilder *b = _b; - upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); - upb_deflist_push(&b->defs, UPB_UPCAST(e)); - return UPB_CONTINUE; -} - -static void upb_enumdef_EnumDescriptorProto_endmsg(void *_b, upb_status *status) { - upb_defbuilder *b = _b; - upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); - if (upb_defbuilder_last((upb_defbuilder*)_b)->fqname == NULL) { - upb_seterr(status, UPB_ERROR, "Enum had no name."); - return; - } - if (upb_inttable_count(&e->iton) == 0) { - upb_seterr(status, UPB_ERROR, "Enum had no values."); - return; +upb_enumdef *upb_enumdef_dup(upb_enumdef *e) { + upb_enumdef *new_e = upb_enumdef_new(); + upb_enum_iter i; + for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { + assert(upb_enumdef_addval(new_e, upb_enum_iter_name(i), + upb_enum_iter_number(i))); } + return new_e; } -static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_b, - upb_value fval, - upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); - upb_string_unref(e->base.fqname); - e->base.fqname = upb_string_getref(upb_value_getstr(val)); - return UPB_CONTINUE; +bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num) { + if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL)) return false; + upb_ntoi_ent ntoi_ent = {{name, 0}, num}; + upb_iton_ent iton_ent = {0, name}; + upb_strtable_insert(&e->ntoi, &ntoi_ent.e); + upb_inttable_insert(&e->iton, num, &iton_ent); // Uses strtable's ref on name + return true; } -static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumDescriptorProto_startmsg); - upb_mhandlers_setendmsg(m, &upb_enumdef_EnumDescriptorProto_endmsg); - -#define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM -#define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f = - upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); - upb_fhandlers_setvalue(f, &upb_enumdef_EnumDescriptorProto_name); - - upb_mhandlers_newfhandlers_subm(m, FNUM(VALUE), FTYPE(VALUE), true, - upb_enumdef_register_EnumValueDescriptorProto(h)); - return m; +void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) { + assert(upb_def_ismutable(UPB_UPCAST(e))); + e->defaultval = val; } -#undef FNUM -#undef FTYPE upb_enum_iter upb_enum_begin(upb_enumdef *e) { // We could iterate over either table here; the choice is arbitrary. @@ -573,59 +193,89 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { return upb_inttable_next(&e->iton, iter); } -upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) { +upb_string *upb_enumdef_iton(upb_enumdef *def, int32_t num) { upb_iton_ent *e = (upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); return e ? e->string : NULL; } -bool upb_enumdef_ntoi(upb_enumdef *def, upb_string *name, upb_enumval_t *num) { +bool upb_enumdef_ntoi(upb_enumdef *def, upb_string *name, int32_t *num) { upb_ntoi_ent *e = (upb_ntoi_ent*)upb_strtable_lookup(&def->ntoi, name); if (!e) return false; - *num = e->value; + if (num) *num = e->value; return true; } /* upb_fielddef ***************************************************************/ +upb_fielddef *upb_fielddef_new() { + upb_fielddef *f = malloc(sizeof(*f)); + f->msgdef = NULL; + f->def = NULL; + upb_atomic_init(&f->refcount, 1); + f->finalized = false; + f->type = 0; + f->label = UPB_LABEL(OPTIONAL); + f->hasbit = 0; + f->offset = 0; + f->number = 0; // not a valid field number. + f->name = NULL; + f->accessor = NULL; + upb_value_setfielddef(&f->fval, f); + return f; +} + static void upb_fielddef_free(upb_fielddef *f) { if (upb_isstring(f)) { - upb_string_unref(upb_value_getstr(f->default_value)); - } else if (upb_issubmsg(f)) { - upb_msg *m = upb_value_getmsg(f->default_value); - assert(m); - // We cheat a bit here. We need to unref msg, but we don't have a reliable - // way of accessing the msgdef (which is required by upb_msg_unref()), - // because f->def may have already been collected as part of a cycle if - // this is an unowned ref. But we know that default messages never contain - // references to other messages, and their only string references are to - // the singleton empty string, so we can safely unref+free msg directly. - if (upb_atomic_unref(&m->refcount)) free(m); + upb_string_unref(upb_value_getstr(f->defaultval)); } upb_string_unref(f->name); - if(f->owned) { - upb_def_unref(f->def); - } free(f); } +void upb_fielddef_ref(upb_fielddef *f) { + // TODO. + (void)f; +} + +void upb_fielddef_unref(upb_fielddef *f) { + // TODO. + (void)f; + if (!f) return; + if (upb_atomic_unref(&f->refcount)) { + if (f->msgdef) { + upb_msgdef_unref(f->msgdef); + // fielddef might be deleted now. + } else { + upb_fielddef_free(f); + } + } +} + +upb_fielddef *upb_fielddef_dup(upb_fielddef *f) { + upb_fielddef *newf = upb_fielddef_new(); + newf->msgdef = f->msgdef; + newf->type = f->type; + newf->label = f->label; + newf->number = f->number; + newf->name = f->name; + upb_fielddef_settypename(newf, f->def->fqname); + return f; +} + static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) { - if(f->owned) upb_def_unref(f->def); + assert(upb_dyncast_unresolveddef(f->def)); + upb_def_unref(f->def); f->def = def; - // We will later make the ref unowned if it is a part of a cycle. - f->owned = true; - upb_def_ref(def); - if (upb_issubmsg(f)) { - upb_msgdef *md = upb_downcast_msgdef(def); - upb_value_setmsg(&f->default_value, upb_msg_getref(md->default_message)); - } else if (f->type == UPB_TYPE(ENUM)) { - upb_string *str = upb_value_getstr(f->default_value); + if (f->type == UPB_TYPE(ENUM)) { + // Resolve the enum's default from a string to an integer. + upb_string *str = upb_value_getstr(f->defaultval); assert(str); // Should point to either a real default or the empty string. upb_enumdef *e = upb_downcast_enumdef(f->def); - upb_enumval_t val = 0; + int32_t val = 0; if (str == upb_emptystring()) { - upb_value_setint32(&f->default_value, e->default_value); + upb_value_setint32(&f->defaultval, e->defaultval); } else { bool success = upb_enumdef_ntoi(e, str, &val); upb_string_unref(str); @@ -634,368 +284,201 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) { "member of the enum", UPB_STRARG(str)); return false; } - upb_value_setint32(&f->default_value, val); + upb_value_setint32(&f->defaultval, val); } } return true; } -static upb_flow_t upb_fielddef_startmsg(void *_b) { - upb_defbuilder *b = _b; - upb_fielddef *f = malloc(sizeof(*f)); - f->number = -1; - f->name = NULL; - f->def = NULL; - f->owned = false; - f->msgdef = upb_defbuilder_top(b); - b->f = f; - return UPB_CONTINUE; -} - -// Converts the default value in string "dstr" into "d". Passes a ref on dstr. -// Returns true on success. -static bool upb_fielddef_setdefault(upb_string *dstr, upb_value *d, int type) { - bool success = true; - if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) { - // We'll keep the ref we had on it. We include enums in this case because - // we need the enumdef to resolve the name, but we may not have it yet. - // We'll resolve it later. - if (dstr) { - upb_value_setstr(d, dstr); - } else { - upb_value_setstr(d, upb_emptystring()); - } - } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) { - // We don't expect to get a default value. - upb_string_unref(dstr); - if (dstr != NULL) success = false; - } else { - // The strto* functions need the string to be NULL-terminated. - char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr); - char *end; - upb_string_unref(dstr); - switch (type) { - case UPB_TYPE(INT32): - case UPB_TYPE(SINT32): - case UPB_TYPE(SFIXED32): - if (strz) { - long val = strtol(strz, &end, 0); - if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) - success = false; - else - upb_value_setint32(d, val); - } else { - upb_value_setint32(d, 0); - } - break; - case UPB_TYPE(INT64): - case UPB_TYPE(SINT64): - case UPB_TYPE(SFIXED64): - if (strz) { - upb_value_setint64(d, strtoll(strz, &end, 0)); - if (errno == ERANGE || *end) success = false; - } else { - upb_value_setint64(d, 0); - } - break; - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - if (strz) { - unsigned long val = strtoul(strz, &end, 0); - if (val > UINT32_MAX || errno == ERANGE || *end) - success = false; - else - upb_value_setuint32(d, val); - } else { - upb_value_setuint32(d, 0); - } - break; - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - if (strz) { - upb_value_setuint64(d, strtoull(strz, &end, 0)); - if (errno == ERANGE || *end) success = false; - } else { - upb_value_setuint64(d, 0); - } - break; - case UPB_TYPE(DOUBLE): - if (strz) { - upb_value_setdouble(d, strtod(strz, &end)); - if (errno == ERANGE || *end) success = false; - } else { - upb_value_setdouble(d, 0.0); - } - break; - case UPB_TYPE(FLOAT): - if (strz) { - upb_value_setfloat(d, strtof(strz, &end)); - if (errno == ERANGE || *end) success = false; - } else { - upb_value_setfloat(d, 0.0); - } - break; - case UPB_TYPE(BOOL): - if (!strz || strcmp(strz, "false") == 0) - upb_value_setbool(d, false); - else if (strcmp(strz, "true") == 0) - upb_value_setbool(d, true); - else - success = false; - break; - } - free(strz); - } - return success; +void upb_fielddef_setnumber(upb_fielddef *f, int32_t number) { + assert(f->msgdef == NULL); + f->number = number; } -static void upb_fielddef_endmsg(void *_b, upb_status *status) { - upb_defbuilder *b = _b; - upb_fielddef *f = b->f; - // TODO: verify that all required fields were present. - assert(f->number != -1 && f->name != NULL); - assert((f->def != NULL) == upb_hasdef(f)); +void upb_fielddef_setname(upb_fielddef *f, upb_string *name) { + assert(f->msgdef == NULL); + f->name = upb_string_getref(name); +} - // Field was successfully read, add it as a field of the msgdef. - upb_msgdef *m = upb_defbuilder_top(b); - upb_itof_ent itof_ent = {0, f->type, upb_types[f->type].native_wire_type, f}; - upb_ntof_ent ntof_ent = {{f->name, 0}, f}; - upb_inttable_insert(&m->itof, f->number, &itof_ent); - upb_strtable_insert(&m->ntof, &ntof_ent.e); +void upb_fielddef_settype(upb_fielddef *f, uint8_t type) { + assert(!f->finalized); + f->type = type; +} - upb_string *dstr = b->default_string; - b->default_string = NULL; - if (!upb_fielddef_setdefault(dstr, &f->default_value, f->type)) { - // We don't worry too much about giving a great error message since the - // compiler should have ensured this was correct. - upb_seterr(status, UPB_ERROR, "Error converting default value."); - return; - } +void upb_fielddef_setlabel(upb_fielddef *f, uint8_t label) { + assert(!f->finalized); + f->label = label; +} +void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) { + assert(!f->finalized); + // TODO: string ownership? + f->defaultval = value; } -static upb_flow_t upb_fielddef_ontype(void *_b, upb_value fval, upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - b->f->type = upb_value_getint32(val); - return UPB_CONTINUE; -} - -static upb_flow_t upb_fielddef_onlabel(void *_b, upb_value fval, upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - b->f->label = upb_value_getint32(val); - return UPB_CONTINUE; -} - -static upb_flow_t upb_fielddef_onnumber(void *_b, upb_value fval, upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - b->f->number = upb_value_getint32(val); - return UPB_CONTINUE; -} - -static upb_flow_t upb_fielddef_onname(void *_b, upb_value fval, upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - upb_string_unref(b->f->name); - b->f->name = upb_string_getref(upb_value_getstr(val)); - return UPB_CONTINUE; -} - -static upb_flow_t upb_fielddef_ontypename(void *_b, upb_value fval, - upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - upb_def_unref(b->f->def); - b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); - b->f->owned = true; - return UPB_CONTINUE; -} - -static upb_flow_t upb_fielddef_ondefaultval(void *_b, upb_value fval, - upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - // Have to convert from string to the correct type, but we might not know the - // type yet. - upb_string_unref(b->default_string); - b->default_string = upb_string_getref(upb_value_getstr(val)); - return UPB_CONTINUE; -} - -static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto( - upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg); - upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg); - -#define FIELD(name, handler) \ - upb_fhandlers_setvalue( \ - upb_mhandlers_newfhandlers(m, \ - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \ - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \ - false), \ - handler); - FIELD(TYPE, &upb_fielddef_ontype); - FIELD(LABEL, &upb_fielddef_onlabel); - FIELD(NUMBER, &upb_fielddef_onnumber); - FIELD(NAME, &upb_fielddef_onname); - FIELD(TYPE_NAME, &upb_fielddef_ontypename); - FIELD(DEFAULT_VALUE, &upb_fielddef_ondefaultval); - return m; +void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) { + assert(!f->finalized); + // TODO: string ownership? + f->fval = fval; } -#undef FNUM -#undef FTYPE +void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl) { + assert(!f->finalized); + f->accessor = vtbl; +} -/* upb_msgdef *****************************************************************/ +void upb_fielddef_settypename(upb_fielddef *f, upb_string *name) { + upb_def_unref(f->def); + f->def = UPB_UPCAST(upb_unresolveddef_new(name)); +} -static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) { - // Sort by data size (ascending) to reduce padding. +// Returns an ordering of fields based on: +// 1. value size (small to large). +// 2. field number. +static int upb_fielddef_cmpval(const void *_f1, const void *_f2) { + upb_fielddef *f1 = *(void**)_f1; + upb_fielddef *f2 = *(void**)_f2; size_t size1 = upb_types[f1->type].size; size_t size2 = upb_types[f2->type].size; if (size1 != size2) return size1 - size2; - // Otherwise return in number order (just so we get a reproduceable order. + // Otherwise return in number order. return f1->number - f2->number; } -static int upb_compare_fields(const void *f1, const void *f2) { - return upb_compare_typed_fields(*(void**)f1, *(void**)f2); +// Returns an ordering of all fields based on: +// 1. required/optional (required fields first). +// 2. field number +static int upb_fielddef_cmphasbit(const void *_f1, const void *_f2) { + upb_fielddef *f1 = *(void**)_f1; + upb_fielddef *f2 = *(void**)_f2; + size_t req1 = f1->label == UPB_LABEL(REQUIRED); + size_t req2 = f2->label == UPB_LABEL(REQUIRED); + if (req1 != req2) return req1 - req2; + // Otherwise return in number order. + return f1->number - f2->number; } -// google.protobuf.DescriptorProto. -static upb_flow_t upb_msgdef_startmsg(void *_b) { - upb_defbuilder *b = _b; + +/* upb_msgdef *****************************************************************/ + +upb_msgdef *upb_msgdef_new() { upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); - upb_atomic_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); - m->default_message = NULL; - upb_deflist_push(&b->defs, UPB_UPCAST(m)); - upb_defbuilder_startcontainer(b); - return UPB_CONTINUE; -} - -static void upb_msgdef_endmsg(void *_b, upb_status *status) { - upb_defbuilder *b = _b; - upb_msgdef *m = upb_defbuilder_top(b); - if(!m->base.fqname) { - upb_seterr(status, UPB_ERROR, "Encountered message with no name."); - return; + m->size = 0; + m->hasbit_bytes = 0; + m->extension_start = 0; + m->extension_end = 0; + return m; +} + +static void upb_msgdef_free(upb_msgdef *m) { + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) + upb_fielddef_free(upb_msg_iter_field(i)); + upb_strtable_free(&m->ntof); + upb_inttable_free(&m->itof); + upb_def_uninit(&m->base); + free(m); +} + +upb_msgdef *upb_msgdef_dup(upb_msgdef *m) { + upb_msgdef *newm = upb_msgdef_new(); + newm->size = m->size; + newm->hasbit_bytes = m->hasbit_bytes; + newm->extension_start = m->extension_start; + newm->extension_end = m->extension_end; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) + upb_msgdef_addfield(newm, upb_fielddef_dup(upb_msg_iter_field(i))); + return newm; +} + +void upb_msgdef_setsize(upb_msgdef *m, uint16_t size) { + assert(upb_def_ismutable(UPB_UPCAST(m))); + m->size = size; +} + +void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes) { + assert(upb_def_ismutable(UPB_UPCAST(m))); + m->hasbit_bytes = bytes; +} + +void upb_msgdef_setextension_start(upb_msgdef *m, uint32_t start) { + assert(upb_def_ismutable(UPB_UPCAST(m))); + m->extension_start = start; +} + +void upb_msgdef_setextension_end(upb_msgdef *m, uint32_t end) { + assert(upb_def_ismutable(UPB_UPCAST(m))); + m->extension_end = end; +} + +bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) { + assert(upb_atomic_read(&f->refcount) > 0); + if (!upb_atomic_unref(&f->refcount)) upb_msgdef_ref(m); + if (upb_msgdef_itof(m, f->number) || upb_msgdef_ntof(m, f->name)) { + upb_fielddef_unref(f); + return false; } + assert(f->msgdef == NULL); + f->msgdef = m; + upb_itof_ent itof_ent = {0, f}; + upb_ntof_ent ntof_ent = {{f->name, 0}, f}; + upb_inttable_insert(&m->itof, f->number, &itof_ent); + upb_strtable_insert(&m->ntof, &ntof_ent.e); + return true; +} + +static int upb_div_round_up(int numerator, int denominator) { + /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ + return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; +} - upb_inttable_compact(&m->itof); - // Create an ordering over the fields. - int n = upb_msgdef_numfields(m); - upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n); - int field = 0; +void upb_msgdef_layout(upb_msgdef *m) { + // Create an ordering over the fields, but only include fields with accessors. + upb_fielddef **sorted_fields = + malloc(sizeof(upb_fielddef*) * upb_msgdef_numfields(m)); + int n = 0; upb_msg_iter i; for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - sorted_fields[field++] = upb_msg_iter_field(i); + upb_fielddef *f = upb_msg_iter_field(i); + if (f->accessor) sorted_fields[n++] = f; } - qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields); - // Assign offsets in the msg. - m->set_flags_bytes = upb_div_round_up(n, 8); - m->size = sizeof(upb_atomic_t) + m->set_flags_bytes; + m->hasbit_bytes = upb_div_round_up(n, 8); + m->size = m->hasbit_bytes; // + header_size? + // Assign hasbits. + qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmphasbit); + for (int i = 0; i < n; i++) { + upb_fielddef *f = sorted_fields[i]; + f->hasbit = i; + } + + // Assign value offsets. + qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmpval); size_t max_align = 0; for (int i = 0; i < n; i++) { upb_fielddef *f = sorted_fields[i]; const upb_type_info *type_info = &upb_types[f->type]; - - // This identifies the set bit. When we implement is_initialized (a - // general check about whether all required bits are set) we will probably - // want to use a different ordering that puts all the required bits - // together. - f->field_index = i; - f->set_bit_mask = 1 << (i % 8); - f->set_bit_offset = i / 8; - - size_t size, align; - if (upb_isarray(f)) { + size_t size = type_info->size; + size_t align = type_info->align; + if (upb_isseq(f)) { size = sizeof(void*); align = alignof(void*); - } else { - size = type_info->size; - align = type_info->align; } + // General alignment rules are: each member must be at an address that is a // multiple of that type's alignment. Also, the size of the structure as a // whole must be a multiple of the greatest alignment of any member. - size_t offset = upb_align_up(m->size, align); - // Offsets are relative to the end of the refcount. - f->byte_offset = offset - sizeof(upb_atomic_t); - m->size = offset + size; + f->offset = upb_align_up(m->size, align); + m->size = f->offset + size; max_align = UPB_MAX(max_align, align); } - free(sorted_fields); - if (max_align > 0) m->size = upb_align_up(m->size, max_align); - // Create default message instance, an immutable message with all default - // values set (except submessages, which are simply marked as unset). We - // could alternatively leave all set bits unset, but this would make - // upb_msg_get() take its unexpected branch more often for no good reason. - m->default_message = upb_msg_new(m); - for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if (!upb_issubmsg(f) && !f->type == UPB_TYPE(ENUM)) { - upb_msg_set(m->default_message, f, f->default_value); - } - } - - upb_defbuilder_endcontainer(b); -} - -static upb_flow_t upb_msgdef_onname(void *_b, upb_value fval, upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - assert(val.type == UPB_TYPE(STRING)); - upb_msgdef *m = upb_defbuilder_top(b); - upb_string_unref(m->base.fqname); - m->base.fqname = upb_string_getref(upb_value_getstr(val)); - upb_defbuilder_setscopename(b, upb_value_getstr(val)); - return UPB_CONTINUE; -} - -static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_msgdef_startmsg); - upb_mhandlers_setendmsg(m, &upb_msgdef_endmsg); - -#define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM -#define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f = - upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); - upb_fhandlers_setvalue(f, &upb_msgdef_onname); - - upb_mhandlers_newfhandlers_subm(m, FNUM(FIELD), FTYPE(FIELD), true, - upb_fielddef_register_FieldDescriptorProto(h)); - upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, - upb_enumdef_register_EnumDescriptorProto(h)); - - // DescriptorProto is self-recursive, so we must link the definition. - upb_mhandlers_newfhandlers_subm( - m, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, m); - - // TODO: extensions. - return m; -} -#undef FNUM -#undef FTYPE - -static void upb_msgdef_free(upb_msgdef *m) -{ - upb_msg_unref(m->default_message, m); - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) - upb_fielddef_free(upb_msg_iter_field(i)); - upb_strtable_free(&m->ntof); - upb_inttable_free(&m->itof); - upb_def_uninit(&m->base); - free(m); + free(sorted_fields); } upb_msg_iter upb_msg_begin(upb_msgdef *m) { @@ -1006,22 +489,49 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { return upb_inttable_next(&m->itof, iter); } -/* upb_symtab adding defs *****************************************************/ -// This is a self-contained group of functions that, given a list of upb_defs -// whose references are not yet resolved, resolves references and adds them -// atomically to a upb_symtab. +/* upb_symtabtxn **************************************************************/ typedef struct { upb_strtable_entry e; upb_def *def; } upb_symtab_ent; +void upb_symtabtxn_init(upb_symtabtxn *t) { + upb_strtable_init(&t->deftab, 16, sizeof(upb_symtab_ent)); +} + +void upb_symtabtxn_uninit(upb_symtabtxn *txn) { + upb_strtable *t = &txn->deftab; + upb_symtab_ent *e; + for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) + upb_def_unref(e->def); + upb_strtable_free(t); +} + +bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def) { + // TODO: check if already present. + upb_symtab_ent e = {{def->fqname, 0}, def}; + upb_strtable_insert(&t->deftab, &e.e); + return true; +} + +#if 0 +err: + // We need to free all defs from "tmptab." + upb_rwlock_unlock(&s->lock); + for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; + e = upb_strtable_next(&tmptab, &e->e)) { + upb_def_unref(e->def); + } + upb_strtable_free(&tmptab); + return false; +#endif + // Given a symbol and the base symbol inside which it is defined, find the // symbol's definition in t. static upb_symtab_ent *upb_resolve(upb_strtable *t, - upb_string *base, upb_string *sym) -{ + upb_string *base, upb_string *sym) { if(upb_string_len(sym) == 0) return NULL; if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) { // Symbols starting with '.' are absolute, so we do a single lookup. @@ -1060,212 +570,63 @@ static upb_symtab_ent *upb_resolve(upb_strtable *t, } } -// Performs a pass over the type graph to find all cycles that include m. -static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) -{ - if(depth > UPB_MAX_TYPE_DEPTH) { - // We have found a non-cyclic path from the base of the type tree that - // exceeds the maximum allowed depth. There are many situations in upb - // where we recurse over the type tree (like for example, right now) and an - // absurdly deep tree could cause us to stack overflow on systems with very - // limited stacks. - upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was found at " - "depth %d in the type graph, which exceeds the maximum type " - "depth of %d.", UPB_UPCAST(m)->fqname, depth, - UPB_MAX_TYPE_DEPTH); - return false; - } else if(UPB_UPCAST(m)->search_depth == 1) { - // Cycle! - int cycle_len = depth - 1; - if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { - upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was involved " - "in a cycle of length %d, which exceeds the maximum type " - "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, - UPB_MAX_TYPE_CYCLE_LEN); - return false; - } - return true; - } else if(UPB_UPCAST(m)->search_depth > 0) { - // This was a cycle, but did not originate from the base of our search tree. - // We'll find it when we call find_cycles() on this node directly. - return false; - } else { - UPB_UPCAST(m)->search_depth = ++depth; - bool cycle_found = false; - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if(!upb_issubmsg(f)) continue; - upb_def *sub_def = f->def; - upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); - if(upb_symtab_findcycles(sub_m, depth, status)) { - cycle_found = true; - UPB_UPCAST(m)->is_cyclic = true; - if(f->owned) { - upb_atomic_unref(&sub_def->refcount); - f->owned = false; - } - } - } - UPB_UPCAST(m)->search_depth = 0; - return cycle_found; - } +upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t) { + return upb_strtable_begin(&t->deftab); } -// Given a table of pending defs "tmptab" and a table of existing defs "symtab", -// resolves all of the unresolved refs for the defs in tmptab. Also resolves -// default values for enumerations and submessages. -bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, - upb_status *status) -{ - upb_symtab_ent *e; - for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - // Type names are resolved relative to the message in which they appear. - upb_string *base = e->e.key; - - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if(!upb_hasdef(f)) continue; // No resolving necessary. - upb_string *name = upb_downcast_unresolveddef(f->def)->name; - - // Resolve from either the tmptab (pending adds) or symtab (existing - // defs). If both exist, prefer the pending add, because it will be - // overwriting the existing def. - upb_symtab_ent *found; - if(!(found = upb_resolve(tmptab, base, name)) && - !(found = upb_resolve(symtab, base, name))) { - upb_seterr(status, UPB_ERROR, - "could not resolve symbol '" UPB_STRFMT "'" - " in context '" UPB_STRFMT "'", - UPB_STRARG(name), UPB_STRARG(base)); - return false; - } - - // Check the type of the found def. - upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; - if(found->def->type != expected) { - upb_seterr(status, UPB_ERROR, "Unexpected type"); - return false; - } - if (!upb_fielddef_resolve(f, found->def, status)) return false; - } - } +upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i) { + return upb_strtable_next(&t->deftab, i); +} - // Deal with type cycles. - for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - // The findcycles() call will decrement the external refcount of the - upb_symtab_findcycles(m, 0, status); - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true); - } +bool upb_symtabtxn_done(upb_symtabtxn_iter i) { + return i == NULL; +} - return true; +upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter) { + upb_symtab_ent *e = iter; + return e->def; } -// Given a list of defs, a list of extensions (in the future), and a flag -// indicating whether the new defs can overwrite existing defs in the symtab, -// attempts to add the given defs to the symtab. The whole operation either -// succeeds or fails. Ownership of "defs" and "exts" is taken. -static bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, - bool allow_redef, upb_status *status) -{ - upb_rwlock_wrlock(&s->lock); - // Build a table of the defs we mean to add, for duplicate detection and name - // resolution. - upb_strtable tmptab; - upb_strtable_init(&tmptab, num_defs, sizeof(upb_symtab_ent)); - for (int i = 0; i < num_defs; i++) { - upb_def *def = defs[i]; - upb_symtab_ent e = {{def->fqname, 0}, def}; - - // Redefinition is never allowed within a single FileDescriptorSet. - // Additionally, we only allow overwriting of an existing definition if - // allow_redef is set. - if (upb_strtable_lookup(&tmptab, def->fqname) || - (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) { - upb_seterr(status, UPB_ERROR, "Redefinition of symbol " UPB_STRFMT, - UPB_STRARG(def->fqname)); - goto err; - } +/* upb_symtab public interface ************************************************/ - // Pass ownership from the deflist to the strtable. - upb_strtable_insert(&tmptab, &e.e); - defs[i] = NULL; +static void _upb_symtab_free(upb_strtable *t) { + upb_symtab_ent *e; + for (e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) { + assert(upb_atomic_read(&e->def->refcount) == 0); + upb_def_free(e->def); } + upb_strtable_free(t); +} - // TODO: process the list of extensions by modifying entries from - // tmptab in-place (copying them from the symtab first if necessary). - - if (!upb_resolverefs(&tmptab, &s->symtab, status)) goto err; - - // The defs in tmptab have been vetted, and can be added to the symtab - // without causing errors. Now add all tmptab defs to the symtab, - // overwriting (and releasing a ref on) any existing defs with the same - // names. Ownership for tmptab defs passes from the tmptab to the symtab. - upb_symtab_ent *tmptab_e; - for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e; - tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) { - upb_symtab_ent *symtab_e = - upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); - if(symtab_e) { - upb_def_unref(symtab_e->def); - symtab_e->def = tmptab_e->def; - } else { - upb_strtable_insert(&s->symtab, &tmptab_e->e); - } +static void upb_symtab_free(upb_symtab *s) { + _upb_symtab_free(&s->symtab); + for (uint32_t i = 0; i < s->olddefs.len; i++) { + upb_def *d = s->olddefs.defs[i]; + assert(upb_atomic_read(&d->refcount) == 0); + upb_def_free(d); } + upb_rwlock_destroy(&s->lock); + upb_deflist_uninit(&s->olddefs); + free(s); +} - upb_rwlock_unlock(&s->lock); - upb_strtable_free(&tmptab); - return true; - -err: - // We need to free all defs from "tmptab." - upb_rwlock_unlock(&s->lock); - for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; - e = upb_strtable_next(&tmptab, &e->e)) { - upb_def_unref(e->def); +void upb_symtab_unref(upb_symtab *s) { + if(s && upb_atomic_unref(&s->refcount)) { + upb_symtab_free(s); } - upb_strtable_free(&tmptab); - return false; } - -/* upb_symtab public interface ************************************************/ - -upb_symtab *upb_symtab_new() -{ +upb_symtab *upb_symtab_new() { upb_symtab *s = malloc(sizeof(*s)); upb_atomic_init(&s->refcount, 1); upb_rwlock_init(&s->lock); upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent)); - s->fds_msgdef = NULL; + upb_deflist_init(&s->olddefs); return s; } -static void upb_free_symtab(upb_strtable *t) -{ - upb_symtab_ent *e; - for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) - upb_def_unref(e->def); - upb_strtable_free(t); -} - -void _upb_symtab_free(upb_symtab *s) -{ - upb_free_symtab(&s->symtab); - upb_rwlock_destroy(&s->lock); - free(s); -} - -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) -{ +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) { upb_rwlock_rdlock(&s->lock); int total = upb_strtable_count(&s->symtab); // We may only use part of this, depending on how many symbols are of the @@ -1281,13 +642,11 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) } upb_rwlock_unlock(&s->lock); *count = i; - for(i = 0; i < *count; i++) - upb_def_ref(defs[i]); + for(i = 0; i < *count; i++) upb_def_ref(defs[i]); return defs; } -upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) -{ +upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) { upb_rwlock_rdlock(&s->lock); upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); upb_def *ret = NULL; @@ -1299,7 +658,6 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) return ret; } - upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) { upb_rwlock_rdlock(&s->lock); upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol); @@ -1311,3 +669,109 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) upb_rwlock_unlock(&s->lock); return ret; } + +bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n, + upb_symtabtxn *txn) { + // This linear search makes the DFS O(n^2) in the length of the paths. + // Could make this O(n) with a hash table, but n is small. + for (int i = 0; i < n; i++) { + if (def == open_defs[i]) return false; + } + + bool needcopy = false; + upb_msgdef *m = upb_dyncast_msgdef(def); + if (m) { + upb_msg_iter i; + open_defs[n++] = def; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + if (!upb_hasdef(f)) continue; + needcopy |= upb_symtab_dfs(f->def, open_defs, n, txn); + } + } + + bool replacing = (upb_strtable_lookup(&txn->deftab, m->base.fqname) != NULL); + if (needcopy && !replacing) { + upb_symtab_ent e = {{def->fqname, 0}, upb_def_dup(def)}; + upb_strtable_insert(&txn->deftab, &e.e); + replacing = true; + } + return replacing; +} + +bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) { + upb_rwlock_wrlock(&s->lock); + + // All existing defs that can reach defs that are being replaced must + // themselves be replaced with versions that will point to the new defs. + // Do a DFS -- any path that finds a new def must replace all ancestors. + upb_strtable *symtab = &s->symtab; + upb_symtab_ent *e; + for(e = upb_strtable_begin(symtab); e; e = upb_strtable_next(symtab, &e->e)) { + upb_def *open_defs[UPB_MAX_TYPE_DEPTH]; + upb_symtab_dfs(e->def, open_defs, 0, txn); + } + + // Resolve all refs. + upb_strtable *txntab = &txn->deftab; + for(e = upb_strtable_begin(txntab); e; e = upb_strtable_next(txntab, &e->e)) { + upb_msgdef *m = upb_dyncast_msgdef(e->def); + if(!m) continue; + // Type names are resolved relative to the message in which they appear. + upb_string *base = m->base.fqname; + + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + if(!upb_hasdef(f)) continue; // No resolving necessary. + upb_string *name = upb_downcast_unresolveddef(f->def)->name; + + // Resolve from either the txntab (pending adds) or symtab (existing + // defs). If both exist, prefer the pending add, because it will be + // overwriting the existing def. + upb_symtab_ent *found; + if(!(found = upb_resolve(txntab, base, name)) && + !(found = upb_resolve(symtab, base, name))) { + upb_seterr(status, UPB_ERROR, + "could not resolve symbol '" UPB_STRFMT "'" + " in context '" UPB_STRFMT "'", + UPB_STRARG(name), UPB_STRARG(base)); + return false; + } + + // Check the type of the found def. + upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; + if(found->def->type != expected) { + upb_seterr(status, UPB_ERROR, "Unexpected type"); + return false; + } + if (!upb_fielddef_resolve(f, found->def, status)) return false; + } + } + + // The defs in the transaction have been vetted, and can be moved to the + // symtab without causing errors. + upb_symtab_ent *tmptab_e; + for(tmptab_e = upb_strtable_begin(txntab); tmptab_e; + tmptab_e = upb_strtable_next(txntab, &tmptab_e->e)) { + upb_def_movetosymtab(tmptab_e->def, s); + upb_symtab_ent *symtab_e = + upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); + if(symtab_e) { + upb_deflist_push(&s->olddefs, symtab_e->def); + symtab_e->def = tmptab_e->def; + } else { + upb_strtable_insert(&s->symtab, &tmptab_e->e); + } + } + + upb_strtable_clear(txntab); + upb_rwlock_unlock(&s->lock); + upb_symtab_gc(s); + return true; +} + +void upb_symtab_gc(upb_symtab *s) { + (void)s; + // TODO. +} -- cgit v1.2.3