From 0a6fc5fad3cee99bf4da97214a2ca3deccc9b132 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 6 Dec 2009 16:29:29 -0800 Subject: Truly fixed type cyclic refcounting. --- src/upb.h | 7 +- src/upb_def.c | 223 ++++++++++++++++++++++++++++++-------------------------- src/upb_def.h | 45 ++++++++---- src/upb_parse.c | 6 +- tests/tests.c | 15 ++++ 5 files changed, 173 insertions(+), 123 deletions(-) diff --git a/src/upb.h b/src/upb.h index 66706c5..237281f 100644 --- a/src/upb.h +++ b/src/upb.h @@ -25,6 +25,7 @@ extern "C" { #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) +#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) // The maximum that any submessages can be nested. Matches proto2's limit. #define UPB_MAX_NESTING 64 @@ -37,11 +38,10 @@ typedef int16_t upb_field_count_t; // Nested type names are separated by periods. #define UPB_SYMBOL_SEPARATOR '.' + // This limit is for the longest fully-qualified symbol, eg. foo.bar.MsgType #define UPB_SYMBOL_MAXLEN 128 -#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) - // The longest chain that mutually-recursive types are allowed to form. For // example, this is a type cycle of length 2: // message A { @@ -52,6 +52,9 @@ typedef int16_t upb_field_count_t; // } #define UPB_MAX_TYPE_CYCLE_LEN 16 +// The maximum depth that the type graph can have. +#define UPB_MAX_TYPE_DEPTH 64 + /* Fundamental types and type constants. **************************************/ // A list of types as they are encoded on-the-wire. diff --git a/src/upb_def.c b/src/upb_def.c index 48b169d..e48e825 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -34,18 +34,25 @@ static int div_round_up(int numerator, int denominator) { // For defs that participate in cycles we keep two reference counts. One // tracks references that come from outside the cycle (we call these external // references), and is incremented and decremented like a regular refcount. -// The other is a count of how many defs within any of our cycles have external -// references. When a def that is part of a cycle has an external reference, -// we say that it has a "cycle reference" on every def that it is part of a -// cycle with. We free the defs with cycles when the cycle refcount drops to -// zero (which cannot happen unless the external refcount drops to zero first). +// The other is a cycle refcount, and works as follows. Every cycle is +// considered distinct, even if two cycles share members. For example, this +// graph has two distinct cycles: // -// Note that all refs in a cycle will have the same cycle refcount, except for -// ones that belong to more than one cycle which will have the sum of the -// cycle refcounts for all their cycles. +// A-->B-->C +// ^ | | +// +---+---+ // -// This algorithm is relatively cheap, since it only requires extra work on -// initialization and when the regular refcount drops to zero. +// The cycles in this graph are AB and ABC. When A's external refcount +// transitions from 0->1, we say that A takes "cycle references" on both +// cycles. Since A and B are common to both cycles, A and B's cycle refcounts +// will be incremented by two, and C's will be incremented by one. Likewise, +// when A's external refcount transitions from 1->0, we decrement A and B's +// cycle refcounts by two and C's by one. We collect a cyclic type when its +// cycle refcount drops to zero. A precondition for this is that the external +// refcount has dropped to zero also. +// +// This algorithm is relatively cheap, since it only requires extra work when +// the external refcount on a cyclic type transitions from 0->1 or 1->0. static void msgdef_free(struct upb_msgdef *m); static void enumdef_free(struct upb_enumdef *e); @@ -77,63 +84,77 @@ static void def_free(struct upb_def *def) // Depth-first search for all cycles that include cycle_base. Returns the // number of paths from def that lead to cycle_base, which is equivalent to the // number of cycles def is in that include cycle_base. -static int cycle_unref(struct upb_def *def, struct upb_def *cycle_base, - struct upb_def **open_defs, int num_open_defs) { +// +// open_defs tracks the set of nodes that are currently being visited in the +// search so we can stop the search if we detect a cycles that do not involve +// cycle_base. We can't color the nodes as we go by writing to a member of the +// def, because another thread could be performing the search concurrently. +static int cycle_ref_or_unref(struct upb_msgdef *m, + struct upb_msgdef *cycle_base, + struct upb_msgdef **open_defs, int num_open_defs, + bool ref) { + bool found = false; for(int i = 0; i < num_open_defs; i++) { - if(open_defs[i] == def) { + if(open_defs[i] == m) { // We encountered a cycle that did not involve cycle_base. - return 0; + found = true; + break; } } - if(def == cycle_base) { + + if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) { + return 0; + } else if(m == cycle_base) { return 1; } else { int path_count = 0; if(cycle_base == NULL) { - cycle_base = def; + cycle_base = m; } else { - open_defs[num_open_defs++] = def; + open_defs[num_open_defs++] = m; } - struct upb_msgdef *m = upb_downcast_msgdef(def); - assert(m); for(int i = 0; i < m->num_fields; i++) { struct upb_fielddef *f = &m->fields[i]; - if(upb_issubmsg(f) && f->def->max_cycle_len > 0) - path_count += cycle_unref(f->def, cycle_base, open_defs, num_open_defs); + struct upb_def *def = f->def; + if(upb_issubmsg(f) && def->is_cyclic) { + struct upb_msgdef *sub_m = upb_downcast_msgdef(def); + path_count += cycle_ref_or_unref(sub_m, cycle_base, open_defs, + num_open_defs, ref); + } + } + if(ref) { + upb_atomic_add(&m->cycle_refcount, path_count); + } else { + if(upb_atomic_add(&m->cycle_refcount, -path_count)) + def_free(UPB_UPCAST(m)); } - fprintf(stderr, "removing %d cycle refs from " UPB_STRFMT "\n", path_count, UPB_STRARG(def->fqname)); - if(upb_atomic_add(&def->cycle_refcount, -path_count)) - def_free(def); return path_count; } } void _upb_def_reftozero(struct upb_def *def) { - if(def->max_cycle_len > 0) { - fprintf(stderr, "cycle unreffing " UPB_STRFMT ", max len=%d\n", UPB_STRARG(def->fqname), def->max_cycle_len); - // Traverse the graph, decrementing the cycle refcounts of all cyclic defs - // and deleting any of them that fall to zero. - // - // We track the set of nodes that are currently being visited in the search - // so we can detect cycles that do not involve this def and stop the - // search. We can't color the nodes as we go by writing to a member of the - // def, because another thread could be performing the search concurrently. - struct upb_def *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - cycle_unref(def, NULL, open_defs, 0); + if(def->is_cyclic) { + struct upb_msgdef *m = upb_downcast_msgdef(def); + struct upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; + cycle_ref_or_unref(m, NULL, open_defs, 0, false); } else { def_free(def); } } +void _upb_def_cyclic_ref(struct upb_def *def) { + struct upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; + cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); +} + void upb_def_init(struct upb_def *def, enum upb_def_type type, struct upb_string *fqname) { def->type = type; - def->max_cycle_len = 0; // We increment this later, after resolving refs. - def->visiting_submsg = 0; + def->is_cyclic = 0; // We detect this later, after resolving refs. + def->search_depth = 0; def->fqname = fqname; upb_string_ref(fqname); upb_atomic_refcount_init(&def->refcount, 1); - upb_atomic_refcount_init(&def->cycle_refcount, 0); } void upb_def_uninit(struct upb_def *def) { @@ -257,7 +278,7 @@ static struct upb_msgdef *msgdef_new(struct upb_fielddef **fields, } struct upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG, fqname); - fprintf(stderr, "Created msg: " UPB_STRFMT "\n", UPB_STRARG(fqname)); + upb_atomic_refcount_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, num_fields, sizeof(struct upb_itof_ent)); upb_strtable_init(&m->ntof, num_fields, sizeof(struct upb_ntof_ent)); @@ -300,7 +321,6 @@ static struct upb_msgdef *msgdef_new(struct upb_fielddef **fields, static void msgdef_free(struct upb_msgdef *m) { - fprintf(stderr, "Freed msg: " UPB_STRFMT "\n", UPB_STRARG(UPB_UPCAST(m)->fqname)); for (upb_field_count_t i = 0; i < m->num_fields; i++) fielddef_uninit(&m->fields[i]); free(m->fields); @@ -545,71 +565,57 @@ static void insert_message(struct upb_strtable *t, insert_enum(t, d->enum_type->elements[i], fqname, status); } -// Traverses the cycle again to count its length and adjust reference counts. -// Returns the total length of the cycle. -static int process_cycle(struct upb_msgdef *m, struct upb_msgdef *cycle_base, - int partial_cycle_len, struct upb_status *status) { - if(m == cycle_base && partial_cycle_len > 0) { - return partial_cycle_len; - } else { - struct upb_def *def = UPB_UPCAST(m); - partial_cycle_len++; - struct upb_fielddef *f = &m->fields[def->visiting_submsg - 1]; - struct upb_msgdef *sub_m = upb_downcast_msgdef(f->def); - assert(sub_m); - int cycle_len = process_cycle(sub_m, cycle_base, partial_cycle_len, status); +static bool find_cycles(struct upb_msgdef *m, int search_depth, + struct upb_status *status) +{ + if(search_depth > UPB_MAX_TYPE_DEPTH) { + // There are many situations in upb where we recurse over the type tree + // (like for example, right now) and an absurdly deep tree could cause us + // to stack overflow on systems with very limited stacks. + upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at " + "depth %d in the type graph, which exceeds the maximum type " + "depth of %d.", UPB_UPCAST(m)->fqname, search_depth, + UPB_MAX_TYPE_DEPTH); + return false; + } else if(UPB_UPCAST(m)->search_depth == 1) { + // Cycle! + int cycle_len = search_depth - 1; if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { - upb_seterr(status, UPB_STATUS_ERROR, "Cycle in type " UPB_STRFMT " is of " - "length %d, exceeds maximum type cycle length of %d", - cycle_len, UPB_MAX_TYPE_CYCLE_LEN); - return 0; + upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved " + "in a cycle of length %d, which exceeds the maximum type " + "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, + UPB_MAX_TYPE_CYCLE_LEN); } - - // Since we know that each def has exactly one external ref (the symtab's - // ref), the number of defs in the cycle is the number of cycle refs we - // should add to each def in the cycle. - upb_atomic_add(&def->cycle_refcount, cycle_len); - - // Remove the regular ref that comes from the cycle. - fprintf(stderr, UPB_STRFMT "\n", UPB_STRARG(def->fqname)); - if(f->owned) { - bool zero = upb_atomic_unref(&f->def->refcount); - f->owned = false; - assert(!zero); // The symtab should hold an external ref. - (void)zero; // Silence "zero not used" warnings on non-debug builds. - } - def->max_cycle_len = UPB_MAX(def->max_cycle_len, cycle_len); - - return cycle_len; - } -} - -// Depth-first search, detecting and marking cycles. -static void find_cycles(struct upb_def *def, struct upb_status *status) { - struct upb_msgdef *m = upb_downcast_msgdef(def); - if(!m) return; - if(def->visiting_submsg) { - // Cycle detected! - fprintf(stderr, "Found cycle in " UPB_STRFMT "\n", UPB_STRARG(def->fqname)); - process_cycle(m, m, 0, status); - } else if(def->max_cycle_len > 0) { - // This def is part of at least one cycle, but we have already discovered - // this node (and consequently all of its cycles) by some other path. + return true; + } else if(UPB_UPCAST(m)->search_depth > 0) { + // This was a cycle, but did not originate from the base of our search tree. + // We'll find it when we call find_cycles() on this node directly. + return false; } else { - for(int i = 0; i < m->num_fields; i++) { + UPB_UPCAST(m)->search_depth = ++search_depth; + bool cycle_found = false; + for(upb_field_count_t i = 0; i < m->num_fields; i++) { struct upb_fielddef *f = &m->fields[i]; - // Mark the path we are currently on so we can easily retrace it in - // process_cycle(). - UPB_UPCAST(m)->visiting_submsg = i + 1; - if(upb_issubmsg(f)) find_cycles(f->def, status); + if(!upb_issubmsg(f)) continue; + struct upb_def *sub_def = f->def; + struct upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); + if(find_cycles(sub_m, search_depth, status)) { + cycle_found = true; + UPB_UPCAST(m)->is_cyclic = true; + if(f->owned) { + upb_atomic_unref(&sub_def->refcount); + f->owned = false; + } + } } - UPB_UPCAST(m)->visiting_submsg = 0; + UPB_UPCAST(m)->search_depth = 0; + return cycle_found; } } -void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, - google_protobuf_FileDescriptorProto *fd, bool sort, - struct upb_status *status) +static void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, + google_protobuf_FileDescriptorProto *fd, bool sort, + struct upb_status *status) { struct upb_string *pkg; // Temporary hack until the static data is integrated into our @@ -643,7 +649,7 @@ void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, // Attempt to resolve all references. struct symtab_ent *e; for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { - struct upb_msgdef *m = upb_downcast_msgdef(e->def); + struct upb_msgdef *m = upb_dyncast_msgdef(e->def); if(!m) continue; struct upb_string *base = e->e.key; for(upb_field_count_t i = 0; i < m->num_fields; i++) { @@ -667,11 +673,17 @@ void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, } } - // Find cycles and update refcounts appropriately. + // Deal with type cycles. for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { - // TODO: make sure we don't leak memory if this fails due to excessive - // cycle len. - find_cycles(e->def, status); + struct upb_msgdef *m = upb_dyncast_msgdef(e->def); + if(!m) continue; + + // Do an initial pass over the graph to check that there are no cycles + // longer than the maximum length. We also mark all cyclic defs as such, + // and decrement refs on cyclic defs. + find_cycles(m, 0, status); + struct upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; + cycle_ref_or_unref(m, NULL, open_defs, 0, true); } } @@ -742,8 +754,13 @@ struct upb_def *upb_symtab_lookup(struct upb_symtab *s, { upb_rwlock_rdlock(&s->lock); struct symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); + struct upb_def *ret = NULL; + if(e) { + ret = e->def; + upb_def_ref(ret); + } upb_rwlock_unlock(&s->lock); - return e ? e->def : NULL; + return ret; } diff --git a/src/upb_def.h b/src/upb_def.h index bc7e733..8149927 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -54,34 +54,48 @@ struct upb_def { upb_atomic_refcount_t refcount; upb_def_type_t type; - // These members that pertain to cyclic collection could technically go in - // upb_msgdef instead of here, because only messages can be involved in - // cycles. However, putting them here is free from a space perspective - // because structure alignment will otherwise leave three bytes empty between - // type and refcount. It is also makes ref and unref more efficient, because - // we don't have to downcast to msgdef before checking the is_cyclic flag. - // - // See .c file for description of refcounting scheme. - uint8_t max_cycle_len; - upb_field_count_t visiting_submsg; // Used during initialization dfs. - upb_atomic_refcount_t cycle_refcount; + // The is_cyclic flag could go in upb_msgdef instead of here, because only + // messages can be involved in cycles. However, putting them here is free + // from a space perspective because structure alignment will otherwise leave + // three bytes empty after type. It is also makes ref and unref more + // efficient, because we don't have to downcast to msgdef before checking the + // is_cyclic flag. + bool is_cyclic; + uint16_t search_depth; // Used during initialization dfs. }; -void _upb_def_reftozero(struct upb_def *def); // Must not be called directly! +// These must not be called directly! +void _upb_def_cyclic_ref(struct upb_def *def); +void _upb_def_reftozero(struct upb_def *def); // Call to ref/deref a def. INLINE void upb_def_ref(struct upb_def *def) { - upb_atomic_ref(&def->refcount); + if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def); } INLINE void upb_def_unref(struct upb_def *def) { if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); } -// Downcasts. They are checked only if asserts are enabled. +// Dynamic casts, for determining if a def is of a particular type at runtime. +#define UPB_DYNAMIC_CAST_DEF(lower, upper) \ + struct upb_ ## lower; /* Forward-declare. */ \ + INLINE struct upb_ ## lower *upb_dyncast_ ## lower(struct upb_def *def) { \ + if(def->type != UPB_DEF_ ## upper) return NULL; \ + return (struct upb_ ## lower*)def; \ + } +UPB_DYNAMIC_CAST_DEF(msgdef, MSG); +UPB_DYNAMIC_CAST_DEF(enumdef, ENUM); +UPB_DYNAMIC_CAST_DEF(svcdef, SVC); +UPB_DYNAMIC_CAST_DEF(extdef, EXT); +UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED); +#undef UPB_DYNAMIC_CAST_DEF + +// Downcasts, for when some wants to assert that a def is of a particular type. +// These are only checked if we are building debug. #define UPB_DOWNCAST_DEF(lower, upper) \ struct upb_ ## lower; /* Forward-declare. */ \ INLINE struct upb_ ## lower *upb_downcast_ ## lower(struct upb_def *def) { \ - if(def->type != UPB_DEF_ ## upper) return NULL; \ + assert(def->type == UPB_DEF_ ## upper); \ return (struct upb_ ## lower*)def; \ } UPB_DOWNCAST_DEF(msgdef, MSG); @@ -169,6 +183,7 @@ struct google_protobuf_DescriptorProto; // Structure that describes a single .proto message type. struct upb_msgdef { struct upb_def base; + upb_atomic_refcount_t cycle_refcount; struct upb_msg *default_msg; // Message with all default values set. size_t size; upb_field_count_t num_fields; diff --git a/src/upb_parse.c b/src/upb_parse.c index 8f2c2ff..2948022 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -467,9 +467,9 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, void *_buf, size_t len, buf = delim_end; // Could be >end. } } else { - //if(!f || !upb_check_type(tag.wire_type, f->type)) { - // buf = skip_wire_value(buf, end, tag.wire_type, status); - if (f->type == UPB_TYPE(GROUP)) { + if(!f || !upb_check_type(tag.wire_type, f->type)) { + buf = skip_wire_value(buf, end, tag.wire_type, status); + } else if (f->type == UPB_TYPE(GROUP)) { submsg_end = push(p, start, 0, f, status); msgdef = p->top->msgdef; } else { diff --git a/tests/tests.c b/tests/tests.c index 3580c2e..5704292 100644 --- a/tests/tests.c +++ b/tests/tests.c @@ -239,7 +239,22 @@ static void test_upb_symtab() { upb_symtab_add_desc(s, descriptor, &status); ASSERT(upb_ok(&status)); upb_string_unref(descriptor); + + // Test cycle detection by making a cyclic def's main refcount go to zero + // and then be incremented to one again. + struct upb_string *symname = upb_strdupc("A"); + struct upb_def *def = upb_symtab_lookup(s, symname); + upb_string_unref(symname); + ASSERT(def); upb_symtab_unref(s); + struct upb_msgdef *m = upb_downcast_msgdef(def); + struct upb_fielddef *f = &m->fields[0]; + ASSERT(upb_hasdef(f)); + struct upb_def *def2 = f->def; + ASSERT(upb_downcast_msgdef(def2)); + upb_def_ref(def2); + upb_def_unref(def); + upb_def_unref(def2); } -- cgit v1.2.3