From e15f834a916d64b80a0da9cdc4ee0bd4439b6bf4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 6 Dec 2009 13:41:37 -0800 Subject: Circular references truly work now, along with a test. One simplification to come. --- src/upb.h | 10 +++++ src/upb_atomic.h | 5 ++- src/upb_def.c | 112 ++++++++++++++++++++++++++++++++++++++++--------------- src/upb_def.h | 10 ++--- tests/test.proto | 31 +++++++++++++++ 5 files changed, 131 insertions(+), 37 deletions(-) create mode 100644 tests/test.proto diff --git a/src/upb.h b/src/upb.h index 0ee40fb..66706c5 100644 --- a/src/upb.h +++ b/src/upb.h @@ -37,10 +37,20 @@ typedef int16_t upb_field_count_t; // Nested type names are separated by periods. #define UPB_SYMBOL_SEPARATOR '.' +// This limit is for the longest fully-qualified symbol, eg. foo.bar.MsgType #define UPB_SYMBOL_MAXLEN 128 #define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) +// The longest chain that mutually-recursive types are allowed to form. For +// example, this is a type cycle of length 2: +// message A { +// B b = 1; +// } +// message B { +// A a = 1; +// } +#define UPB_MAX_TYPE_CYCLE_LEN 16 /* Fundamental types and type constants. **************************************/ diff --git a/src/upb_atomic.h b/src/upb_atomic.h index 22aa306..e425502 100644 --- a/src/upb_atomic.h +++ b/src/upb_atomic.h @@ -55,7 +55,8 @@ INLINE int upb_atomic_read(upb_atomic_refcount_t *a) { } INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int val) { - return a->val += val; + a->val += val; + return a->val == 0; } typedef struct { @@ -93,7 +94,7 @@ INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { } INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int n) { - return __sync_fetch_and_add(&a->val, n) == 0; + return __sync_add_and_fetch(&a->val, n) == 0; } INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { diff --git a/src/upb_def.c b/src/upb_def.c index bc4f293..48b169d 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -19,11 +19,11 @@ static int div_round_up(int numerator, int denominator) { /* upb_def ********************************************************************/ -// Defs are reference counted, but can have cycles, so we need to be capable of -// collecting the cycles. In our situation defs are immutable (so cycles -// cannot be created or destroyed post-initialization). We need to be -// thread-safe but want to avoid locks if at all possible and rely only on -// atomic operations. +// Defs are reference counted, but can have cycles when types are +// self-recursive or mutually recursive, so we need to be capable of collecting +// the cycles. In our situation defs are immutable (so cycles cannot be +// created or destroyed post-initialization). We need to be thread-safe but +// want to avoid locks if at all possible and rely only on atomic operations. // // Our scheme is as follows. First we give each def a flag indicating whether // it is part of a cycle or not. Because defs are immutable, this flag will @@ -74,21 +74,52 @@ static void def_free(struct upb_def *def) } } -static void cycle_unref(struct upb_def *def) { - struct upb_msgdef *m = upb_downcast_msgdef(def); - assert(m); - for(int i = 0; i < m->num_fields; i++) { - struct upb_fielddef *f = &m->fields[i]; - if(upb_issubmsg(f)) cycle_unref(f->def); +// Depth-first search for all cycles that include cycle_base. Returns the +// number of paths from def that lead to cycle_base, which is equivalent to the +// number of cycles def is in that include cycle_base. +static int cycle_unref(struct upb_def *def, struct upb_def *cycle_base, + struct upb_def **open_defs, int num_open_defs) { + for(int i = 0; i < num_open_defs; i++) { + if(open_defs[i] == def) { + // We encountered a cycle that did not involve cycle_base. + return 0; + } + } + if(def == cycle_base) { + return 1; + } else { + int path_count = 0; + if(cycle_base == NULL) { + cycle_base = def; + } else { + open_defs[num_open_defs++] = def; + } + struct upb_msgdef *m = upb_downcast_msgdef(def); + assert(m); + for(int i = 0; i < m->num_fields; i++) { + struct upb_fielddef *f = &m->fields[i]; + if(upb_issubmsg(f) && f->def->max_cycle_len > 0) + path_count += cycle_unref(f->def, cycle_base, open_defs, num_open_defs); + } + fprintf(stderr, "removing %d cycle refs from " UPB_STRFMT "\n", path_count, UPB_STRARG(def->fqname)); + if(upb_atomic_add(&def->cycle_refcount, -path_count)) + def_free(def); + return path_count; } - if(upb_atomic_unref(&def->cycle_refcount)) def_free(def); } void _upb_def_reftozero(struct upb_def *def) { - if(def->is_cyclic) { + if(def->max_cycle_len > 0) { + fprintf(stderr, "cycle unreffing " UPB_STRFMT ", max len=%d\n", UPB_STRARG(def->fqname), def->max_cycle_len); // Traverse the graph, decrementing the cycle refcounts of all cyclic defs // and deleting any of them that fall to zero. - cycle_unref(def); + // + // We track the set of nodes that are currently being visited in the search + // so we can detect cycles that do not involve this def and stop the + // search. We can't color the nodes as we go by writing to a member of the + // def, because another thread could be performing the search concurrently. + struct upb_def *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; + cycle_unref(def, NULL, open_defs, 0); } else { def_free(def); } @@ -97,7 +128,7 @@ void _upb_def_reftozero(struct upb_def *def) { void upb_def_init(struct upb_def *def, enum upb_def_type type, struct upb_string *fqname) { def->type = type; - def->is_cyclic = false; // We check for this later, after resolving refs. + def->max_cycle_len = 0; // We increment this later, after resolving refs. def->visiting_submsg = 0; def->fqname = fqname; upb_string_ref(fqname); @@ -140,9 +171,11 @@ static void fielddef_init(struct upb_fielddef *f, f->number = fd->number; f->name = upb_strdup(fd->name); f->def = NULL; + f->owned = false; assert(fd->set_flags.has.type_name == upb_hasdef(f)); if(fd->set_flags.has.type_name) { f->def = UPB_UPCAST(upb_unresolveddef_new(fd->type_name)); + f->owned = true; } } @@ -156,7 +189,7 @@ static struct upb_fielddef *fielddef_new( static void fielddef_uninit(struct upb_fielddef *f) { upb_string_unref(f->name); - if(upb_hasdef(f)) { + if(upb_hasdef(f) && f->owned) { upb_def_unref(f->def); } } @@ -167,6 +200,7 @@ static void fielddef_copy(struct upb_fielddef *dst, struct upb_fielddef *src) dst->name = upb_strdup(src->name); if(upb_hasdef(src)) { upb_def_ref(dst->def); + dst->owned = true; } } @@ -223,6 +257,7 @@ static struct upb_msgdef *msgdef_new(struct upb_fielddef **fields, } struct upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG, fqname); + fprintf(stderr, "Created msg: " UPB_STRFMT "\n", UPB_STRARG(fqname)); upb_inttable_init(&m->itof, num_fields, sizeof(struct upb_itof_ent)); upb_strtable_init(&m->ntof, num_fields, sizeof(struct upb_ntof_ent)); @@ -265,6 +300,7 @@ static struct upb_msgdef *msgdef_new(struct upb_fielddef **fields, static void msgdef_free(struct upb_msgdef *m) { + fprintf(stderr, "Freed msg: " UPB_STRFMT "\n", UPB_STRARG(UPB_UPCAST(m)->fqname)); for (upb_field_count_t i = 0; i < m->num_fields; i++) fielddef_uninit(&m->fields[i]); free(m->fields); @@ -285,9 +321,10 @@ static void msgdef_free(struct upb_msgdef *m) static void upb_msgdef_resolve(struct upb_msgdef *m, struct upb_fielddef *f, struct upb_def *def) { (void)m; - upb_def_unref(f->def); + if(f->owned) upb_def_unref(f->def); f->def = def; - // We will later remove this ref if it is a part of a cycle. + // We will later make the ref unowned if it is a part of a cycle. + f->owned = true; upb_def_ref(def); } @@ -511,38 +548,51 @@ static void insert_message(struct upb_strtable *t, // Traverses the cycle again to count its length and adjust reference counts. // Returns the total length of the cycle. static int process_cycle(struct upb_msgdef *m, struct upb_msgdef *cycle_base, - int partial_cycle_len) { + int partial_cycle_len, struct upb_status *status) { if(m == cycle_base && partial_cycle_len > 0) { return partial_cycle_len; } else { + struct upb_def *def = UPB_UPCAST(m); partial_cycle_len++; - struct upb_fielddef *f = &m->fields[UPB_UPCAST(m)->visiting_submsg - 1]; + struct upb_fielddef *f = &m->fields[def->visiting_submsg - 1]; struct upb_msgdef *sub_m = upb_downcast_msgdef(f->def); assert(sub_m); - int cycle_len = process_cycle(sub_m, cycle_base, partial_cycle_len); + int cycle_len = process_cycle(sub_m, cycle_base, partial_cycle_len, status); + if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { + upb_seterr(status, UPB_STATUS_ERROR, "Cycle in type " UPB_STRFMT " is of " + "length %d, exceeds maximum type cycle length of %d", + cycle_len, UPB_MAX_TYPE_CYCLE_LEN); + return 0; + } // Since we know that each def has exactly one external ref (the symtab's // ref), the number of defs in the cycle is the number of cycle refs we // should add to each def in the cycle. - upb_atomic_add(&UPB_UPCAST(m)->cycle_refcount, cycle_len); + upb_atomic_add(&def->cycle_refcount, cycle_len); // Remove the regular ref that comes from the cycle. - bool zero = upb_atomic_unref(&UPB_UPCAST(m)->refcount); - assert(!zero); // The symtab should hold an external ref. - (void)zero; // Silence "zero not used" warnings on non-debug builds. + fprintf(stderr, UPB_STRFMT "\n", UPB_STRARG(def->fqname)); + if(f->owned) { + bool zero = upb_atomic_unref(&f->def->refcount); + f->owned = false; + assert(!zero); // The symtab should hold an external ref. + (void)zero; // Silence "zero not used" warnings on non-debug builds. + } + def->max_cycle_len = UPB_MAX(def->max_cycle_len, cycle_len); return cycle_len; } } // Depth-first search, detecting and marking cycles. -static void find_cycles(struct upb_def *def) { +static void find_cycles(struct upb_def *def, struct upb_status *status) { struct upb_msgdef *m = upb_downcast_msgdef(def); if(!m) return; if(def->visiting_submsg) { // Cycle detected! - process_cycle(m, m, 0); - } else if(upb_atomic_read(&def->cycle_refcount)) { + fprintf(stderr, "Found cycle in " UPB_STRFMT "\n", UPB_STRARG(def->fqname)); + process_cycle(m, m, 0, status); + } else if(def->max_cycle_len > 0) { // This def is part of at least one cycle, but we have already discovered // this node (and consequently all of its cycles) by some other path. } else { @@ -551,7 +601,7 @@ static void find_cycles(struct upb_def *def) { // Mark the path we are currently on so we can easily retrace it in // process_cycle(). UPB_UPCAST(m)->visiting_submsg = i + 1; - if(upb_issubmsg(f)) find_cycles(f->def); + if(upb_issubmsg(f)) find_cycles(f->def, status); } UPB_UPCAST(m)->visiting_submsg = 0; } @@ -619,7 +669,9 @@ void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, // Find cycles and update refcounts appropriately. for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { - find_cycles(e->def); + // TODO: make sure we don't leak memory if this fails due to excessive + // cycle len. + find_cycles(e->def, status); } } diff --git a/src/upb_def.h b/src/upb_def.h index 09ae5d2..bc7e733 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -49,9 +49,9 @@ enum upb_def_type { // This typedef is more space-efficient than declaring an enum var directly. typedef uint8_t upb_def_type_t; -// Common members. struct upb_def { struct upb_string *fqname; // Fully qualified. + upb_atomic_refcount_t refcount; upb_def_type_t type; // These members that pertain to cyclic collection could technically go in @@ -60,11 +60,10 @@ struct upb_def { // because structure alignment will otherwise leave three bytes empty between // type and refcount. It is also makes ref and unref more efficient, because // we don't have to downcast to msgdef before checking the is_cyclic flag. - bool is_cyclic; // Is this def part of a cycle? - upb_field_count_t visiting_submsg; // Helper for depth-first search. - + // // See .c file for description of refcounting scheme. - upb_atomic_refcount_t refcount; + uint8_t max_cycle_len; + upb_field_count_t visiting_submsg; // Used during initialization dfs. upb_atomic_refcount_t cycle_refcount; }; @@ -113,6 +112,7 @@ struct upb_fielddef { // For the case of an enum or a submessage, points to the def for that type. // We own a ref on this def. + bool owned; struct upb_def *def; }; diff --git a/tests/test.proto b/tests/test.proto new file mode 100644 index 0000000..b51bd6b --- /dev/null +++ b/tests/test.proto @@ -0,0 +1,31 @@ + +// A series of messages with various kinds of cycles in them. +// +-+---+ +---+ +// V | | | | +// A -> B-+-> C -> D<--+ +// ^ | | +// +----------+----+ +// +// This tests the following cases: +// - B and C are together in multiple cycles +// - B and D are cycles to themselves. + +message A { + optional B b = 1; +} + +message B { + optional B b = 1; + optional C c = 2; +} + +message C { + optional A a = 1; + optional B b = 2; + optional D d = 3; +} + +message D { + optional A a = 1; + optional D d = 2; +} -- cgit v1.2.3