From b72ed3b97a41df5f6690c8d5c966fa33e75213d5 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 12 Jun 2015 15:23:56 -0700 Subject: Fix for stack overflow for cyclic defs. Fixes this bug that came up in the Ruby extension: https://github.com/google/protobuf/issues/425 --- tests/test_def.c | 13 +++++++++++++ upb/symtab.c | 48 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/tests/test_def.c b/tests/test_def.c index 4f9fcff..c33d584 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -246,6 +246,18 @@ static void test_replacement() { upb_symtab_unref(s, &s); } +static void test_cycles_in_replacement() { + upb_symtab *s = upb_symtab_new(&s); + upb_msgdef *m = upb_msgdef_newnamed("M", &s); + upb_status status = UPB_STATUS_INIT; + + upb_msgdef_addfield(m, newfield("m", 1, UPB_TYPE_MESSAGE, + UPB_LABEL_OPTIONAL, ".M", &s), + &s, NULL); + ASSERT_STATUS(upb_symtab_add(s, (upb_def**)&m, 1, &s, &status), &status); + ASSERT_STATUS(upb_symtab_add(s, NULL, 0, &s, &status), &status); +} + static void test_freeze_free() { bool ok; @@ -459,6 +471,7 @@ int run_tests(int argc, char *argv[]) { test_fielddef(); test_fielddef_unref(); test_replacement(); + test_cycles_in_replacement(); test_freeze_free(); test_partial_freeze(); test_noreftracking(); diff --git a/upb/symtab.c b/upb/symtab.c index 6e6b6b8..c8e696b 100644 --- a/upb/symtab.c +++ b/upb/symtab.c @@ -91,14 +91,33 @@ const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, return ret; } -/* Searches def and its children to find defs that have the same name as any - * def in "addtab." Returns true if any where found, and as a side-effect adds - * duplicates of these defs into addtab. +/* Starts a depth-first traversal at def, recursing into any subdefs + * (ie. submessage types). Adds duplicates of existing defs to addtab + * wherever necessary, so that the resulting symtab will be consistent once + * addtab is added. * - * We use a modified depth-first traversal that traverses each SCC (which we - * already computed) as if it were a single node. This allows us to traverse - * the possibly-cyclic graph as if it were a DAG and to dup the correct set of - * nodes with O(n) time. */ + * More specifically, if any defs D is found in the DFS that: + * + * 1. can reach a def that is being replaced (because it has the same full + * name as a def in addtab, AND + * + * 2. is not itself being replaced already (ie. no def with this name exists + * in addtab). + * + * ...then a duplicate (new copy) of D will be added to addtab. + * + * Returns true if "def" can reach any def that is being replaced. + * + * It is slightly tricky to do this correctly in the place of cycles. If we + * detect that our DFS has hit a cycle, we don't yet know if this SCC can reach + * a def in addtab or not. Once we figure this out, that answer needs to apply + * to *all* defs in the SCC, even if we visited them already. + * + * To work around this problem, we traverse each SCC (which we already + * computed, since these defs are frozen) as a single node. We first compute + * whether the SCC as a whole can reach a def in addtab, then we dup (or not) + * the entire SCC. This requires breaking the encapsulation of upb_refcounted, + * since that is where we get the data about what SCC we are in. */ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, const void *new_owner, upb_inttable *seen, upb_status *s) { @@ -124,7 +143,8 @@ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, need_dup = true; } - /* For messages, continue the recursion by visiting all subdefs. */ + /* For messages, continue the recursion by visiting all subdefs, but only + * ones in different SCCs. */ m = upb_dyncast_msgdef(def); if (m) { upb_msg_field_iter i; @@ -132,17 +152,23 @@ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, !upb_msg_field_done(&i); upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); + const upb_def *subdef; + if (!upb_fielddef_hassubdef(f)) continue; + subdef = upb_fielddef_subdef(f); + + /* Skip subdefs in this SCC. */ + if (def->base.group == subdef->base.group) continue; + /* |= to avoid short-circuit; we need its side-effects. */ - need_dup |= upb_resolve_dfs( - upb_fielddef_subdef(f), addtab, new_owner, seen, s); + need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s); if (!upb_ok(s)) return false; } } } while ((def = (upb_def*)def->base.next) != base); if (need_dup) { - /* Dup any defs that don't already have entries in addtab. */ + /* Dup all defs in this SCC that don't already have entries in addtab. */ def = base; do { const char *name; -- cgit v1.2.3 From e3a7a4cf7b857521b87c667899c04fc05358a278 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Mon, 15 Jun 2015 17:44:11 -0700 Subject: Addressed code review comment and clarified comments. --- upb/symtab.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/upb/symtab.c b/upb/symtab.c index c8e696b..0ce3e18 100644 --- a/upb/symtab.c +++ b/upb/symtab.c @@ -91,43 +91,48 @@ const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, return ret; } -/* Starts a depth-first traversal at def, recursing into any subdefs +/* Starts a depth-first traversal at "def", recursing into any subdefs * (ie. submessage types). Adds duplicates of existing defs to addtab * wherever necessary, so that the resulting symtab will be consistent once * addtab is added. * - * More specifically, if any defs D is found in the DFS that: + * More specifically, if any def D is found in the DFS that: * - * 1. can reach a def that is being replaced (because it has the same full - * name as a def in addtab, AND + * 1. can reach a def that is being replaced by something in addtab, AND * - * 2. is not itself being replaced already (ie. no def with this name exists - * in addtab). + * 2. is not itself being replaced already (ie. this name doesn't already + * exist in addtab) * * ...then a duplicate (new copy) of D will be added to addtab. * - * Returns true if "def" can reach any def that is being replaced. + * Returns true if this happened for any def reachable from "def." * - * It is slightly tricky to do this correctly in the place of cycles. If we - * detect that our DFS has hit a cycle, we don't yet know if this SCC can reach - * a def in addtab or not. Once we figure this out, that answer needs to apply - * to *all* defs in the SCC, even if we visited them already. + * It is slightly tricky to do this correctly in the presence of cycles. If we + * detect that our DFS has hit a cycle, we might not yet know if any SCCs on + * our stack can reach a def in addtab or not. Once we figure this out, that + * answer needs to apply to *all* defs in these SCCs, even if we visited them + * already. So a straight up one-pass cycle-detecting DFS won't work. * * To work around this problem, we traverse each SCC (which we already * computed, since these defs are frozen) as a single node. We first compute - * whether the SCC as a whole can reach a def in addtab, then we dup (or not) + * whether the SCC as a whole can reach any def in addtab, then we dup (or not) * the entire SCC. This requires breaking the encapsulation of upb_refcounted, * since that is where we get the data about what SCC we are in. */ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, const void *new_owner, upb_inttable *seen, upb_status *s) { - /* Memoize results of this function for efficiency (since we're traversing a - * DAG this is not needed to limit the depth of the search). */ upb_value v; bool need_dup; const upb_def *base; + const void* memoize_key; + + /* Memoize results of this function for efficiency (since we're traversing a + * DAG this is not needed to limit the depth of the search). + * + * We memoize by SCC instead of by individual def. */ + memoize_key = def->base.group; - if (upb_inttable_lookup(seen, (uintptr_t)def, &v)) + if (upb_inttable_lookupptr(seen, memoize_key, &v)) return upb_value_getbool(v); /* Visit submessages for all messages in the SCC. */ @@ -185,7 +190,7 @@ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, } while ((def = (upb_def*)def->base.next) != base); } - upb_inttable_insert(seen, (uintptr_t)def, upb_value_bool(need_dup)); + upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup)); return need_dup; oom: -- cgit v1.2.3