summaryrefslogtreecommitdiff
path: root/src/upb_def.c
diff options
context:
space:
mode:
authorJoshua Haberman <jhaberman@gmail.com>2011-06-17 10:34:29 -0700
committerJoshua Haberman <jhaberman@gmail.com>2011-06-17 10:34:29 -0700
commit559e23c796f973a65d05c76e211835b126ee8ac8 (patch)
treeec359628f3b93b88718cc34e5ec86ca6e6c6c1e2 /src/upb_def.c
parenta503b8859c37906ab5012db163daca43bfe393bb (diff)
Major refactoring: abandon upb_msg, add upb_accessors.
Next on the chopping block is upb_string.
Diffstat (limited to 'src/upb_def.c')
-rw-r--r--src/upb_def.c1458
1 files changed, 461 insertions, 997 deletions
diff --git a/src/upb_def.c b/src/upb_def.c
index 791b885..45e7f73 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -7,30 +7,10 @@
#include <stdlib.h>
#include <stddef.h>
-#include <errno.h>
#include "upb_def.h"
-#include "upb_msg.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
-static int upb_div_round_up(int numerator, int denominator) {
- /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
- return numerator > 0 ? (numerator - 1) / denominator + 1 : 0;
-}
-
-/* Joins strings together, for example:
- * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
- * join("", "Baz") -> "Baz"
- * Caller owns a ref on the returned string. */
-static upb_string *upb_join(upb_string *base, upb_string *name) {
- if (!base || upb_string_len(base) == 0) {
- return upb_string_getref(name);
- } else {
- return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT,
- UPB_STRARG(base), UPB_STRARG(name));
- }
-}
-
/* Search for a character in a string, in reverse. */
static int my_memrchr(char *data, char c, size_t len)
{
@@ -39,181 +19,18 @@ static int my_memrchr(char *data, char c, size_t len)
return off;
}
-/* upb_def ********************************************************************/
-
-// Defs are reference counted, but can have cycles when types are
-// self-recursive or mutually recursive, so we need to be capable of collecting
-// the cycles. In our situation defs are immutable (so cycles cannot be
-// created or destroyed post-initialization). We need to be thread-safe but
-// want to avoid locks if at all possible and rely only on atomic operations.
-//
-// Our scheme is as follows. First we give each def a flag indicating whether
-// it is part of a cycle or not. Because defs are immutable, this flag will
-// never change. For acyclic defs, we can use a naive algorithm and avoid the
-// overhead of dealing with cycles. Most defs will be acyclic, and most cycles
-// will be very short.
-//
-// For defs that participate in cycles we keep two reference counts. One
-// tracks references that come from outside the cycle (we call these external
-// references), and is incremented and decremented like a regular refcount.
-// The other is a cycle refcount, and works as follows. Every cycle is
-// considered distinct, even if two cycles share members. For example, this
-// graph has two distinct cycles:
-//
-// A-->B-->C
-// ^ | |
-// +---+---+
-//
-// The cycles in this graph are AB and ABC. When A's external refcount
-// transitions from 0->1, we say that A takes "cycle references" on both
-// cycles. Taking a cycle reference means incrementing the cycle refcount of
-// all defs in the cycle. Since A and B are common to both cycles, A and B's
-// cycle refcounts will be incremented by two, and C's will be incremented by
-// one. Likewise, when A's external refcount transitions from 1->0, we
-// decrement A and B's cycle refcounts by two and C's by one. We collect a
-// cyclic type when its cycle refcount drops to zero. A precondition for this
-// is that the external refcount has dropped to zero also.
-//
-// This algorithm is relatively cheap, since it only requires extra work when
-// the external refcount on a cyclic type transitions from 0->1 or 1->0.
-
-static void upb_msgdef_free(upb_msgdef *m);
-static void upb_enumdef_free(upb_enumdef *e);
-static void upb_unresolveddef_free(struct _upb_unresolveddef *u);
-
-static void upb_def_free(upb_def *def)
-{
- switch(def->type) {
- case UPB_DEF_MSG:
- upb_msgdef_free(upb_downcast_msgdef(def));
- break;
- case UPB_DEF_ENUM:
- upb_enumdef_free(upb_downcast_enumdef(def));
- break;
- case UPB_DEF_SVC:
- assert(false); /* Unimplemented. */
- break;
- case UPB_DEF_UNRESOLVED:
- upb_unresolveddef_free(upb_downcast_unresolveddef(def));
- break;
- default:
- assert(false);
- }
-}
-
-// Depth-first search for all cycles that include cycle_base. Returns the
-// number of paths from def that lead to cycle_base, which is equivalent to the
-// number of cycles def is in that include cycle_base.
-//
-// open_defs tracks the set of nodes that are currently being visited in the
-// search so we can stop the search if we detect a cycles that do not involve
-// cycle_base. We can't color the nodes as we go by writing to a member of the
-// def, because another thread could be performing the search concurrently.
-static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base,
- upb_msgdef **open_defs, int num_open_defs,
- bool ref) {
- bool found = false;
- for(int i = 0; i < num_open_defs; i++) {
- if(open_defs[i] == m) {
- // We encountered a cycle that did not involve cycle_base.
- found = true;
- break;
- }
- }
-
- if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) {
- return 0;
- } else if(m == cycle_base) {
- return 1;
- } else {
- int path_count = 0;
- if(cycle_base == NULL) {
- cycle_base = m;
- } else {
- open_defs[num_open_defs++] = m;
- }
- upb_msg_iter iter = upb_msg_begin(m);
- for(; !upb_msg_done(iter); iter = upb_msg_next(m, iter)) {
- upb_fielddef *f = upb_msg_iter_field(iter);
- upb_def *def = f->def;
- if(upb_issubmsg(f) && def->is_cyclic) {
- upb_msgdef *sub_m = upb_downcast_msgdef(def);
- path_count += upb_cycle_ref_or_unref(sub_m, cycle_base, open_defs,
- num_open_defs, ref);
- }
- }
- if(ref) {
- upb_atomic_add(&m->cycle_refcount, path_count);
- } else {
- if(upb_atomic_add(&m->cycle_refcount, -path_count))
- upb_def_free(UPB_UPCAST(m));
- }
- return path_count;
- }
-}
-
-void _upb_def_reftozero(upb_def *def) {
- if(def->is_cyclic) {
- upb_msgdef *m = upb_downcast_msgdef(def);
- upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
- upb_cycle_ref_or_unref(m, NULL, open_defs, 0, false);
- } else {
- upb_def_free(def);
- }
-}
-
-void _upb_def_cyclic_ref(upb_def *def) {
- upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
- upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true);
-}
-
-static void upb_def_init(upb_def *def, upb_deftype type) {
- def->type = type;
- def->is_cyclic = 0; // We detect this later, after resolving refs.
- def->search_depth = 0;
- def->fqname = NULL;
- upb_atomic_init(&def->refcount, 1);
-}
-
-static void upb_def_uninit(upb_def *def) {
- upb_string_unref(def->fqname);
-}
-
-
-/* upb_defbuilder ************************************************************/
-
-// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in
-// the format defined in descriptor.proto. The output of a upb_defbuilder is
-// a list of upb_def* that possibly contain unresolved references.
-//
-// We use a separate object (upb_defbuilder) instead of having the defs handle
-// the parse themselves because we need to store state that is only necessary
-// during the building process itself.
-//
-// All of the handlers registration in this file must be done using the
-// low-level upb_register_typed_* interface, since we might not have a msgdef
-// yet (in the case of bootstrapping). This makes it more laborious than it
-// will be for real users.
-
-// upb_deflist: A little dynamic array for storing a growing list of upb_defs.
-typedef struct {
- upb_def **defs;
- uint32_t len;
- uint32_t size;
-} upb_deflist;
-
-static void upb_deflist_init(upb_deflist *l) {
+void upb_deflist_init(upb_deflist *l) {
l->size = 8;
l->defs = malloc(l->size * sizeof(void*));
l->len = 0;
}
-static void upb_deflist_uninit(upb_deflist *l) {
+void upb_deflist_uninit(upb_deflist *l) {
for(uint32_t i = 0; i < l->len; i++) upb_def_unref(l->defs[i]);
free(l->defs);
}
-static void upb_deflist_push(upb_deflist *l, upb_def *d) {
+void upb_deflist_push(upb_deflist *l, upb_def *d) {
if(l->len == l->size) {
l->size *= 2;
l->defs = realloc(l->defs, l->size * sizeof(void*));
@@ -221,179 +38,74 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) {
l->defs[l->len++] = d;
}
-static upb_def *upb_deflist_last(upb_deflist *l) {
- return l->defs[l->len-1];
-}
-
-// Qualify the defname for all defs starting with offset "start" with "str".
-static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) {
- for(uint32_t i = start; i < l->len; i++) {
- upb_def *def = l->defs[i];
- upb_string *name = def->fqname;
- def->fqname = upb_join(str, name);
- upb_string_unref(name);
- }
-}
-
-// We keep a stack of all the messages scopes we are currently in, as well as
-// the top-level file scope. This is necessary to correctly qualify the
-// definitions that are contained inside. "name" tracks the name of the
-// message or package (a bare name -- not qualified by any enclosing scopes).
-typedef struct {
- upb_string *name;
- // Index of the first def that is under this scope. For msgdefs, the
- // msgdef itself is at start-1.
- int start;
-} upb_defbuilder_frame;
-
-struct _upb_defbuilder {
- upb_deflist defs;
- upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH];
- int stack_len;
- upb_status status;
- upb_symtab *symtab;
-
- uint32_t number;
- upb_string *name;
- bool saw_number;
- bool saw_name;
-
- upb_string *default_string;
-
- upb_fielddef *f;
-};
-
-// Forward declares for top-level file descriptors.
-static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h);
-static upb_mhandlers * upb_enumdef_register_EnumDescriptorProto(upb_handlers *h);
-
-upb_defbuilder *upb_defbuilder_new(upb_symtab *s) {
- upb_defbuilder *b = malloc(sizeof(*b));
- upb_deflist_init(&b->defs);
- upb_status_init(&b->status);
- b->symtab = s;
- b->stack_len = 0;
- b->name = NULL;
- b->default_string = NULL;
- return b;
-}
-
-static void upb_defbuilder_free(upb_defbuilder *b) {
- upb_string_unref(b->name);
- upb_status_uninit(&b->status);
- upb_deflist_uninit(&b->defs);
- upb_string_unref(b->default_string);
- while (b->stack_len > 0) {
- upb_defbuilder_frame *f = &b->stack[--b->stack_len];
- upb_string_unref(f->name);
- }
- free(b);
-}
-
-static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) {
- if (b->stack_len <= 1) return NULL;
- int index = b->stack[b->stack_len-1].start - 1;
- assert(index >= 0);
- return upb_downcast_msgdef(b->defs.defs[index]);
-}
-static upb_def *upb_defbuilder_last(upb_defbuilder *b) {
- return upb_deflist_last(&b->defs);
-}
-
-// Start/end handlers for FileDescriptorProto and DescriptorProto (the two
-// entities that have names and can contain sub-definitions.
-void upb_defbuilder_startcontainer(upb_defbuilder *b) {
- upb_defbuilder_frame *f = &b->stack[b->stack_len++];
- f->start = b->defs.len;
- f->name = NULL;
-}
+/* upb_def ********************************************************************/
-void upb_defbuilder_endcontainer(upb_defbuilder *b) {
- upb_defbuilder_frame *f = &b->stack[--b->stack_len];
- upb_deflist_qualify(&b->defs, f->name, f->start);
- upb_string_unref(f->name);
-}
+static void upb_msgdef_free(upb_msgdef *m);
+static void upb_enumdef_free(upb_enumdef *e);
+static void upb_unresolveddef_free(struct _upb_unresolveddef *u);
-void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) {
- upb_defbuilder_frame *f = &b->stack[b->stack_len-1];
- upb_string_unref(f->name);
- f->name = upb_string_getref(str);
-}
+#ifndef NDEBUG
+static bool upb_def_ismutable(upb_def *def) { return def->symtab == NULL; }
+#endif
-// Handlers for google.protobuf.FileDescriptorProto.
-static upb_flow_t upb_defbuilder_FileDescriptorProto_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_defbuilder_startcontainer(b);
- return UPB_CONTINUE;
+static void upb_def_free(upb_def *def) {
+ switch (def->type) {
+ case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break;
+ case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break;
+ case UPB_DEF_UNRESOLVED:
+ upb_unresolveddef_free(upb_downcast_unresolveddef(def)); break;
+ default:
+ assert(false);
+ }
}
-static void upb_defbuilder_FileDescriptorProto_endmsg(void *_b,
- upb_status *status) {
- (void)status;
- upb_defbuilder *b = _b;
- upb_defbuilder_endcontainer(b);
+upb_def *upb_def_dup(upb_def *def) {
+ switch (def->type) {
+ case UPB_DEF_MSG: return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef(def)));
+ case UPB_DEF_ENUM: return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef(def)));
+ default: assert(false); return NULL;
+ }
}
-static upb_flow_t upb_defbuilder_FileDescriptorProto_package(void *_b,
- upb_value fval,
- upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- upb_defbuilder_setscopename(b, upb_value_getstr(val));
- return UPB_CONTINUE;
+// Prior to being in a symtab, the def's refcount controls the lifetime of the
+// def itself. If the refcount falls to zero, the def is deleted. Once the
+// def belongs to a symtab, the def is owned by the symtab and its refcount
+// determines whether the def owns a ref on the symtab or not.
+void upb_def_ref(upb_def *def) {
+ if (upb_atomic_ref(&def->refcount) && def->symtab)
+ upb_symtab_ref(def->symtab);
}
-static upb_mhandlers *upb_defbuilder_register_FileDescriptorProto(
- upb_handlers *h) {
- upb_mhandlers *m = upb_handlers_newmhandlers(h);
- upb_mhandlers_setstartmsg(m, &upb_defbuilder_FileDescriptorProto_startmsg);
- upb_mhandlers_setendmsg(m, &upb_defbuilder_FileDescriptorProto_endmsg);
-
-#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM
-#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE
- upb_fhandlers *f =
- upb_mhandlers_newfhandlers(m, FNUM(PACKAGE), FTYPE(PACKAGE), false);
- upb_fhandlers_setvalue(f, &upb_defbuilder_FileDescriptorProto_package);
-
- upb_mhandlers_newfhandlers_subm(m, FNUM(MESSAGE_TYPE), FTYPE(MESSAGE_TYPE), true,
- upb_msgdef_register_DescriptorProto(h));
- upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true,
- upb_enumdef_register_EnumDescriptorProto(h));
- // TODO: services, extensions
- return m;
+static void upb_def_movetosymtab(upb_def *d, upb_symtab *s) {
+ assert(upb_atomic_read(&d->refcount) > 0);
+ d->symtab = s;
+ if (!upb_atomic_unref(&d->refcount)) upb_symtab_ref(s);
+ upb_msgdef *m = upb_dyncast_msgdef(d);
+ if (m) upb_inttable_compact(&m->itof);
}
-#undef FNUM
-#undef FTYPE
-// Handlers for google.protobuf.FileDescriptorSet.
-static bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs,
- bool allow_redef, upb_status *status);
-
-static void upb_defbuilder_FileDescriptorSet_onendmsg(void *_b,
- upb_status *status) {
- upb_defbuilder *b = _b;
- if (upb_ok(status))
- upb_symtab_add_defs(b->symtab, b->defs.defs, b->defs.len, false, status);
- upb_defbuilder_free(b);
+void upb_def_unref(upb_def *def) {
+ if (!def) return;
+ if (upb_atomic_unref(&def->refcount)) {
+ if (def->symtab) {
+ upb_symtab_unref(def->symtab);
+ // Def might be deleted now.
+ } else {
+ upb_def_free(def);
+ }
+ }
}
-static upb_mhandlers *upb_defbuilder_register_FileDescriptorSet(upb_handlers *h) {
- upb_mhandlers *m = upb_handlers_newmhandlers(h);
- upb_mhandlers_setendmsg(m, upb_defbuilder_FileDescriptorSet_onendmsg);
-
-#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM
-#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE
- upb_mhandlers_newfhandlers_subm(m, FNUM(FILE), FTYPE(FILE), true,
- upb_defbuilder_register_FileDescriptorProto(h));
- return m;
+static void upb_def_init(upb_def *def, upb_deftype_t type) {
+ def->type = type;
+ def->fqname = NULL;
+ def->symtab = NULL;
+ upb_atomic_init(&def->refcount, 1);
}
-#undef FNUM
-#undef FTYPE
-upb_mhandlers *upb_defbuilder_reghandlers(upb_handlers *h) {
- h->should_jit = false;
- return upb_defbuilder_register_FileDescriptorSet(h);
+static void upb_def_uninit(upb_def *def) {
+ upb_string_unref(def->fqname);
}
@@ -428,6 +140,14 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) {
/* upb_enumdef ****************************************************************/
+upb_enumdef *upb_enumdef_new() {
+ upb_enumdef *e = malloc(sizeof(*e));
+ upb_def_init(&e->base, UPB_DEF_ENUM);
+ upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent));
+ upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent));
+ return e;
+}
+
static void upb_enumdef_free(upb_enumdef *e) {
upb_enum_iter i;
for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
@@ -440,129 +160,29 @@ static void upb_enumdef_free(upb_enumdef *e) {
free(e);
}
-// google.protobuf.EnumValueDescriptorProto.
-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- b->saw_number = false;
- b->saw_name = false;
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_b,
- upb_value fval,
- upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- upb_string_unref(b->name);
- b->name = upb_string_getref(upb_value_getstr(val));
- b->saw_name = true;
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_number(void *_b,
- upb_value fval,
- upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- b->number = upb_value_getint32(val);
- b->saw_number = true;
- return UPB_CONTINUE;
-}
-
-static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b,
- upb_status *status) {
- upb_defbuilder *b = _b;
- if(!b->saw_number || !b->saw_name) {
- upb_seterr(status, UPB_ERROR, "Enum value missing name or number.");
- return;
- }
- upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b));
- if (upb_inttable_count(&e->iton) == 0) {
- // The default value of an enum (in the absence of an explicit default) is
- // its first listed value.
- e->default_value = b->number;
- }
- upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number};
- upb_iton_ent iton_ent = {0, b->name};
- upb_strtable_insert(&e->ntoi, &ntoi_ent.e);
- upb_inttable_insert(&e->iton, b->number, &iton_ent);
- // We don't unref "name" because we pass our ref to the iton entry of the
- // table. strtables can ref their keys, but the inttable doesn't know that
- // the value is a string.
- b->name = NULL;
-}
-
-static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto(
- upb_handlers *h) {
- upb_mhandlers *m = upb_handlers_newmhandlers(h);
- upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumValueDescriptorProto_startmsg);
- upb_mhandlers_setendmsg(m, &upb_enumdef_EnumValueDescriptorProto_endmsg);
-
-#define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM
-#define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE
- upb_fhandlers *f;
- f = upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false);
- upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_name);
-
- f = upb_mhandlers_newfhandlers(m, FNUM(NUMBER), FTYPE(NUMBER), false);
- upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_number);
- return m;
-}
-#undef FNUM
-#undef FTYPE
-
-// google.protobuf.EnumDescriptorProto.
-static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_enumdef *e = malloc(sizeof(*e));
- upb_def_init(&e->base, UPB_DEF_ENUM);
- upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent));
- upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent));
- upb_deflist_push(&b->defs, UPB_UPCAST(e));
- return UPB_CONTINUE;
-}
-
-static void upb_enumdef_EnumDescriptorProto_endmsg(void *_b, upb_status *status) {
- upb_defbuilder *b = _b;
- upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b));
- if (upb_defbuilder_last((upb_defbuilder*)_b)->fqname == NULL) {
- upb_seterr(status, UPB_ERROR, "Enum had no name.");
- return;
- }
- if (upb_inttable_count(&e->iton) == 0) {
- upb_seterr(status, UPB_ERROR, "Enum had no values.");
- return;
+upb_enumdef *upb_enumdef_dup(upb_enumdef *e) {
+ upb_enumdef *new_e = upb_enumdef_new();
+ upb_enum_iter i;
+ for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
+ assert(upb_enumdef_addval(new_e, upb_enum_iter_name(i),
+ upb_enum_iter_number(i)));
}
+ return new_e;
}
-static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_b,
- upb_value fval,
- upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b));
- upb_string_unref(e->base.fqname);
- e->base.fqname = upb_string_getref(upb_value_getstr(val));
- return UPB_CONTINUE;
+bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num) {
+ if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL)) return false;
+ upb_ntoi_ent ntoi_ent = {{name, 0}, num};
+ upb_iton_ent iton_ent = {0, name};
+ upb_strtable_insert(&e->ntoi, &ntoi_ent.e);
+ upb_inttable_insert(&e->iton, num, &iton_ent); // Uses strtable's ref on name
+ return true;
}
-static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) {
- upb_mhandlers *m = upb_handlers_newmhandlers(h);
- upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumDescriptorProto_startmsg);
- upb_mhandlers_setendmsg(m, &upb_enumdef_EnumDescriptorProto_endmsg);
-
-#define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM
-#define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE
- upb_fhandlers *f =
- upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false);
- upb_fhandlers_setvalue(f, &upb_enumdef_EnumDescriptorProto_name);
-
- upb_mhandlers_newfhandlers_subm(m, FNUM(VALUE), FTYPE(VALUE), true,
- upb_enumdef_register_EnumValueDescriptorProto(h));
- return m;
+void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) {
+ assert(upb_def_ismutable(UPB_UPCAST(e)));
+ e->defaultval = val;
}
-#undef FNUM
-#undef FTYPE
upb_enum_iter upb_enum_begin(upb_enumdef *e) {
// We could iterate over either table here; the choice is arbitrary.
@@ -573,59 +193,89 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) {
return upb_inttable_next(&e->iton, iter);
}
-upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) {
+upb_string *upb_enumdef_iton(upb_enumdef *def, int32_t num) {
upb_iton_ent *e =
(upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e));
return e ? e->string : NULL;
}
-bool upb_enumdef_ntoi(upb_enumdef *def, upb_string *name, upb_enumval_t *num) {
+bool upb_enumdef_ntoi(upb_enumdef *def, upb_string *name, int32_t *num) {
upb_ntoi_ent *e = (upb_ntoi_ent*)upb_strtable_lookup(&def->ntoi, name);
if (!e) return false;
- *num = e->value;
+ if (num) *num = e->value;
return true;
}
/* upb_fielddef ***************************************************************/
+upb_fielddef *upb_fielddef_new() {
+ upb_fielddef *f = malloc(sizeof(*f));
+ f->msgdef = NULL;
+ f->def = NULL;
+ upb_atomic_init(&f->refcount, 1);
+ f->finalized = false;
+ f->type = 0;
+ f->label = UPB_LABEL(OPTIONAL);
+ f->hasbit = 0;
+ f->offset = 0;
+ f->number = 0; // not a valid field number.
+ f->name = NULL;
+ f->accessor = NULL;
+ upb_value_setfielddef(&f->fval, f);
+ return f;
+}
+
static void upb_fielddef_free(upb_fielddef *f) {
if (upb_isstring(f)) {
- upb_string_unref(upb_value_getstr(f->default_value));
- } else if (upb_issubmsg(f)) {
- upb_msg *m = upb_value_getmsg(f->default_value);
- assert(m);
- // We cheat a bit here. We need to unref msg, but we don't have a reliable
- // way of accessing the msgdef (which is required by upb_msg_unref()),
- // because f->def may have already been collected as part of a cycle if
- // this is an unowned ref. But we know that default messages never contain
- // references to other messages, and their only string references are to
- // the singleton empty string, so we can safely unref+free msg directly.
- if (upb_atomic_unref(&m->refcount)) free(m);
+ upb_string_unref(upb_value_getstr(f->defaultval));
}
upb_string_unref(f->name);
- if(f->owned) {
- upb_def_unref(f->def);
- }
free(f);
}
+void upb_fielddef_ref(upb_fielddef *f) {
+ // TODO.
+ (void)f;
+}
+
+void upb_fielddef_unref(upb_fielddef *f) {
+ // TODO.
+ (void)f;
+ if (!f) return;
+ if (upb_atomic_unref(&f->refcount)) {
+ if (f->msgdef) {
+ upb_msgdef_unref(f->msgdef);
+ // fielddef might be deleted now.
+ } else {
+ upb_fielddef_free(f);
+ }
+ }
+}
+
+upb_fielddef *upb_fielddef_dup(upb_fielddef *f) {
+ upb_fielddef *newf = upb_fielddef_new();
+ newf->msgdef = f->msgdef;
+ newf->type = f->type;
+ newf->label = f->label;
+ newf->number = f->number;
+ newf->name = f->name;
+ upb_fielddef_settypename(newf, f->def->fqname);
+ return f;
+}
+
static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
- if(f->owned) upb_def_unref(f->def);
+ assert(upb_dyncast_unresolveddef(f->def));
+ upb_def_unref(f->def);
f->def = def;
- // We will later make the ref unowned if it is a part of a cycle.
- f->owned = true;
- upb_def_ref(def);
- if (upb_issubmsg(f)) {
- upb_msgdef *md = upb_downcast_msgdef(def);
- upb_value_setmsg(&f->default_value, upb_msg_getref(md->default_message));
- } else if (f->type == UPB_TYPE(ENUM)) {
- upb_string *str = upb_value_getstr(f->default_value);
+ if (f->type == UPB_TYPE(ENUM)) {
+ // Resolve the enum's default from a string to an integer.
+ upb_string *str = upb_value_getstr(f->defaultval);
assert(str); // Should point to either a real default or the empty string.
upb_enumdef *e = upb_downcast_enumdef(f->def);
- upb_enumval_t val = 0;
+ int32_t val = 0;
if (str == upb_emptystring()) {
- upb_value_setint32(&f->default_value, e->default_value);
+ upb_value_setint32(&f->defaultval, e->defaultval);
} else {
bool success = upb_enumdef_ntoi(e, str, &val);
upb_string_unref(str);
@@ -634,368 +284,201 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
"member of the enum", UPB_STRARG(str));
return false;
}
- upb_value_setint32(&f->default_value, val);
+ upb_value_setint32(&f->defaultval, val);
}
}
return true;
}
-static upb_flow_t upb_fielddef_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_fielddef *f = malloc(sizeof(*f));
- f->number = -1;
- f->name = NULL;
- f->def = NULL;
- f->owned = false;
- f->msgdef = upb_defbuilder_top(b);
- b->f = f;
- return UPB_CONTINUE;
-}
-
-// Converts the default value in string "dstr" into "d". Passes a ref on dstr.
-// Returns true on success.
-static bool upb_fielddef_setdefault(upb_string *dstr, upb_value *d, int type) {
- bool success = true;
- if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
- // We'll keep the ref we had on it. We include enums in this case because
- // we need the enumdef to resolve the name, but we may not have it yet.
- // We'll resolve it later.
- if (dstr) {
- upb_value_setstr(d, dstr);
- } else {
- upb_value_setstr(d, upb_emptystring());
- }
- } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
- // We don't expect to get a default value.
- upb_string_unref(dstr);
- if (dstr != NULL) success = false;
- } else {
- // The strto* functions need the string to be NULL-terminated.
- char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr);
- char *end;
- upb_string_unref(dstr);
- switch (type) {
- case UPB_TYPE(INT32):
- case UPB_TYPE(SINT32):
- case UPB_TYPE(SFIXED32):
- if (strz) {
- long val = strtol(strz, &end, 0);
- if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
- success = false;
- else
- upb_value_setint32(d, val);
- } else {
- upb_value_setint32(d, 0);
- }
- break;
- case UPB_TYPE(INT64):
- case UPB_TYPE(SINT64):
- case UPB_TYPE(SFIXED64):
- if (strz) {
- upb_value_setint64(d, strtoll(strz, &end, 0));
- if (errno == ERANGE || *end) success = false;
- } else {
- upb_value_setint64(d, 0);
- }
- break;
- case UPB_TYPE(UINT32):
- case UPB_TYPE(FIXED32):
- if (strz) {
- unsigned long val = strtoul(strz, &end, 0);
- if (val > UINT32_MAX || errno == ERANGE || *end)
- success = false;
- else
- upb_value_setuint32(d, val);
- } else {
- upb_value_setuint32(d, 0);
- }
- break;
- case UPB_TYPE(UINT64):
- case UPB_TYPE(FIXED64):
- if (strz) {
- upb_value_setuint64(d, strtoull(strz, &end, 0));
- if (errno == ERANGE || *end) success = false;
- } else {
- upb_value_setuint64(d, 0);
- }
- break;
- case UPB_TYPE(DOUBLE):
- if (strz) {
- upb_value_setdouble(d, strtod(strz, &end));
- if (errno == ERANGE || *end) success = false;
- } else {
- upb_value_setdouble(d, 0.0);
- }
- break;
- case UPB_TYPE(FLOAT):
- if (strz) {
- upb_value_setfloat(d, strtof(strz, &end));
- if (errno == ERANGE || *end) success = false;
- } else {
- upb_value_setfloat(d, 0.0);
- }
- break;
- case UPB_TYPE(BOOL):
- if (!strz || strcmp(strz, "false") == 0)
- upb_value_setbool(d, false);
- else if (strcmp(strz, "true") == 0)
- upb_value_setbool(d, true);
- else
- success = false;
- break;
- }
- free(strz);
- }
- return success;
+void upb_fielddef_setnumber(upb_fielddef *f, int32_t number) {
+ assert(f->msgdef == NULL);
+ f->number = number;
}
-static void upb_fielddef_endmsg(void *_b, upb_status *status) {
- upb_defbuilder *b = _b;
- upb_fielddef *f = b->f;
- // TODO: verify that all required fields were present.
- assert(f->number != -1 && f->name != NULL);
- assert((f->def != NULL) == upb_hasdef(f));
+void upb_fielddef_setname(upb_fielddef *f, upb_string *name) {
+ assert(f->msgdef == NULL);
+ f->name = upb_string_getref(name);
+}
- // Field was successfully read, add it as a field of the msgdef.
- upb_msgdef *m = upb_defbuilder_top(b);
- upb_itof_ent itof_ent = {0, f->type, upb_types[f->type].native_wire_type, f};
- upb_ntof_ent ntof_ent = {{f->name, 0}, f};
- upb_inttable_insert(&m->itof, f->number, &itof_ent);
- upb_strtable_insert(&m->ntof, &ntof_ent.e);
+void upb_fielddef_settype(upb_fielddef *f, uint8_t type) {
+ assert(!f->finalized);
+ f->type = type;
+}
- upb_string *dstr = b->default_string;
- b->default_string = NULL;
- if (!upb_fielddef_setdefault(dstr, &f->default_value, f->type)) {
- // We don't worry too much about giving a great error message since the
- // compiler should have ensured this was correct.
- upb_seterr(status, UPB_ERROR, "Error converting default value.");
- return;
- }
+void upb_fielddef_setlabel(upb_fielddef *f, uint8_t label) {
+ assert(!f->finalized);
+ f->label = label;
+}
+void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) {
+ assert(!f->finalized);
+ // TODO: string ownership?
+ f->defaultval = value;
}
-static upb_flow_t upb_fielddef_ontype(void *_b, upb_value fval, upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- b->f->type = upb_value_getint32(val);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_fielddef_onlabel(void *_b, upb_value fval, upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- b->f->label = upb_value_getint32(val);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_fielddef_onnumber(void *_b, upb_value fval, upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- b->f->number = upb_value_getint32(val);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_fielddef_onname(void *_b, upb_value fval, upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- upb_string_unref(b->f->name);
- b->f->name = upb_string_getref(upb_value_getstr(val));
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_fielddef_ontypename(void *_b, upb_value fval,
- upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- upb_def_unref(b->f->def);
- b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val)));
- b->f->owned = true;
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_fielddef_ondefaultval(void *_b, upb_value fval,
- upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- // Have to convert from string to the correct type, but we might not know the
- // type yet.
- upb_string_unref(b->default_string);
- b->default_string = upb_string_getref(upb_value_getstr(val));
- return UPB_CONTINUE;
-}
-
-static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto(
- upb_handlers *h) {
- upb_mhandlers *m = upb_handlers_newmhandlers(h);
- upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg);
- upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg);
-
-#define FIELD(name, handler) \
- upb_fhandlers_setvalue( \
- upb_mhandlers_newfhandlers(m, \
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \
- false), \
- handler);
- FIELD(TYPE, &upb_fielddef_ontype);
- FIELD(LABEL, &upb_fielddef_onlabel);
- FIELD(NUMBER, &upb_fielddef_onnumber);
- FIELD(NAME, &upb_fielddef_onname);
- FIELD(TYPE_NAME, &upb_fielddef_ontypename);
- FIELD(DEFAULT_VALUE, &upb_fielddef_ondefaultval);
- return m;
+void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) {
+ assert(!f->finalized);
+ // TODO: string ownership?
+ f->fval = fval;
}
-#undef FNUM
-#undef FTYPE
+void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl) {
+ assert(!f->finalized);
+ f->accessor = vtbl;
+}
-/* upb_msgdef *****************************************************************/
+void upb_fielddef_settypename(upb_fielddef *f, upb_string *name) {
+ upb_def_unref(f->def);
+ f->def = UPB_UPCAST(upb_unresolveddef_new(name));
+}
-static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) {
- // Sort by data size (ascending) to reduce padding.
+// Returns an ordering of fields based on:
+// 1. value size (small to large).
+// 2. field number.
+static int upb_fielddef_cmpval(const void *_f1, const void *_f2) {
+ upb_fielddef *f1 = *(void**)_f1;
+ upb_fielddef *f2 = *(void**)_f2;
size_t size1 = upb_types[f1->type].size;
size_t size2 = upb_types[f2->type].size;
if (size1 != size2) return size1 - size2;
- // Otherwise return in number order (just so we get a reproduceable order.
+ // Otherwise return in number order.
return f1->number - f2->number;
}
-static int upb_compare_fields(const void *f1, const void *f2) {
- return upb_compare_typed_fields(*(void**)f1, *(void**)f2);
+// Returns an ordering of all fields based on:
+// 1. required/optional (required fields first).
+// 2. field number
+static int upb_fielddef_cmphasbit(const void *_f1, const void *_f2) {
+ upb_fielddef *f1 = *(void**)_f1;
+ upb_fielddef *f2 = *(void**)_f2;
+ size_t req1 = f1->label == UPB_LABEL(REQUIRED);
+ size_t req2 = f2->label == UPB_LABEL(REQUIRED);
+ if (req1 != req2) return req1 - req2;
+ // Otherwise return in number order.
+ return f1->number - f2->number;
}
-// google.protobuf.DescriptorProto.
-static upb_flow_t upb_msgdef_startmsg(void *_b) {
- upb_defbuilder *b = _b;
+
+/* upb_msgdef *****************************************************************/
+
+upb_msgdef *upb_msgdef_new() {
upb_msgdef *m = malloc(sizeof(*m));
upb_def_init(&m->base, UPB_DEF_MSG);
- upb_atomic_init(&m->cycle_refcount, 0);
upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
- m->default_message = NULL;
- upb_deflist_push(&b->defs, UPB_UPCAST(m));
- upb_defbuilder_startcontainer(b);
- return UPB_CONTINUE;
-}
-
-static void upb_msgdef_endmsg(void *_b, upb_status *status) {
- upb_defbuilder *b = _b;
- upb_msgdef *m = upb_defbuilder_top(b);
- if(!m->base.fqname) {
- upb_seterr(status, UPB_ERROR, "Encountered message with no name.");
- return;
+ m->size = 0;
+ m->hasbit_bytes = 0;
+ m->extension_start = 0;
+ m->extension_end = 0;
+ return m;
+}
+
+static void upb_msgdef_free(upb_msgdef *m) {
+ upb_msg_iter i;
+ for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i))
+ upb_fielddef_free(upb_msg_iter_field(i));
+ upb_strtable_free(&m->ntof);
+ upb_inttable_free(&m->itof);
+ upb_def_uninit(&m->base);
+ free(m);
+}
+
+upb_msgdef *upb_msgdef_dup(upb_msgdef *m) {
+ upb_msgdef *newm = upb_msgdef_new();
+ newm->size = m->size;
+ newm->hasbit_bytes = m->hasbit_bytes;
+ newm->extension_start = m->extension_start;
+ newm->extension_end = m->extension_end;
+ upb_msg_iter i;
+ for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i))
+ upb_msgdef_addfield(newm, upb_fielddef_dup(upb_msg_iter_field(i)));
+ return newm;
+}
+
+void upb_msgdef_setsize(upb_msgdef *m, uint16_t size) {
+ assert(upb_def_ismutable(UPB_UPCAST(m)));
+ m->size = size;
+}
+
+void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes) {
+ assert(upb_def_ismutable(UPB_UPCAST(m)));
+ m->hasbit_bytes = bytes;
+}
+
+void upb_msgdef_setextension_start(upb_msgdef *m, uint32_t start) {
+ assert(upb_def_ismutable(UPB_UPCAST(m)));
+ m->extension_start = start;
+}
+
+void upb_msgdef_setextension_end(upb_msgdef *m, uint32_t end) {
+ assert(upb_def_ismutable(UPB_UPCAST(m)));
+ m->extension_end = end;
+}
+
+bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
+ assert(upb_atomic_read(&f->refcount) > 0);
+ if (!upb_atomic_unref(&f->refcount)) upb_msgdef_ref(m);
+ if (upb_msgdef_itof(m, f->number) || upb_msgdef_ntof(m, f->name)) {
+ upb_fielddef_unref(f);
+ return false;
}
+ assert(f->msgdef == NULL);
+ f->msgdef = m;
+ upb_itof_ent itof_ent = {0, f};
+ upb_ntof_ent ntof_ent = {{f->name, 0}, f};
+ upb_inttable_insert(&m->itof, f->number, &itof_ent);
+ upb_strtable_insert(&m->ntof, &ntof_ent.e);
+ return true;
+}
+
+static int upb_div_round_up(int numerator, int denominator) {
+ /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
+ return numerator > 0 ? (numerator - 1) / denominator + 1 : 0;
+}
- upb_inttable_compact(&m->itof);
- // Create an ordering over the fields.
- int n = upb_msgdef_numfields(m);
- upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n);
- int field = 0;
+void upb_msgdef_layout(upb_msgdef *m) {
+ // Create an ordering over the fields, but only include fields with accessors.
+ upb_fielddef **sorted_fields =
+ malloc(sizeof(upb_fielddef*) * upb_msgdef_numfields(m));
+ int n = 0;
upb_msg_iter i;
for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- sorted_fields[field++] = upb_msg_iter_field(i);
+ upb_fielddef *f = upb_msg_iter_field(i);
+ if (f->accessor) sorted_fields[n++] = f;
}
- qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields);
- // Assign offsets in the msg.
- m->set_flags_bytes = upb_div_round_up(n, 8);
- m->size = sizeof(upb_atomic_t) + m->set_flags_bytes;
+ m->hasbit_bytes = upb_div_round_up(n, 8);
+ m->size = m->hasbit_bytes; // + header_size?
+ // Assign hasbits.
+ qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmphasbit);
+ for (int i = 0; i < n; i++) {
+ upb_fielddef *f = sorted_fields[i];
+ f->hasbit = i;
+ }
+
+ // Assign value offsets.
+ qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmpval);
size_t max_align = 0;
for (int i = 0; i < n; i++) {
upb_fielddef *f = sorted_fields[i];
const upb_type_info *type_info = &upb_types[f->type];
-
- // This identifies the set bit. When we implement is_initialized (a
- // general check about whether all required bits are set) we will probably
- // want to use a different ordering that puts all the required bits
- // together.
- f->field_index = i;
- f->set_bit_mask = 1 << (i % 8);
- f->set_bit_offset = i / 8;
-
- size_t size, align;
- if (upb_isarray(f)) {
+ size_t size = type_info->size;
+ size_t align = type_info->align;
+ if (upb_isseq(f)) {
size = sizeof(void*);
align = alignof(void*);
- } else {
- size = type_info->size;
- align = type_info->align;
}
+
// General alignment rules are: each member must be at an address that is a
// multiple of that type's alignment. Also, the size of the structure as a
// whole must be a multiple of the greatest alignment of any member.
- size_t offset = upb_align_up(m->size, align);
- // Offsets are relative to the end of the refcount.
- f->byte_offset = offset - sizeof(upb_atomic_t);
- m->size = offset + size;
+ f->offset = upb_align_up(m->size, align);
+ m->size = f->offset + size;
max_align = UPB_MAX(max_align, align);
}
- free(sorted_fields);
-
if (max_align > 0) m->size = upb_align_up(m->size, max_align);
- // Create default message instance, an immutable message with all default
- // values set (except submessages, which are simply marked as unset). We
- // could alternatively leave all set bits unset, but this would make
- // upb_msg_get() take its unexpected branch more often for no good reason.
- m->default_message = upb_msg_new(m);
- for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- if (!upb_issubmsg(f) && !f->type == UPB_TYPE(ENUM)) {
- upb_msg_set(m->default_message, f, f->default_value);
- }
- }
-
- upb_defbuilder_endcontainer(b);
-}
-
-static upb_flow_t upb_msgdef_onname(void *_b, upb_value fval, upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- assert(val.type == UPB_TYPE(STRING));
- upb_msgdef *m = upb_defbuilder_top(b);
- upb_string_unref(m->base.fqname);
- m->base.fqname = upb_string_getref(upb_value_getstr(val));
- upb_defbuilder_setscopename(b, upb_value_getstr(val));
- return UPB_CONTINUE;
-}
-
-static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) {
- upb_mhandlers *m = upb_handlers_newmhandlers(h);
- upb_mhandlers_setstartmsg(m, &upb_msgdef_startmsg);
- upb_mhandlers_setendmsg(m, &upb_msgdef_endmsg);
-
-#define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM
-#define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE
- upb_fhandlers *f =
- upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false);
- upb_fhandlers_setvalue(f, &upb_msgdef_onname);
-
- upb_mhandlers_newfhandlers_subm(m, FNUM(FIELD), FTYPE(FIELD), true,
- upb_fielddef_register_FieldDescriptorProto(h));
- upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true,
- upb_enumdef_register_EnumDescriptorProto(h));
-
- // DescriptorProto is self-recursive, so we must link the definition.
- upb_mhandlers_newfhandlers_subm(
- m, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, m);
-
- // TODO: extensions.
- return m;
-}
-#undef FNUM
-#undef FTYPE
-
-static void upb_msgdef_free(upb_msgdef *m)
-{
- upb_msg_unref(m->default_message, m);
- upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i))
- upb_fielddef_free(upb_msg_iter_field(i));
- upb_strtable_free(&m->ntof);
- upb_inttable_free(&m->itof);
- upb_def_uninit(&m->base);
- free(m);
+ free(sorted_fields);
}
upb_msg_iter upb_msg_begin(upb_msgdef *m) {
@@ -1006,22 +489,49 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) {
return upb_inttable_next(&m->itof, iter);
}
-/* upb_symtab adding defs *****************************************************/
-// This is a self-contained group of functions that, given a list of upb_defs
-// whose references are not yet resolved, resolves references and adds them
-// atomically to a upb_symtab.
+/* upb_symtabtxn **************************************************************/
typedef struct {
upb_strtable_entry e;
upb_def *def;
} upb_symtab_ent;
+void upb_symtabtxn_init(upb_symtabtxn *t) {
+ upb_strtable_init(&t->deftab, 16, sizeof(upb_symtab_ent));
+}
+
+void upb_symtabtxn_uninit(upb_symtabtxn *txn) {
+ upb_strtable *t = &txn->deftab;
+ upb_symtab_ent *e;
+ for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e))
+ upb_def_unref(e->def);
+ upb_strtable_free(t);
+}
+
+bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def) {
+ // TODO: check if already present.
+ upb_symtab_ent e = {{def->fqname, 0}, def};
+ upb_strtable_insert(&t->deftab, &e.e);
+ return true;
+}
+
+#if 0
+err:
+ // We need to free all defs from "tmptab."
+ upb_rwlock_unlock(&s->lock);
+ for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e;
+ e = upb_strtable_next(&tmptab, &e->e)) {
+ upb_def_unref(e->def);
+ }
+ upb_strtable_free(&tmptab);
+ return false;
+#endif
+
// Given a symbol and the base symbol inside which it is defined, find the
// symbol's definition in t.
static upb_symtab_ent *upb_resolve(upb_strtable *t,
- upb_string *base, upb_string *sym)
-{
+ upb_string *base, upb_string *sym) {
if(upb_string_len(sym) == 0) return NULL;
if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) {
// Symbols starting with '.' are absolute, so we do a single lookup.
@@ -1060,212 +570,63 @@ static upb_symtab_ent *upb_resolve(upb_strtable *t,
}
}
-// Performs a pass over the type graph to find all cycles that include m.
-static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status)
-{
- if(depth > UPB_MAX_TYPE_DEPTH) {
- // We have found a non-cyclic path from the base of the type tree that
- // exceeds the maximum allowed depth. There are many situations in upb
- // where we recurse over the type tree (like for example, right now) and an
- // absurdly deep tree could cause us to stack overflow on systems with very
- // limited stacks.
- upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was found at "
- "depth %d in the type graph, which exceeds the maximum type "
- "depth of %d.", UPB_UPCAST(m)->fqname, depth,
- UPB_MAX_TYPE_DEPTH);
- return false;
- } else if(UPB_UPCAST(m)->search_depth == 1) {
- // Cycle!
- int cycle_len = depth - 1;
- if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) {
- upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was involved "
- "in a cycle of length %d, which exceeds the maximum type "
- "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len,
- UPB_MAX_TYPE_CYCLE_LEN);
- return false;
- }
- return true;
- } else if(UPB_UPCAST(m)->search_depth > 0) {
- // This was a cycle, but did not originate from the base of our search tree.
- // We'll find it when we call find_cycles() on this node directly.
- return false;
- } else {
- UPB_UPCAST(m)->search_depth = ++depth;
- bool cycle_found = false;
- upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- if(!upb_issubmsg(f)) continue;
- upb_def *sub_def = f->def;
- upb_msgdef *sub_m = upb_downcast_msgdef(sub_def);
- if(upb_symtab_findcycles(sub_m, depth, status)) {
- cycle_found = true;
- UPB_UPCAST(m)->is_cyclic = true;
- if(f->owned) {
- upb_atomic_unref(&sub_def->refcount);
- f->owned = false;
- }
- }
- }
- UPB_UPCAST(m)->search_depth = 0;
- return cycle_found;
- }
+upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t) {
+ return upb_strtable_begin(&t->deftab);
}
-// Given a table of pending defs "tmptab" and a table of existing defs "symtab",
-// resolves all of the unresolved refs for the defs in tmptab. Also resolves
-// default values for enumerations and submessages.
-bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab,
- upb_status *status)
-{
- upb_symtab_ent *e;
- for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) {
- upb_msgdef *m = upb_dyncast_msgdef(e->def);
- if(!m) continue;
- // Type names are resolved relative to the message in which they appear.
- upb_string *base = e->e.key;
-
- upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- if(!upb_hasdef(f)) continue; // No resolving necessary.
- upb_string *name = upb_downcast_unresolveddef(f->def)->name;
-
- // Resolve from either the tmptab (pending adds) or symtab (existing
- // defs). If both exist, prefer the pending add, because it will be
- // overwriting the existing def.
- upb_symtab_ent *found;
- if(!(found = upb_resolve(tmptab, base, name)) &&
- !(found = upb_resolve(symtab, base, name))) {
- upb_seterr(status, UPB_ERROR,
- "could not resolve symbol '" UPB_STRFMT "'"
- " in context '" UPB_STRFMT "'",
- UPB_STRARG(name), UPB_STRARG(base));
- return false;
- }
-
- // Check the type of the found def.
- upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
- if(found->def->type != expected) {
- upb_seterr(status, UPB_ERROR, "Unexpected type");
- return false;
- }
- if (!upb_fielddef_resolve(f, found->def, status)) return false;
- }
- }
+upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i) {
+ return upb_strtable_next(&t->deftab, i);
+}
- // Deal with type cycles.
- for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) {
- upb_msgdef *m = upb_dyncast_msgdef(e->def);
- if(!m) continue;
- // The findcycles() call will decrement the external refcount of the
- upb_symtab_findcycles(m, 0, status);
- upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
- upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true);
- }
+bool upb_symtabtxn_done(upb_symtabtxn_iter i) {
+ return i == NULL;
+}
- return true;
+upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter) {
+ upb_symtab_ent *e = iter;
+ return e->def;
}
-// Given a list of defs, a list of extensions (in the future), and a flag
-// indicating whether the new defs can overwrite existing defs in the symtab,
-// attempts to add the given defs to the symtab. The whole operation either
-// succeeds or fails. Ownership of "defs" and "exts" is taken.
-static bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs,
- bool allow_redef, upb_status *status)
-{
- upb_rwlock_wrlock(&s->lock);
- // Build a table of the defs we mean to add, for duplicate detection and name
- // resolution.
- upb_strtable tmptab;
- upb_strtable_init(&tmptab, num_defs, sizeof(upb_symtab_ent));
- for (int i = 0; i < num_defs; i++) {
- upb_def *def = defs[i];
- upb_symtab_ent e = {{def->fqname, 0}, def};
-
- // Redefinition is never allowed within a single FileDescriptorSet.
- // Additionally, we only allow overwriting of an existing definition if
- // allow_redef is set.
- if (upb_strtable_lookup(&tmptab, def->fqname) ||
- (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) {
- upb_seterr(status, UPB_ERROR, "Redefinition of symbol " UPB_STRFMT,
- UPB_STRARG(def->fqname));
- goto err;
- }
+/* upb_symtab public interface ************************************************/
- // Pass ownership from the deflist to the strtable.
- upb_strtable_insert(&tmptab, &e.e);
- defs[i] = NULL;
+static void _upb_symtab_free(upb_strtable *t) {
+ upb_symtab_ent *e;
+ for (e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) {
+ assert(upb_atomic_read(&e->def->refcount) == 0);
+ upb_def_free(e->def);
}
+ upb_strtable_free(t);
+}
- // TODO: process the list of extensions by modifying entries from
- // tmptab in-place (copying them from the symtab first if necessary).
-
- if (!upb_resolverefs(&tmptab, &s->symtab, status)) goto err;
-
- // The defs in tmptab have been vetted, and can be added to the symtab
- // without causing errors. Now add all tmptab defs to the symtab,
- // overwriting (and releasing a ref on) any existing defs with the same
- // names. Ownership for tmptab defs passes from the tmptab to the symtab.
- upb_symtab_ent *tmptab_e;
- for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e;
- tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) {
- upb_symtab_ent *symtab_e =
- upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
- if(symtab_e) {
- upb_def_unref(symtab_e->def);
- symtab_e->def = tmptab_e->def;
- } else {
- upb_strtable_insert(&s->symtab, &tmptab_e->e);
- }
+static void upb_symtab_free(upb_symtab *s) {
+ _upb_symtab_free(&s->symtab);
+ for (uint32_t i = 0; i < s->olddefs.len; i++) {
+ upb_def *d = s->olddefs.defs[i];
+ assert(upb_atomic_read(&d->refcount) == 0);
+ upb_def_free(d);
}
+ upb_rwlock_destroy(&s->lock);
+ upb_deflist_uninit(&s->olddefs);
+ free(s);
+}
- upb_rwlock_unlock(&s->lock);
- upb_strtable_free(&tmptab);
- return true;
-
-err:
- // We need to free all defs from "tmptab."
- upb_rwlock_unlock(&s->lock);
- for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e;
- e = upb_strtable_next(&tmptab, &e->e)) {
- upb_def_unref(e->def);
+void upb_symtab_unref(upb_symtab *s) {
+ if(s && upb_atomic_unref(&s->refcount)) {
+ upb_symtab_free(s);
}
- upb_strtable_free(&tmptab);
- return false;
}
-
-/* upb_symtab public interface ************************************************/
-
-upb_symtab *upb_symtab_new()
-{
+upb_symtab *upb_symtab_new() {
upb_symtab *s = malloc(sizeof(*s));
upb_atomic_init(&s->refcount, 1);
upb_rwlock_init(&s->lock);
upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent));
- s->fds_msgdef = NULL;
+ upb_deflist_init(&s->olddefs);
return s;
}
-static void upb_free_symtab(upb_strtable *t)
-{
- upb_symtab_ent *e;
- for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e))
- upb_def_unref(e->def);
- upb_strtable_free(t);
-}
-
-void _upb_symtab_free(upb_symtab *s)
-{
- upb_free_symtab(&s->symtab);
- upb_rwlock_destroy(&s->lock);
- free(s);
-}
-
-upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type)
-{
+upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) {
upb_rwlock_rdlock(&s->lock);
int total = upb_strtable_count(&s->symtab);
// We may only use part of this, depending on how many symbols are of the
@@ -1281,13 +642,11 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type)
}
upb_rwlock_unlock(&s->lock);
*count = i;
- for(i = 0; i < *count; i++)
- upb_def_ref(defs[i]);
+ for(i = 0; i < *count; i++) upb_def_ref(defs[i]);
return defs;
}
-upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym)
-{
+upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) {
upb_rwlock_rdlock(&s->lock);
upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym);
upb_def *ret = NULL;
@@ -1299,7 +658,6 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym)
return ret;
}
-
upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) {
upb_rwlock_rdlock(&s->lock);
upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol);
@@ -1311,3 +669,109 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol)
upb_rwlock_unlock(&s->lock);
return ret;
}
+
+bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n,
+ upb_symtabtxn *txn) {
+ // This linear search makes the DFS O(n^2) in the length of the paths.
+ // Could make this O(n) with a hash table, but n is small.
+ for (int i = 0; i < n; i++) {
+ if (def == open_defs[i]) return false;
+ }
+
+ bool needcopy = false;
+ upb_msgdef *m = upb_dyncast_msgdef(def);
+ if (m) {
+ upb_msg_iter i;
+ open_defs[n++] = def;
+ for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+ upb_fielddef *f = upb_msg_iter_field(i);
+ if (!upb_hasdef(f)) continue;
+ needcopy |= upb_symtab_dfs(f->def, open_defs, n, txn);
+ }
+ }
+
+ bool replacing = (upb_strtable_lookup(&txn->deftab, m->base.fqname) != NULL);
+ if (needcopy && !replacing) {
+ upb_symtab_ent e = {{def->fqname, 0}, upb_def_dup(def)};
+ upb_strtable_insert(&txn->deftab, &e.e);
+ replacing = true;
+ }
+ return replacing;
+}
+
+bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
+ upb_rwlock_wrlock(&s->lock);
+
+ // All existing defs that can reach defs that are being replaced must
+ // themselves be replaced with versions that will point to the new defs.
+ // Do a DFS -- any path that finds a new def must replace all ancestors.
+ upb_strtable *symtab = &s->symtab;
+ upb_symtab_ent *e;
+ for(e = upb_strtable_begin(symtab); e; e = upb_strtable_next(symtab, &e->e)) {
+ upb_def *open_defs[UPB_MAX_TYPE_DEPTH];
+ upb_symtab_dfs(e->def, open_defs, 0, txn);
+ }
+
+ // Resolve all refs.
+ upb_strtable *txntab = &txn->deftab;
+ for(e = upb_strtable_begin(txntab); e; e = upb_strtable_next(txntab, &e->e)) {
+ upb_msgdef *m = upb_dyncast_msgdef(e->def);
+ if(!m) continue;
+ // Type names are resolved relative to the message in which they appear.
+ upb_string *base = m->base.fqname;
+
+ upb_msg_iter i;
+ for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+ upb_fielddef *f = upb_msg_iter_field(i);
+ if(!upb_hasdef(f)) continue; // No resolving necessary.
+ upb_string *name = upb_downcast_unresolveddef(f->def)->name;
+
+ // Resolve from either the txntab (pending adds) or symtab (existing
+ // defs). If both exist, prefer the pending add, because it will be
+ // overwriting the existing def.
+ upb_symtab_ent *found;
+ if(!(found = upb_resolve(txntab, base, name)) &&
+ !(found = upb_resolve(symtab, base, name))) {
+ upb_seterr(status, UPB_ERROR,
+ "could not resolve symbol '" UPB_STRFMT "'"
+ " in context '" UPB_STRFMT "'",
+ UPB_STRARG(name), UPB_STRARG(base));
+ return false;
+ }
+
+ // Check the type of the found def.
+ upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
+ if(found->def->type != expected) {
+ upb_seterr(status, UPB_ERROR, "Unexpected type");
+ return false;
+ }
+ if (!upb_fielddef_resolve(f, found->def, status)) return false;
+ }
+ }
+
+ // The defs in the transaction have been vetted, and can be moved to the
+ // symtab without causing errors.
+ upb_symtab_ent *tmptab_e;
+ for(tmptab_e = upb_strtable_begin(txntab); tmptab_e;
+ tmptab_e = upb_strtable_next(txntab, &tmptab_e->e)) {
+ upb_def_movetosymtab(tmptab_e->def, s);
+ upb_symtab_ent *symtab_e =
+ upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
+ if(symtab_e) {
+ upb_deflist_push(&s->olddefs, symtab_e->def);
+ symtab_e->def = tmptab_e->def;
+ } else {
+ upb_strtable_insert(&s->symtab, &tmptab_e->e);
+ }
+ }
+
+ upb_strtable_clear(txntab);
+ upb_rwlock_unlock(&s->lock);
+ upb_symtab_gc(s);
+ return true;
+}
+
+void upb_symtab_gc(upb_symtab *s) {
+ (void)s;
+ // TODO.
+}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback