diff options
Diffstat (limited to 'upb/def.c')
-rw-r--r-- | upb/def.c | 1194 |
1 files changed, 682 insertions, 512 deletions
@@ -1,7 +1,7 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2008-2009 Google Inc. See LICENSE for details. + * Copyright (c) 2008-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> */ @@ -11,168 +11,283 @@ #include "upb/bytestream.h" #include "upb/def.h" -#define alignof(t) offsetof(struct { char c; t x; }, x) +// isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. +static bool upb_isbetween(char c, char low, char high) { + return c >= low && c <= high; +} -void upb_deflist_init(upb_deflist *l) { - l->size = 8; - l->defs = malloc(l->size * sizeof(void*)); - l->len = 0; +static bool upb_isletter(char c) { + return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_'; } -void upb_deflist_uninit(upb_deflist *l) { - for(uint32_t i = 0; i < l->len; i++) upb_def_unref(l->defs[i]); - free(l->defs); +static bool upb_isalphanum(char c) { + return upb_isletter(c) || upb_isbetween(c, '0', '9'); } -void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->len == l->size) { - l->size *= 2; - l->defs = realloc(l->defs, l->size * sizeof(void*)); +static bool upb_isident(const char *str, size_t len, bool full) { + bool start = true; + for (size_t i = 0; i < len; i++) { + char c = str[i]; + if (c == '.') { + if (start || !full) return false; + start = true; + } else if (start) { + if (!upb_isletter(c)) return false; + start = false; + } else { + if (!upb_isalphanum(c)) return false; + } } - l->defs[l->len++] = d; + return !start; } /* upb_def ********************************************************************/ static void upb_msgdef_free(upb_msgdef *m); +static void upb_fielddef_free(upb_fielddef *f); static void upb_enumdef_free(upb_enumdef *e); -static void upb_unresolveddef_free(struct _upb_unresolveddef *u); -bool upb_def_ismutable(const upb_def *def) { return def->symtab == NULL; } +bool upb_def_ismutable(const upb_def *def) { return !def->is_finalized; } +bool upb_def_isfinalized(const upb_def *def) { return def->is_finalized; } -bool upb_def_setfqname(upb_def *def, const char *fqname) { +bool upb_def_setfullname(upb_def *def, const char *fullname) { assert(upb_def_ismutable(def)); - free(def->fqname); - def->fqname = strdup(fqname); - return true; // TODO: check for acceptable characters. -} - -static void upb_def_free(upb_def *def) { - switch (def->type) { - case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break; - case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break; - case UPB_DEF_UNRESOLVED: - upb_unresolveddef_free(upb_downcast_unresolveddef(def)); break; - default: - assert(false); - } + if (!upb_isident(fullname, strlen(fullname), true)) return false; + free(def->fullname); + def->fullname = strdup(fullname); + return true; } -upb_def *upb_def_dup(const upb_def *def) { +upb_def *upb_def_dup(const upb_def *def, void *o) { switch (def->type) { case UPB_DEF_MSG: - return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def))); + return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def), o)); + case UPB_DEF_FIELD: + return UPB_UPCAST(upb_fielddef_dup(upb_downcast_fielddef_const(def), o)); case UPB_DEF_ENUM: - return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def))); + return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def), o)); default: assert(false); return NULL; } } -// Prior to being in a symtab, the def's refcount controls the lifetime of the -// def itself. If the refcount falls to zero, the def is deleted. Once the -// def belongs to a symtab, the def is owned by the symtab and its refcount -// determines whether the def owns a ref on the symtab or not. -void upb_def_ref(const upb_def *_def) { - upb_def *def = (upb_def*)_def; // Need to modify refcount. - if (upb_atomic_ref(&def->refcount) && def->symtab) - upb_symtab_ref(def->symtab); -} - -static void upb_def_movetosymtab(upb_def *d, upb_symtab *s) { - assert(upb_atomic_read(&d->refcount) > 0); - d->symtab = s; - upb_symtab_ref(s); - upb_msgdef *m = upb_dyncast_msgdef(d); - if (m) upb_inttable_compact(&m->itof); +void upb_def_ref(const upb_def *_def, void *owner) { + upb_def *def = (upb_def*)_def; + upb_refcount_ref(&def->refcount, owner); } -void upb_def_unref(const upb_def *_def) { - upb_def *def = (upb_def*)_def; // Need to modify refcount. +void upb_def_unref(const upb_def *_def, void *owner) { + upb_def *def = (upb_def*)_def; if (!def) return; - if (upb_atomic_unref(&def->refcount)) { - if (def->symtab) { - upb_symtab_unref(def->symtab); - // Def might be deleted now. - } else { - upb_def_free(def); + if (!upb_refcount_unref(&def->refcount, owner)) return; + upb_def *base = def; + // Free all defs in the SCC. + do { + upb_def *next = (upb_def*)def->refcount.next; + switch (def->type) { + case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break; + case UPB_DEF_FIELD: upb_fielddef_free(upb_downcast_fielddef(def)); break; + case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break; + default: + assert(false); } - } + def = next; + } while(def != base); } -static void upb_def_init(upb_def *def, upb_deftype_t type) { +static bool upb_def_init(upb_def *def, upb_deftype_t type, void *owner) { def->type = type; - def->fqname = NULL; - def->symtab = NULL; - upb_atomic_init(&def->refcount, 1); + def->is_finalized = false; + def->fullname = NULL; + return upb_refcount_init(&def->refcount, owner); } static void upb_def_uninit(upb_def *def) { - free(def->fqname); + upb_refcount_uninit(&def->refcount); + free(def->fullname); } +void upb_def_donateref(const upb_def *_def, void *from, void *to) { + upb_def *def = (upb_def*)_def; + upb_refcount_donateref(&def->refcount, from, to); +} -/* upb_unresolveddef **********************************************************/ - -// Unresolved defs are used as temporary placeholders for a def whose name has -// not been resolved yet. During the name resolution step, all unresolved defs -// are replaced with pointers to the actual def being referenced. -typedef struct _upb_unresolveddef { - upb_def base; -} upb_unresolveddef; +static void upb_def_getsuccessors(upb_refcount *refcount, void *closure) { + upb_def *def = (upb_def*)refcount; + switch (def->type) { + case UPB_DEF_MSG: { + upb_msgdef *m = upb_downcast_msgdef(def); + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + upb_refcount_visit(refcount, &f->base.refcount, closure); + } + break; + } + case UPB_DEF_FIELD: { + upb_fielddef *f = upb_downcast_fielddef(def); + assert(f->msgdef); + upb_refcount_visit(refcount, &f->msgdef->base.refcount, closure); + upb_def *subdef = f->sub.def; + if (subdef) + upb_refcount_visit(refcount, &subdef->refcount, closure); + break; + } + case UPB_DEF_ENUM: + case UPB_DEF_SERVICE: + case UPB_DEF_ANY: + break; + } +} -// Is passed a ref on the string. -static upb_unresolveddef *upb_unresolveddef_new(const char *str) { - upb_unresolveddef *def = malloc(sizeof(*def)); - upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->base.fqname = strdup(str); - return def; +static bool upb_validate_field(const upb_fielddef *f, upb_status *s) { + if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == -1) { + upb_status_seterrliteral(s, "fielddef must have name and number set"); + return false; + } + if (upb_hassubdef(f)) { + if (f->subdef_is_symbolic) { + upb_status_seterrf(s, + "field %s has not been resolved", upb_fielddef_name(f)); + return false; + } else if (upb_fielddef_subdef(f) == NULL) { + upb_status_seterrf(s, + "field is %s missing required subdef", upb_fielddef_name(f)); + return false; + } else if (!upb_def_isfinalized(upb_fielddef_subdef(f))) { + upb_status_seterrf(s, + "field %s subtype is not being finalized", upb_fielddef_name(f)); + return false; + } + } + return true; } -static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { - upb_def_uninit(&def->base); - free(def); +bool upb_finalize(upb_def *const*defs, int n, upb_status *s) { + if (n >= UINT16_MAX - 1) { + upb_status_seterrliteral(s, "too many defs (max is 64k at a time)"); + return false; + } + + // First perform validation, in two passes so we can check that we have a + // transitive closure without needing to search. + for (int i = 0; i < n; i++) { + upb_def *def = defs[i]; + if (upb_def_isfinalized(def)) { + // Could relax this requirement if it's annoying. + upb_status_seterrliteral(s, "def is already finalized"); + goto err; + } else if (def->type == UPB_DEF_FIELD) { + upb_status_seterrliteral(s, "standalone fielddefs can not be finalized"); + goto err; + } else { + // Set now to detect transitive closure in the second pass. + def->is_finalized = true; + } + } + + for (int i = 0; i < n; i++) { + upb_msgdef *m = upb_dyncast_msgdef(defs[i]); + if (!m) continue; + upb_inttable_compact(&m->itof); + upb_msg_iter j; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + assert(f->msgdef == m); + if (!upb_validate_field(f, s)) goto err; + } + } + + // Validation all passed, now find strongly-connected components so that + // our refcounting works with cycles. + upb_refcount_findscc((upb_refcount**)defs, n, &upb_def_getsuccessors); + + // Now that ref cycles have been removed it is safe to have each fielddef + // take a ref on its subdef (if any), but only if it's a member of another + // SCC. + for (int i = 0; i < n; i++) { + upb_msgdef *m = upb_dyncast_msgdef(defs[i]); + if (!m) continue; + upb_msg_iter j; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + f->base.is_finalized = true; + // Release the ref taken in upb_msgdef_addfields(). + upb_fielddef_unref(f, m); + if (!upb_hassubdef(f)) continue; + assert(upb_fielddef_subdef(f)); + if (!upb_refcount_merged(&f->base.refcount, &f->sub.def->refcount)) { + // Subdef is part of a different strongly-connected component. + upb_def_ref(f->sub.def, &f->sub.def); + f->subdef_is_owned = true; + } + } + } + + return true; + +err: + for (int i = 0; i < n; i++) { + defs[i]->is_finalized = false; + } + return false; } /* upb_enumdef ****************************************************************/ -upb_enumdef *upb_enumdef_new() { +upb_enumdef *upb_enumdef_new(void *owner) { upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); + if (!e) return NULL; + if (!upb_def_init(&e->base, UPB_DEF_ENUM, owner)) goto err2; + if (!upb_strtable_init(&e->ntoi)) goto err2; + if (!upb_inttable_init(&e->iton)) goto err1; return e; + +err1: + upb_strtable_uninit(&e->ntoi); +err2: + free(e); + return NULL; } static void upb_enumdef_free(upb_enumdef *e) { - upb_enum_iter i; - for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { - // Frees the ref taken when the string was parsed. - free(upb_enum_iter_name(i)); - } - upb_strtable_free(&e->ntoi); - upb_inttable_free(&e->iton); + upb_inttable_iter i; + upb_inttable_begin(&i, &e->iton); + for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) { + // To clean up the strdup() from upb_enumdef_addval(). + free(upb_value_getptr(upb_inttable_iter_value(&i))); + } + upb_strtable_uninit(&e->ntoi); + upb_inttable_uninit(&e->iton); upb_def_uninit(&e->base); free(e); } -upb_enumdef *upb_enumdef_dup(const upb_enumdef *e) { - upb_enumdef *new_e = upb_enumdef_new(); +upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, void *owner) { + upb_enumdef *new_e = upb_enumdef_new(owner); + if (!new_e) return NULL; upb_enum_iter i; - for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { - assert(upb_enumdef_addval(new_e, upb_enum_iter_name(i), - upb_enum_iter_number(i))); + for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { + bool success = upb_enumdef_addval( + new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i)); + if (!success) { + upb_enumdef_unref(new_e, owner); + return NULL; + } } return new_e; } -bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num) { - if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL)) +bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num) { + if (!upb_isident(name, strlen(name), false)) return false; + if (upb_enumdef_ntoi(e, name, NULL)) + return false; + if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) + return false; + if (!upb_inttable_lookup(&e->iton, num) && + !upb_inttable_insert(&e->iton, num, upb_value_ptr(strdup(name)))) return false; - upb_iton_ent ent = {0, strdup(name)}; - upb_strtable_insert(&e->ntoi, name, &num); - upb_inttable_insert(&e->iton, num, &ent); return true; } @@ -181,42 +296,70 @@ void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) { e->defaultval = val; } -upb_enum_iter upb_enum_begin(const upb_enumdef *e) { - // We could iterate over either table here; the choice is arbitrary. - return upb_inttable_begin(&e->iton); +void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) { + // We iterate over the ntoi table, to account for duplicate numbers. + upb_strtable_begin(i, &e->ntoi); } -upb_enum_iter upb_enum_next(const upb_enumdef *e, upb_enum_iter iter) { - return upb_inttable_next(&e->iton, iter); -} +void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); } +bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); } -const char *upb_enumdef_iton(upb_enumdef *def, int32_t num) { - upb_iton_ent *e = upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); - return e ? e->str : NULL; -} - -bool upb_enumdef_ntoil(upb_enumdef *def, const char *name, size_t len, int32_t *num) { - upb_ntoi_ent *e = upb_strtable_lookupl(&def->ntoi, name, len); - if (!e) return false; - if (num) *num = e->value; +bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, int32_t *num) { + const upb_value *v = upb_strtable_lookup(&def->ntoi, name); + if (!v) return false; + if (num) *num = upb_value_getint32(*v); return true; } -bool upb_enumdef_ntoi(upb_enumdef *e, const char *name, int32_t *num) { - return upb_enumdef_ntoil(e, name, strlen(name), num); +const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { + const upb_value *v = upb_inttable_lookup32(&def->iton, num); + return v ? upb_value_getptr(*v) : NULL; } /* upb_fielddef ***************************************************************/ +#define alignof(t) offsetof(struct { char c; t x; }, x) +#define TYPE_INFO(ctype, inmemory_type) \ + {alignof(ctype), sizeof(ctype), UPB_CTYPE_ ## inmemory_type} + +const upb_typeinfo upb_types[UPB_NUM_TYPES] = { + // END_GROUP is not real, but used to signify the pseudo-field that + // ends a group from within the group. + TYPE_INFO(void*, PTR), // ENDGROUP + TYPE_INFO(double, DOUBLE), // DOUBLE + TYPE_INFO(float, FLOAT), // FLOAT + TYPE_INFO(int64_t, INT64), // INT64 + TYPE_INFO(uint64_t, UINT64), // UINT64 + TYPE_INFO(int32_t, INT32), // INT32 + TYPE_INFO(uint64_t, UINT64), // FIXED64 + TYPE_INFO(uint32_t, UINT32), // FIXED32 + TYPE_INFO(bool, BOOL), // BOOL + TYPE_INFO(void*, BYTEREGION), // STRING + TYPE_INFO(void*, PTR), // GROUP + TYPE_INFO(void*, PTR), // MESSAGE + TYPE_INFO(void*, BYTEREGION), // BYTES + TYPE_INFO(uint32_t, UINT32), // UINT32 + TYPE_INFO(uint32_t, INT32), // ENUM + TYPE_INFO(int32_t, INT32), // SFIXED32 + TYPE_INFO(int64_t, INT64), // SFIXED64 + TYPE_INFO(int32_t, INT32), // SINT32 + TYPE_INFO(int64_t, INT64), // SINT64 +}; + static void upb_fielddef_init_default(upb_fielddef *f); -upb_fielddef *upb_fielddef_new() { +upb_fielddef *upb_fielddef_new(void *owner) { upb_fielddef *f = malloc(sizeof(*f)); + if (!f) return NULL; + if (!upb_def_init(UPB_UPCAST(f), UPB_DEF_FIELD, owner)) { + free(f); + return NULL; + } f->msgdef = NULL; - f->def = NULL; - upb_atomic_init(&f->refcount, 1); - f->finalized = false; + f->sub.def = NULL; + f->subdef_is_symbolic = false; + f->subdef_is_owned = false; f->label = UPB_LABEL(OPTIONAL); f->hasbit = -1; f->offset = 0; @@ -226,14 +369,68 @@ upb_fielddef *upb_fielddef_new() { // These are initialized to be invalid; the user must set them explicitly. // Could relax this later if it's convenient and non-confusing to have a // defaults for them. - f->name = NULL; - f->type = 0; + f->type = UPB_TYPE_NONE; f->number = 0; upb_fielddef_init_default(f); return f; } +static void upb_fielddef_uninit_default(upb_fielddef *f) { + if (f->default_is_string) + upb_byteregion_free(upb_value_getbyteregion(f->defaultval)); +} + +static void upb_fielddef_free(upb_fielddef *f) { + if (f->subdef_is_owned) + upb_def_unref(f->sub.def, &f->sub.def); + upb_fielddef_uninit_default(f); + upb_def_uninit(UPB_UPCAST(f)); + free(f); +} + +upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, void *owner) { + upb_fielddef *newf = upb_fielddef_new(owner); + if (!newf) return NULL; + upb_fielddef_settype(newf, upb_fielddef_type(f)); + upb_fielddef_setlabel(newf, upb_fielddef_label(f)); + upb_fielddef_setnumber(newf, upb_fielddef_number(f)); + upb_fielddef_setname(newf, upb_fielddef_name(f)); + upb_fielddef_sethasbit(newf, upb_fielddef_hasbit(f)); + upb_fielddef_setoffset(newf, upb_fielddef_offset(f)); + upb_fielddef_setaccessor(newf, upb_fielddef_accessor(f)); + upb_fielddef_setfval(newf, upb_fielddef_fval(f)); + if (f->default_is_string) { + upb_byteregion *r = upb_value_getbyteregion(upb_fielddef_default(f)); + size_t len; + const char *ptr = upb_byteregion_getptr(r, 0, &len); + assert(len == upb_byteregion_len(r)); + upb_fielddef_setdefaultstr(newf, ptr, len); + } else { + upb_fielddef_setdefault(newf, upb_fielddef_default(f)); + } + + const char *srcname; + if (f->subdef_is_symbolic) { + srcname = f->sub.name; // Might be NULL. + } else { + srcname = f->sub.def ? upb_def_fullname(f->sub.def) : NULL; + } + if (srcname) { + char *newname = malloc(strlen(f->sub.def->fullname) + 2); + if (!newname) { + upb_fielddef_unref(newf, owner); + return NULL; + } + strcpy(newname, "."); + strcat(newname, f->sub.def->fullname); + upb_fielddef_setsubtypename(newf, newname); + free(newname); + } + + return newf; +} + static void upb_fielddef_init_default(upb_fielddef *f) { f->default_is_string = false; switch (upb_fielddef_type(f)) { @@ -253,105 +450,62 @@ static void upb_fielddef_init_default(upb_fielddef *f) { case UPB_TYPE(BOOL): upb_value_setbool(&f->defaultval, false); break; case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - f->default_is_string = true; - upb_value_setbyteregion(&f->defaultval, upb_byteregion_new("")); - break; + upb_value_setbyteregion(&f->defaultval, upb_byteregion_new("")); + f->default_is_string = true; + break; case UPB_TYPE(GROUP): case UPB_TYPE(MESSAGE): upb_value_setptr(&f->defaultval, NULL); break; + case UPB_TYPE_ENDGROUP: assert(false); + case UPB_TYPE_NONE: break; } } -static void upb_fielddef_uninit_default(upb_fielddef *f) { - if (f->default_is_string) { - upb_byteregion_free(upb_value_getbyteregion(f->defaultval)); - } -} - -static void upb_fielddef_free(upb_fielddef *f) { - upb_fielddef_uninit_default(f); - if (f->def) { - // We own a ref on the subdef iff we are not part of a msgdef. - if (f->msgdef == NULL) { - if (f->def) upb_downcast_unresolveddef(f->def); // assert() check. - upb_def_unref(f->def); - } - } - free(f->name); - free(f); -} - -void upb_fielddef_ref(upb_fielddef *f) { - // TODO. - (void)f; -} - -void upb_fielddef_unref(upb_fielddef *f) { - // TODO. - (void)f; - if (!f) return; - if (upb_atomic_unref(&f->refcount)) { - if (f->msgdef) { - upb_msgdef_unref(f->msgdef); - // fielddef might be deleted now. - } else { - upb_fielddef_free(f); - } +const upb_def *upb_fielddef_subdef(const upb_fielddef *f) { + if (upb_hassubdef(f) && upb_fielddef_isfinalized(f)) { + assert(f->sub.def); + return f->sub.def; + } else { + return f->subdef_is_symbolic ? NULL : f->sub.def; } } -upb_fielddef *upb_fielddef_dup(upb_fielddef *f) { - upb_fielddef *newf = upb_fielddef_new(); - newf->msgdef = f->msgdef; - newf->type = f->type; - newf->label = f->label; - newf->number = f->number; - newf->name = f->name; - upb_fielddef_settypename(newf, f->def->fqname); - return f; +upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) { + return (upb_def*)upb_fielddef_subdef(f); } -bool upb_fielddef_ismutable(const upb_fielddef *f) { - return !f->msgdef || upb_def_ismutable(UPB_UPCAST(f->msgdef)); +const char *upb_fielddef_subtypename(upb_fielddef *f) { + assert(upb_fielddef_ismutable(f)); + return f->subdef_is_symbolic ? f->sub.name : NULL; } -upb_def *upb_fielddef_subdef(const upb_fielddef *f) { - if (upb_hassubdef(f) && !upb_fielddef_ismutable(f)) - return f->def; - else - return NULL; -} - -static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) { - assert(upb_dyncast_unresolveddef(f->def)); - upb_def_unref(f->def); - f->def = def; - if (f->type == UPB_TYPE(ENUM) && f->default_is_string) { - // Resolve the enum's default from a string to an integer. - upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); - assert(bytes); // Points to either a real default or the empty string. - upb_enumdef *e = upb_downcast_enumdef(f->def); - int32_t val = 0; - // Could do a sanity check that the default value does not have embedded - // NULLs. - if (upb_byteregion_len(bytes) == 0) { - upb_value_setint32(&f->defaultval, e->defaultval); - } else { - size_t len; - // ptr is guaranteed to be NULL-terminated because the byteregion was - // created with upb_byteregion_newl(). - const char *ptr = upb_byteregion_getptr(bytes, 0, &len); - assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk. - bool success = upb_enumdef_ntoi(e, ptr, &val); - if (!success) { - upb_status_seterrf( - s, "Default enum value (%s) is not a member of the enum", ptr); - return false; - } - upb_value_setint32(&f->defaultval, val); +// Could expose this to clients if a client wants to call it independently +// of upb_resolve() for whatever reason. +static bool upb_fielddef_resolvedefault(upb_fielddef *f, upb_status *s) { + if (!f->default_is_string) return true; + // Resolve the enum's default from a string to an integer. + upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); + assert(bytes); // Points to either a real default or the empty string. + upb_enumdef *e = upb_downcast_enumdef(upb_fielddef_subdef_mutable(f)); + int32_t val = 0; + if (upb_byteregion_len(bytes) == 0) { + upb_value_setint32(&f->defaultval, e->defaultval); + } else { + size_t len; + // ptr is guaranteed to be NULL-terminated because the byteregion was + // created with upb_byteregion_newl(). + const char *ptr = upb_byteregion_getptr( + bytes, upb_byteregion_startofs(bytes), &len); + assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk. + bool success = upb_enumdef_ntoi(e, ptr, &val); + if (!success) { + upb_status_seterrf( + s, "Default enum value (%s) is not a member of the enum", ptr); + return false; } - f->default_is_string = false; - upb_byteregion_free(bytes); + upb_value_setint32(&f->defaultval, val); } + f->default_is_string = false; + upb_byteregion_free(bytes); return true; } @@ -361,42 +515,50 @@ bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number) { return true; } -bool upb_fielddef_setname(upb_fielddef *f, const char *name) { - assert(f->msgdef == NULL); - free(f->name); - f->name = strdup(name); - return true; -} - -bool upb_fielddef_settype(upb_fielddef *f, uint8_t type) { - assert(!f->finalized); +bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) { + assert(upb_fielddef_ismutable(f)); upb_fielddef_uninit_default(f); f->type = type; upb_fielddef_init_default(f); return true; } -bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label) { - assert(!f->finalized); +bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) { + assert(upb_fielddef_ismutable(f)); f->label = label; return true; } void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) { - assert(!f->finalized); - assert(!upb_isstring(f)); + assert(upb_fielddef_ismutable(f)); + assert(!upb_isstring(f) && !upb_issubmsg(f)); + if (f->default_is_string) { + upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); + assert(bytes); + upb_byteregion_free(bytes); + } f->defaultval = value; + f->default_is_string = false; } -void upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) { +bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) { assert(upb_isstring(f) || f->type == UPB_TYPE(ENUM)); if (f->default_is_string) { upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); assert(bytes); upb_byteregion_free(bytes); - } - upb_value_setbyteregion(&f->defaultval, upb_byteregion_newl(str, len)); + } else { + assert(f->type == UPB_TYPE(ENUM)); + } + if (f->type == UPB_TYPE(ENUM) && !upb_isident(str, len, false)) return false; + upb_byteregion *r = upb_byteregion_newl(str, len); + upb_value_setbyteregion(&f->defaultval, r); + upb_bytesuccess_t ret = upb_byteregion_fetch(r); + (void)ret; + assert(ret == (len == 0 ? UPB_BYTE_EOF : UPB_BYTE_OK)); + assert(upb_byteregion_available(r, 0) == upb_byteregion_len(r)); f->default_is_string = true; + return true; } void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) { @@ -404,82 +566,106 @@ void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) { } void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) { - assert(!f->finalized); - // TODO: string ownership? + assert(upb_fielddef_ismutable(f)); + // TODO: we need an ownership/freeing mechanism for dynamically-allocated + // fvals. One possibility is to let the user supply a free() function + // and call it when the fval is no longer referenced. Would have to + // ensure that no common use cases need cycles. + // + // For now the fval has no ownership; the caller must simply guarantee + // somehow that it outlives any handlers/plan. f->fval = fval; } -void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl) { - assert(!f->finalized); - f->accessor = vtbl; +void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit) { + assert(upb_fielddef_ismutable(f)); + f->hasbit = hasbit; } -bool upb_fielddef_settypename(upb_fielddef *f, const char *name) { - upb_def_unref(f->def); - f->def = UPB_UPCAST(upb_unresolveddef_new(name)); - return true; +void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset) { + assert(upb_fielddef_ismutable(f)); + f->offset = offset; } -// Returns an ordering of fields based on: -// 1. value size (small to large). -// 2. field number. -static int upb_fielddef_cmpval(const void *_f1, const void *_f2) { - upb_fielddef *f1 = *(void**)_f1; - upb_fielddef *f2 = *(void**)_f2; - size_t size1 = upb_types[f1->type].size; - size_t size2 = upb_types[f2->type].size; - if (size1 != size2) return size1 - size2; - // Otherwise return in number order. - return f1->number - f2->number; +void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *tbl) { + assert(upb_fielddef_ismutable(f)); + f->accessor = tbl; } -// Returns an ordering of all fields based on: -// 1. required/optional (required fields first). -// 2. field number -static int upb_fielddef_cmphasbit(const void *_f1, const void *_f2) { - upb_fielddef *f1 = *(void**)_f1; - upb_fielddef *f2 = *(void**)_f2; - size_t req1 = f1->label == UPB_LABEL(REQUIRED); - size_t req2 = f2->label == UPB_LABEL(REQUIRED); - if (req1 != req2) return req1 - req2; - // Otherwise return in number order. - return f1->number - f2->number; +static bool upb_subtype_typecheck(upb_fielddef *f, const upb_def *subdef) { + if (f->type == UPB_TYPE(MESSAGE) || f->type == UPB_TYPE(GROUP)) + return upb_dyncast_msgdef_const(subdef) != NULL; + else if (f->type == UPB_TYPE(ENUM)) + return upb_dyncast_enumdef_const(subdef) != NULL; + else { + assert(false); + return false; + } +} + +bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef) { + assert(upb_fielddef_ismutable(f)); + assert(upb_hassubdef(f)); + assert(subdef); + if (!upb_subtype_typecheck(f, subdef)) return false; + if (f->subdef_is_symbolic) free(f->sub.name); + f->sub.def = subdef; + f->subdef_is_symbolic = false; + return true; +} + +bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name) { + assert(upb_fielddef_ismutable(f)); + assert(upb_hassubdef(f)); + if (f->subdef_is_symbolic) free(f->sub.name); + f->sub.name = strdup(name); + f->subdef_is_symbolic = true; + return true; } /* upb_msgdef *****************************************************************/ -upb_msgdef *upb_msgdef_new() { +upb_msgdef *upb_msgdef_new(void *owner) { upb_msgdef *m = malloc(sizeof(*m)); - upb_def_init(&m->base, UPB_DEF_MSG); - upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); - upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); + if (!m) return NULL; + if (!upb_def_init(&m->base, UPB_DEF_MSG, owner)) goto err2; + if (!upb_inttable_init(&m->itof)) goto err2; + if (!upb_strtable_init(&m->ntof)) goto err1; m->size = 0; m->hasbit_bytes = 0; m->extstart = 0; m->extend = 0; return m; + +err1: + upb_inttable_uninit(&m->itof); +err2: + free(m); + return NULL; } static void upb_msgdef_free(upb_msgdef *m) { - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) - upb_fielddef_free(upb_msg_iter_field(i)); - upb_strtable_free(&m->ntof); - upb_inttable_free(&m->itof); + upb_strtable_uninit(&m->ntof); + upb_inttable_uninit(&m->itof); upb_def_uninit(&m->base); free(m); } -upb_msgdef *upb_msgdef_dup(const upb_msgdef *m) { - upb_msgdef *newm = upb_msgdef_new(); - newm->size = m->size; - newm->hasbit_bytes = m->hasbit_bytes; - newm->extstart = m->extstart; - newm->extend = m->extend; +upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, void *owner) { + upb_msgdef *newm = upb_msgdef_new(owner); + if (!newm) return NULL; + upb_msgdef_setsize(newm, upb_msgdef_size(m)); + upb_msgdef_sethasbit_bytes(newm, upb_msgdef_hasbit_bytes(m)); + upb_msgdef_setextrange(newm, upb_msgdef_extstart(m), upb_msgdef_extend(m)); + upb_def_setfullname(UPB_UPCAST(newm), upb_def_fullname(UPB_UPCAST(m))); upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_msgdef_addfield(newm, upb_fielddef_dup(upb_msg_iter_field(i))); + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f); + if (!f || !upb_msgdef_addfield(newm, f, &f)) { + upb_msgdef_unref(newm, owner); + return NULL; + } } return newm; } @@ -506,160 +692,69 @@ bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end) { return true; } -bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n) { +bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n, + void *ref_donor) { // Check constraints for all fields before performing any action. for (int i = 0; i < n; i++) { upb_fielddef *f = fields[i]; - assert(upb_atomic_read(&f->refcount) > 0); - if (f->name == NULL || f->number == 0 || - upb_msgdef_itof(m, f->number) || upb_msgdef_ntof(m, f->name)) + if (f->msgdef != NULL || + upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0 || + upb_msgdef_itof(m, upb_fielddef_number(f)) || + upb_msgdef_ntof(m, upb_fielddef_name(f))) return false; } // Constraint checks ok, perform the action. for (int i = 0; i < n; i++) { upb_fielddef *f = fields[i]; - upb_msgdef_ref(m); - assert(f->msgdef == NULL); f->msgdef = m; - upb_itof_ent itof_ent = {0, f}; - upb_inttable_insert(&m->itof, f->number, &itof_ent); - upb_strtable_insert(&m->ntof, f->name, &f); + upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f)); + upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f)); + upb_fielddef_ref(f, m); + if (ref_donor) upb_fielddef_unref(f, ref_donor); } return true; } -static int upb_div_round_up(int numerator, int denominator) { - /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ - return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; -} - -void upb_msgdef_layout(upb_msgdef *m) { - // Create an ordering over the fields, but only include fields with accessors. - upb_fielddef **sorted_fields = - malloc(sizeof(upb_fielddef*) * upb_msgdef_numfields(m)); - int n = 0; - upb_msg_iter i; - for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if (f->accessor) sorted_fields[n++] = f; - } - - m->hasbit_bytes = upb_div_round_up(n, 8); - m->size = m->hasbit_bytes; // + header_size? - - // Assign hasbits. - qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmphasbit); - for (int i = 0; i < n; i++) { - upb_fielddef *f = sorted_fields[i]; - f->hasbit = i; - } - - // Assign value offsets. - qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmpval); - size_t max_align = 0; - for (int i = 0; i < n; i++) { - upb_fielddef *f = sorted_fields[i]; - const upb_type_info *type_info = &upb_types[f->type]; - size_t size = type_info->size; - size_t align = type_info->align; - if (upb_isseq(f)) { - size = sizeof(void*); - align = alignof(void*); - } - - // General alignment rules are: each member must be at an address that is a - // multiple of that type's alignment. Also, the size of the structure as a - // whole must be a multiple of the greatest alignment of any member. - f->offset = upb_align_up(m->size, align); - m->size = f->offset + size; - max_align = UPB_MAX(max_align, align); - } - if (max_align > 0) m->size = upb_align_up(m->size, max_align); - - free(sorted_fields); -} - -upb_msg_iter upb_msg_begin(const upb_msgdef *m) { - return upb_inttable_begin(&m->itof); +void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) { + upb_inttable_begin(iter, &m->itof); } -upb_msg_iter upb_msg_next(const upb_msgdef *m, upb_msg_iter iter) { - return upb_inttable_next(&m->itof, iter); -} +void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); } /* upb_symtab *****************************************************************/ -typedef struct { - upb_def *def; -} upb_symtab_ent; - -// Given a symbol and the base symbol inside which it is defined, find the -// symbol's definition in t. -static upb_symtab_ent *upb_resolve(const upb_strtable *t, - const char *base, const char *sym) { - if(strlen(sym) == 0) return NULL; - if(sym[0] == UPB_SYMBOL_SEPARATOR) { - // Symbols starting with '.' are absolute, so we do a single lookup. - // Slice to omit the leading '.' - return upb_strtable_lookup(t, sym + 1); - } else { - // Remove components from base until we find an entry or run out. - // TODO: This branch is totally broken, but currently not used. - (void)base; - assert(false); - return NULL; - } -} - -static void _upb_symtab_free(upb_strtable *t) { - upb_strtable_iter i; - upb_strtable_begin(&i, t); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { - const upb_symtab_ent *e = upb_strtable_iter_value(&i); - assert(upb_atomic_read(&e->def->refcount) == 0); - upb_def_free(e->def); - } - upb_strtable_free(t); -} - static void upb_symtab_free(upb_symtab *s) { - _upb_symtab_free(&s->symtab); - for (uint32_t i = 0; i < s->olddefs.len; i++) { - upb_def *d = s->olddefs.defs[i]; - assert(upb_atomic_read(&d->refcount) == 0); - upb_def_free(d); - } - upb_rwlock_destroy(&s->lock); - upb_deflist_uninit(&s->olddefs); + upb_strtable_iter i; + upb_strtable_begin(&i, &s->symtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) + upb_def_unref(upb_value_getptr(upb_strtable_iter_value(&i)), s); + upb_strtable_uninit(&s->symtab); free(s); } void upb_symtab_ref(const upb_symtab *_s) { upb_symtab *s = (upb_symtab*)_s; - upb_atomic_ref(&s->refcount); + s->refcount++; } void upb_symtab_unref(const upb_symtab *_s) { upb_symtab *s = (upb_symtab*)_s; - if(s && upb_atomic_unref(&s->refcount)) { + if(s && --s->refcount == 0) { upb_symtab_free(s); } } upb_symtab *upb_symtab_new() { upb_symtab *s = malloc(sizeof(*s)); - upb_atomic_init(&s->refcount, 1); - upb_rwlock_init(&s->lock); - upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent)); - upb_deflist_init(&s->olddefs); + s->refcount = 1; + upb_strtable_init(&s->symtab); return s; } const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count, - upb_deftype_t type) { - upb_rwlock_rdlock(&s->lock); + upb_deftype_t type, void *owner) { int total = upb_strtable_count(&s->symtab); // We may only use part of this, depending on how many symbols are of the // correct type. @@ -668,177 +763,252 @@ const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count, upb_strtable_begin(&iter, &s->symtab); int i = 0; for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { - const upb_symtab_ent *e = upb_strtable_iter_value(&iter); - upb_def *def = e->def; + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); assert(def); if(type == UPB_DEF_ANY || def->type == type) defs[i++] = def; } - upb_rwlock_unlock(&s->lock); *count = i; - for(i = 0; i < *count; i++) upb_def_ref(defs[i]); + if (owner) + for(i = 0; i < *count; i++) upb_def_ref(defs[i], owner); return defs; } -const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) { - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); +const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym, + void *owner) { + const upb_value *v = upb_strtable_lookup(&s->symtab, sym); + upb_def *ret = v ? upb_value_getptr(*v) : NULL; + if (ret) upb_def_ref(ret, owner); return ret; } -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); +const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym, + void *owner) { + const upb_value *v = upb_strtable_lookup(&s->symtab, sym); + upb_def *def = v ? upb_value_getptr(*v) : NULL; upb_msgdef *ret = NULL; - if(e && e->def->type == UPB_DEF_MSG) { - ret = upb_downcast_msgdef(e->def); - upb_def_ref(UPB_UPCAST(ret)); + if(def && def->type == UPB_DEF_MSG) { + ret = upb_downcast_msgdef(def); + upb_def_ref(def, owner); } - upb_rwlock_unlock(&s->lock); return ret; } +// Given a symbol and the base symbol inside which it is defined, find the +// symbol's definition in t. +static upb_def *upb_resolvename(const upb_strtable *t, + const char *base, const char *sym) { + if(strlen(sym) == 0) return NULL; + if(sym[0] == UPB_SYMBOL_SEPARATOR) { + // Symbols starting with '.' are absolute, so we do a single lookup. + // Slice to omit the leading '.' + const upb_value *v = upb_strtable_lookup(t, sym + 1); + return v ? upb_value_getptr(*v) : NULL; + } else { + // Remove components from base until we find an entry or run out. + // TODO: This branch is totally broken, but currently not used. + (void)base; + assert(false); + return NULL; + } +} + const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym) { - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_resolve(&s->symtab, base, sym); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); + const char *sym, void *owner) { + upb_def *ret = upb_resolvename(&s->symtab, base, sym); + if (ret) upb_def_ref(ret, owner); return ret; } -bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n, - upb_strtable *addtab) { - // This linear search makes the DFS O(n^2) in the length of the paths. - // Could make this O(n) with a hash table, but n is small. - for (int i = 0; i < n; i++) { - if (def == open_defs[i]) return false; - } - - bool needcopy = false; - upb_msgdef *m = upb_dyncast_msgdef(def); - if (m) { - upb_msg_iter i; - open_defs[n++] = def; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if (!upb_hassubdef(f)) continue; - needcopy |= upb_symtab_dfs(f->def, open_defs, n, addtab); +// Adds dups of any existing def that can reach a def with the same name as one +// of "defs." This is to provide a consistent output graph as documented in +// the header file. We use a modified depth-first traversal that traverses +// each SCC (which we already computed) as if it were a single node. This +// allows us to traverse the possibly-cyclic graph as if it were a DAG and to +// easily dup the correct set of nodes with O(n) time. +// +// Returns true if defs that can reach "def" need to be duplicated into deftab. +static bool upb_resolve_dfs(const upb_def *def, upb_strtable *deftab, + void *new_owner, upb_inttable *seen, + upb_status *s) { + // Memoize results of this function for efficiency (since we're traversing a + // DAG this is not needed to limit the depth of the search). + upb_value *v = upb_inttable_lookup(seen, (uintptr_t)def); + if (v) return upb_value_getbool(*v); + + // Visit submessages for all messages in the SCC. + bool need_dup = false; + const upb_def *base = def; + do { + assert(upb_def_isfinalized(def)); + if (def->type == UPB_DEF_FIELD) continue; + upb_value *v = upb_strtable_lookup(deftab, upb_def_fullname(def)); + if (v) { + upb_def *add_def = upb_value_getptr(*v); + if (add_def->refcount.next && add_def->refcount.next != &def->refcount) { + upb_status_seterrf(s, "conflicting existing defs for name: '%s'", + upb_def_fullname(def)); + return false; + } + need_dup = true; + } + const upb_msgdef *m = upb_dyncast_msgdef_const(def); + if (m) { + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + if (!upb_hassubdef(f)) continue; + // |= to avoid short-circuit; we need its side-effects. + need_dup |= upb_resolve_dfs( + upb_fielddef_subdef_mutable(f), deftab, new_owner, seen, s); + if (!upb_ok(s)) return false; + } } + } while ((def = (upb_def*)def->refcount.next) != base); + + if (need_dup) { + // Dup any defs that don't already have entries in deftab. + def = base; + do { + if (def->type == UPB_DEF_FIELD) continue; + const char *name = upb_def_fullname(def); + if (upb_strtable_lookup(deftab, name) == NULL) { + upb_def *newdef = upb_def_dup(def, new_owner); + if (!newdef) goto oom; + // We temporarily use this field to track who we were dup'd from. + newdef->refcount.next = (upb_refcount*)def; + if (!upb_strtable_insert(deftab, name, upb_value_ptr(newdef))) + goto oom; + } + } while ((def = (upb_def*)def->refcount.next) != base); } - bool replacing = (upb_strtable_lookup(addtab, m->base.fqname) != NULL); - if (needcopy && !replacing) { - upb_symtab_ent e = {upb_def_dup(def)}; - upb_strtable_insert(addtab, def->fqname, &e); - replacing = true; - } - return replacing; -} + upb_inttable_insert(seen, (uintptr_t)def, upb_value_bool(need_dup)); + return need_dup; -bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status) { - upb_rwlock_wrlock(&s->lock); +oom: + upb_status_seterrliteral(s, "out of memory"); + return false; +} - // Add all defs to a table for resolution. +bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, + upb_status *status) { + upb_def **add_defs = NULL; upb_strtable addtab; - upb_strtable_init(&addtab, n, sizeof(upb_symtab_ent)); + if (!upb_strtable_init(&addtab)) { + upb_status_seterrliteral(status, "out of memory"); + return false; + } + + // Add new defs to table. for (int i = 0; i < n; i++) { upb_def *def = defs[i]; - if (upb_strtable_lookup(&addtab, def->fqname)) { - upb_status_seterrf(status, "Conflicting defs named '%s'", def->fqname); - upb_strtable_free(&addtab); - return false; + assert(upb_def_ismutable(def)); + const char *fullname = upb_def_fullname(def); + if (!fullname) { + upb_status_seterrliteral( + status, "Anonymous defs cannot be added to a symtab"); + goto err; } - upb_strtable_insert(&addtab, def->fqname, &def); + if (upb_strtable_lookup(&addtab, fullname) != NULL) { + upb_status_seterrf(status, "Conflicting defs named '%s'", fullname); + goto err; + } + if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def))) + goto oom_err; + // We temporarily use this field to indicate that we came from the user's + // list rather than being dup'd. + def->refcount.next = NULL; } - // All existing defs that can reach defs that are being replaced must - // themselves be replaced with versions that will point to the new defs. - // Do a DFS -- any path that finds a new def must replace all ancestors. - upb_strtable *symtab = &s->symtab; + // Add dups of any existing def that can reach a def with the same name as + // one of "defs." + upb_inttable seen; + if (!upb_inttable_init(&seen)) goto oom_err; upb_strtable_iter i; - upb_strtable_begin(&i, symtab); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - upb_def *open_defs[UPB_MAX_TYPE_DEPTH]; - const upb_symtab_ent *e = upb_strtable_iter_value(&i); - upb_symtab_dfs(e->def, open_defs, 0, &addtab); + upb_strtable_begin(&i, &s->symtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_resolve_dfs(def, &addtab, ref_donor, &seen, status); + if (!upb_ok(status)) goto err; } + upb_inttable_uninit(&seen); - // Resolve all refs. + // Now using the table, resolve symbolic references. upb_strtable_begin(&i, &addtab); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - const upb_symtab_ent *e = upb_strtable_iter_value(&i); - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_msgdef *m = upb_dyncast_msgdef(def); + if (!m) continue; // Type names are resolved relative to the message in which they appear. - const char *base = m->base.fqname; + const char *base = upb_def_fullname(UPB_UPCAST(m)); upb_msg_iter j; - for(j = upb_msg_begin(m); !upb_msg_done(j); j = upb_msg_next(m, j)) { - upb_fielddef *f = upb_msg_iter_field(j); - if (f->type == 0) { - upb_status_seterrf(status, "Field type was not set."); - return false; - } - - if (!upb_hassubdef(f)) continue; // No resolving necessary. - upb_downcast_unresolveddef(f->def); // Type check. - const char *name = f->def->fqname; - - // Resolve from either the addtab (pending adds) or symtab (existing - // defs). If both exist, prefer the pending add, because it will be - // overwriting the existing def. - upb_symtab_ent *found; - if(!(found = upb_resolve(&addtab, base, name)) && - !(found = upb_resolve(symtab, base, name))) { - upb_status_seterrf(status, "could not resolve symbol '%s' " - "in context '%s'", name, base); - return false; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + const char *name = upb_fielddef_subtypename(f); + if (name) { + upb_def *subdef = upb_resolvename(&addtab, base, name); + if (subdef == NULL) { + upb_status_seterrf( + status, "couldn't resolve name '%s' in message '%s'", name, base); + goto err; + } else if (!upb_fielddef_setsubdef(f, subdef)) { + upb_status_seterrf( + status, "def '%s' had the wrong type for field '%s'", + upb_def_fullname(subdef), upb_fielddef_name(f)); + goto err; + } } - // Check the type of the found def. - upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; - if(found->def->type != expected) { - upb_status_seterrliteral(status, "Unexpected type"); - return false; - } - if (!upb_fielddef_resolve(f, found->def, status)) return false; + if (upb_fielddef_type(f) == UPB_TYPE(ENUM) && upb_fielddef_subdef(f) && + !upb_fielddef_resolvedefault(f, status)) + goto err; } } - // The defs in the transaction have been vetted, and can be moved to the - // symtab without causing errors. + // We need an array of the defs in addtab, for passing to upb_finalize. + add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab)); + if (add_defs == NULL) goto oom_err; upb_strtable_begin(&i, &addtab); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - const upb_symtab_ent *tmptab_e = upb_strtable_iter_value(&i); - upb_def_movetosymtab(tmptab_e->def, s); - upb_symtab_ent *symtab_e = - upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); - if(symtab_e) { - upb_deflist_push(&s->olddefs, symtab_e->def); - symtab_e->def = tmptab_e->def; + for (n = 0; !upb_strtable_done(&i); upb_strtable_next(&i)) + add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&i)); + + // Restore the next pointer that we stole. + for (int i = 0; i < n; i++) + add_defs[i]->refcount.next = &add_defs[i]->refcount; + + if (!upb_finalize(add_defs, n, status)) goto err; + upb_strtable_uninit(&addtab); + + for (int i = 0; i < n; i++) { + upb_def *def = add_defs[i]; + const char *name = upb_def_fullname(def); + upb_def_donateref(def, ref_donor, s); + upb_value *v = upb_strtable_lookup(&s->symtab, name); + if(v) { + upb_def_unref(upb_value_getptr(*v), s); + upb_value_setptr(v, def); } else { - upb_strtable_insert(&s->symtab, tmptab_e->def->fqname, tmptab_e); + upb_strtable_insert(&s->symtab, name, upb_value_ptr(def)); } } - - upb_strtable_free(&addtab); - upb_rwlock_unlock(&s->lock); - upb_symtab_gc(s); + free(add_defs); return true; -} -void upb_symtab_gc(upb_symtab *s) { - (void)s; - // TODO. +oom_err: + upb_status_seterrliteral(status, "out of memory"); +err: { + // Need to unref any defs we dup'd (we can distinguish them from defs that + // the user passed in by their def->refcount.next pointers). + upb_strtable_iter i; + upb_strtable_begin(&i, &addtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + if (def->refcount.next) upb_def_unref(def, s); + } + } + upb_strtable_uninit(&addtab); + free(add_defs); + return false; } |