diff options
Diffstat (limited to 'upb')
-rw-r--r-- | upb/atomic.h | 181 | ||||
-rw-r--r-- | upb/bytestream.c | 9 | ||||
-rw-r--r-- | upb/bytestream.h | 6 | ||||
-rw-r--r-- | upb/def.c | 1194 | ||||
-rw-r--r-- | upb/def.h | 619 | ||||
-rw-r--r-- | upb/descriptor/descriptor_const.h (renamed from upb/descriptor_const.h) | 266 | ||||
-rw-r--r-- | upb/descriptor/reader.c (renamed from upb/descriptor.c) | 80 | ||||
-rw-r--r-- | upb/descriptor/reader.h (renamed from upb/descriptor.h) | 30 | ||||
-rw-r--r-- | upb/handlers.c | 64 | ||||
-rw-r--r-- | upb/handlers.h | 51 | ||||
-rw-r--r-- | upb/msg.c | 322 | ||||
-rw-r--r-- | upb/msg.h | 178 | ||||
-rw-r--r-- | upb/pb/decoder.c | 141 | ||||
-rw-r--r-- | upb/pb/decoder_x64.dasc | 141 | ||||
-rw-r--r-- | upb/pb/glue.c | 94 | ||||
-rw-r--r-- | upb/pb/glue.h | 17 | ||||
-rw-r--r-- | upb/pb/textprinter.c | 7 | ||||
-rw-r--r-- | upb/pb/varint.h | 10 | ||||
-rw-r--r-- | upb/refcount.c | 224 | ||||
-rw-r--r-- | upb/refcount.h | 70 | ||||
-rw-r--r-- | upb/table.c | 568 | ||||
-rw-r--r-- | upb/table.h | 238 | ||||
-rw-r--r-- | upb/upb.c | 39 | ||||
-rw-r--r-- | upb/upb.h | 108 |
24 files changed, 2178 insertions, 2479 deletions
diff --git a/upb/atomic.h b/upb/atomic.h deleted file mode 100644 index 2478fe4..0000000 --- a/upb/atomic.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Google Inc. See LICENSE for details. - * Author: Josh Haberman <jhaberman@gmail.com> - * - * Only a very small part of upb is thread-safe. Notably, individual - * messages, arrays, and strings are *not* thread safe for mutating. - * However, we do make message *metadata* such as upb_msgdef and - * upb_symtab thread-safe, and their ownership is tracked via atomic - * refcounting. This header implements the small number of atomic - * primitives required to support this. The primitives we implement - * are: - * - * - a reader/writer lock (wrappers around platform-provided mutexes). - * - an atomic refcount. - * - * TODO: This needs some revisiting/refinement, see: - * http://code.google.com/p/upb/issues/detail?id=8 - */ - -#ifndef UPB_ATOMIC_H_ -#define UPB_ATOMIC_H_ - -#include <stdbool.h> -#include <assert.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* inline if possible, emit standalone code if required. */ -#ifndef INLINE -#define INLINE static inline -#endif - -// Until this stuff is actually working, make thread-unsafe the default. -#define UPB_THREAD_UNSAFE - -#ifdef UPB_THREAD_UNSAFE - -/* Non-thread-safe implementations. ******************************************/ - -typedef struct { - int v; -} upb_atomic_t; - -#define UPB_ATOMIC_INIT(x) {x} - -INLINE void upb_atomic_init(upb_atomic_t *a, int val) { a->v = val; } -INLINE bool upb_atomic_ref(upb_atomic_t *a) { return a->v++ == 0; } -INLINE bool upb_atomic_unref(upb_atomic_t *a) { assert(a->v > 0); return --a->v == 0; } -INLINE int upb_atomic_read(upb_atomic_t *a) { return a->v; } -INLINE bool upb_atomic_add(upb_atomic_t *a, int val) { - a->v += val; - return a->v == 0; -} - -#endif - -/* Atomic refcount ************************************************************/ - -#ifdef UPB_THREAD_UNSAFE - -/* Already defined above. */ - -#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 - -/* GCC includes atomic primitives. */ - -typedef struct { - volatile int v; -} upb_atomic_t; - -INLINE void upb_atomic_init(upb_atomic_t *a, int val) { - a->v = val; - __sync_synchronize(); /* Ensure the initialized value is visible. */ -} - -INLINE bool upb_atomic_ref(upb_atomic_t *a) { - return __sync_fetch_and_add(&a->v, 1) == 0; -} - -INLINE bool upb_atomic_add(upb_atomic_t *a, int n) { - return __sync_add_and_fetch(&a->v, n) == 0; -} - -INLINE bool upb_atomic_unref(upb_atomic_t *a) { - return __sync_sub_and_fetch(&a->v, 1) == 0; -} - -INLINE bool upb_atomic_read(upb_atomic_t *a) { - return __sync_fetch_and_add(&a->v, 0); -} - -#elif defined(WIN32) - -/* Windows defines atomic increment/decrement. */ -#include <Windows.h> - -typedef struct { - volatile LONG val; -} upb_atomic_t; - -INLINE void upb_atomic_init(upb_atomic_t *a, int val) { - InterlockedExchange(&a->val, val); -} - -INLINE bool upb_atomic_ref(upb_atomic_t *a) { - return InterlockedIncrement(&a->val) == 1; -} - -INLINE bool upb_atomic_unref(upb_atomic_t *a) { - return InterlockedDecrement(&a->val) == 0; -} - -#else -#error Atomic primitives not defined for your platform/CPU. \ - Implement them or compile with UPB_THREAD_UNSAFE. -#endif - -INLINE bool upb_atomic_only(upb_atomic_t *a) { - return upb_atomic_read(a) == 1; -} - -/* Reader/Writer lock. ********************************************************/ - -#ifdef UPB_THREAD_UNSAFE - -typedef struct { -} upb_rwlock_t; - -INLINE void upb_rwlock_init(const upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_destroy(const upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_rdlock(const upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_wrlock(const upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_unlock(const upb_rwlock_t *l) { (void)l; } - -#elif defined(UPB_USE_PTHREADS) - -#include <pthread.h> - -typedef struct { - pthread_rwlock_t lock; -} upb_rwlock_t; - -INLINE void upb_rwlock_init(const upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_init(&l->lock, NULL); -} - -INLINE void upb_rwlock_destroy(const upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_destroy(&l->lock); -} - -INLINE void upb_rwlock_rdlock(const upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_rdlock(&l->lock); -} - -INLINE void upb_rwlock_wrlock(const upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_wrlock(&l->lock); -} - -INLINE void upb_rwlock_unlock(const upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_unlock(&l->lock); -} - -#else -#error Reader/writer lock is not defined for your platform/CPU. \ - Implement it or compile with UPB_THREAD_UNSAFE. -#endif - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_ATOMIC_H_ */ diff --git a/upb/bytestream.c b/upb/bytestream.c index 812e552..8feb678 100644 --- a/upb/bytestream.c +++ b/upb/bytestream.c @@ -32,8 +32,6 @@ upb_byteregion *upb_byteregion_newl(const void *str, size_t len) { memcpy(ptr, str, len); ptr[len] = '\0'; upb_stringsrc_reset(src, ptr, len); - upb_byteregion_fetch(upb_stringsrc_allbytes(src)); - assert(len == upb_byteregion_available(upb_stringsrc_allbytes(src), 0)); return upb_stringsrc_allbytes(src); } @@ -93,10 +91,10 @@ static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) { static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) { upb_stdio_buf **reuse = NULL; // XXX - uint32_t num_reused = 0, num_inuse = 0; + int num_reused = 0, num_inuse = 0; // Could sweep only a subset of bufs if this was a hotspot. - for (uint32_t i = 0; i < s->nbuf; i++) { + for (int i = 0; i < s->nbuf; i++) { upb_stdio_buf *buf = s->bufs[i]; if (buf->refcount > 0) { s->bufs[num_inuse++] = buf; @@ -243,10 +241,9 @@ upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; } upb_bytesuccess_t upb_stringsrc_fetch(void *_src, uint64_t ofs, size_t *read) { upb_stringsrc *src = _src; - assert(ofs <= src->len); + assert(ofs < src->len); if (ofs == src->len) { upb_status_seteof(&src->bytesrc.status); - *read = 0; return UPB_BYTE_EOF; } *read = src->len - ofs; diff --git a/upb/bytestream.h b/upb/bytestream.h index fe049d2..3217ee1 100644 --- a/upb/bytestream.h +++ b/upb/bytestream.h @@ -372,8 +372,7 @@ INLINE int upb_bytesink_putc(upb_bytesink *sink, char ch) { } INLINE int upb_bytesink_putrepeated(upb_bytesink *sink, char ch, int len) { - int i; - for (i = 0; i < len; i++) + for (int i = 0; i < len; i++) if (upb_bytesink_write(sink, &ch, 1) < 0) return -1; return len; @@ -436,7 +435,8 @@ typedef struct { FILE *file; bool should_close; upb_stdio_buf **bufs; - uint32_t nbuf, szbuf; + int nbuf; + uint32_t szbuf; } upb_stdio; void upb_stdio_init(upb_stdio *stdio); @@ -1,7 +1,7 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2008-2009 Google Inc. See LICENSE for details. + * Copyright (c) 2008-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> */ @@ -11,168 +11,283 @@ #include "upb/bytestream.h" #include "upb/def.h" -#define alignof(t) offsetof(struct { char c; t x; }, x) +// isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. +static bool upb_isbetween(char c, char low, char high) { + return c >= low && c <= high; +} -void upb_deflist_init(upb_deflist *l) { - l->size = 8; - l->defs = malloc(l->size * sizeof(void*)); - l->len = 0; +static bool upb_isletter(char c) { + return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_'; } -void upb_deflist_uninit(upb_deflist *l) { - for(uint32_t i = 0; i < l->len; i++) upb_def_unref(l->defs[i]); - free(l->defs); +static bool upb_isalphanum(char c) { + return upb_isletter(c) || upb_isbetween(c, '0', '9'); } -void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->len == l->size) { - l->size *= 2; - l->defs = realloc(l->defs, l->size * sizeof(void*)); +static bool upb_isident(const char *str, size_t len, bool full) { + bool start = true; + for (size_t i = 0; i < len; i++) { + char c = str[i]; + if (c == '.') { + if (start || !full) return false; + start = true; + } else if (start) { + if (!upb_isletter(c)) return false; + start = false; + } else { + if (!upb_isalphanum(c)) return false; + } } - l->defs[l->len++] = d; + return !start; } /* upb_def ********************************************************************/ static void upb_msgdef_free(upb_msgdef *m); +static void upb_fielddef_free(upb_fielddef *f); static void upb_enumdef_free(upb_enumdef *e); -static void upb_unresolveddef_free(struct _upb_unresolveddef *u); -bool upb_def_ismutable(const upb_def *def) { return def->symtab == NULL; } +bool upb_def_ismutable(const upb_def *def) { return !def->is_finalized; } +bool upb_def_isfinalized(const upb_def *def) { return def->is_finalized; } -bool upb_def_setfqname(upb_def *def, const char *fqname) { +bool upb_def_setfullname(upb_def *def, const char *fullname) { assert(upb_def_ismutable(def)); - free(def->fqname); - def->fqname = strdup(fqname); - return true; // TODO: check for acceptable characters. -} - -static void upb_def_free(upb_def *def) { - switch (def->type) { - case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break; - case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break; - case UPB_DEF_UNRESOLVED: - upb_unresolveddef_free(upb_downcast_unresolveddef(def)); break; - default: - assert(false); - } + if (!upb_isident(fullname, strlen(fullname), true)) return false; + free(def->fullname); + def->fullname = strdup(fullname); + return true; } -upb_def *upb_def_dup(const upb_def *def) { +upb_def *upb_def_dup(const upb_def *def, void *o) { switch (def->type) { case UPB_DEF_MSG: - return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def))); + return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def), o)); + case UPB_DEF_FIELD: + return UPB_UPCAST(upb_fielddef_dup(upb_downcast_fielddef_const(def), o)); case UPB_DEF_ENUM: - return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def))); + return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def), o)); default: assert(false); return NULL; } } -// Prior to being in a symtab, the def's refcount controls the lifetime of the -// def itself. If the refcount falls to zero, the def is deleted. Once the -// def belongs to a symtab, the def is owned by the symtab and its refcount -// determines whether the def owns a ref on the symtab or not. -void upb_def_ref(const upb_def *_def) { - upb_def *def = (upb_def*)_def; // Need to modify refcount. - if (upb_atomic_ref(&def->refcount) && def->symtab) - upb_symtab_ref(def->symtab); -} - -static void upb_def_movetosymtab(upb_def *d, upb_symtab *s) { - assert(upb_atomic_read(&d->refcount) > 0); - d->symtab = s; - upb_symtab_ref(s); - upb_msgdef *m = upb_dyncast_msgdef(d); - if (m) upb_inttable_compact(&m->itof); +void upb_def_ref(const upb_def *_def, void *owner) { + upb_def *def = (upb_def*)_def; + upb_refcount_ref(&def->refcount, owner); } -void upb_def_unref(const upb_def *_def) { - upb_def *def = (upb_def*)_def; // Need to modify refcount. +void upb_def_unref(const upb_def *_def, void *owner) { + upb_def *def = (upb_def*)_def; if (!def) return; - if (upb_atomic_unref(&def->refcount)) { - if (def->symtab) { - upb_symtab_unref(def->symtab); - // Def might be deleted now. - } else { - upb_def_free(def); + if (!upb_refcount_unref(&def->refcount, owner)) return; + upb_def *base = def; + // Free all defs in the SCC. + do { + upb_def *next = (upb_def*)def->refcount.next; + switch (def->type) { + case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break; + case UPB_DEF_FIELD: upb_fielddef_free(upb_downcast_fielddef(def)); break; + case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break; + default: + assert(false); } - } + def = next; + } while(def != base); } -static void upb_def_init(upb_def *def, upb_deftype_t type) { +static bool upb_def_init(upb_def *def, upb_deftype_t type, void *owner) { def->type = type; - def->fqname = NULL; - def->symtab = NULL; - upb_atomic_init(&def->refcount, 1); + def->is_finalized = false; + def->fullname = NULL; + return upb_refcount_init(&def->refcount, owner); } static void upb_def_uninit(upb_def *def) { - free(def->fqname); + upb_refcount_uninit(&def->refcount); + free(def->fullname); } +void upb_def_donateref(const upb_def *_def, void *from, void *to) { + upb_def *def = (upb_def*)_def; + upb_refcount_donateref(&def->refcount, from, to); +} -/* upb_unresolveddef **********************************************************/ - -// Unresolved defs are used as temporary placeholders for a def whose name has -// not been resolved yet. During the name resolution step, all unresolved defs -// are replaced with pointers to the actual def being referenced. -typedef struct _upb_unresolveddef { - upb_def base; -} upb_unresolveddef; +static void upb_def_getsuccessors(upb_refcount *refcount, void *closure) { + upb_def *def = (upb_def*)refcount; + switch (def->type) { + case UPB_DEF_MSG: { + upb_msgdef *m = upb_downcast_msgdef(def); + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + upb_refcount_visit(refcount, &f->base.refcount, closure); + } + break; + } + case UPB_DEF_FIELD: { + upb_fielddef *f = upb_downcast_fielddef(def); + assert(f->msgdef); + upb_refcount_visit(refcount, &f->msgdef->base.refcount, closure); + upb_def *subdef = f->sub.def; + if (subdef) + upb_refcount_visit(refcount, &subdef->refcount, closure); + break; + } + case UPB_DEF_ENUM: + case UPB_DEF_SERVICE: + case UPB_DEF_ANY: + break; + } +} -// Is passed a ref on the string. -static upb_unresolveddef *upb_unresolveddef_new(const char *str) { - upb_unresolveddef *def = malloc(sizeof(*def)); - upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->base.fqname = strdup(str); - return def; +static bool upb_validate_field(const upb_fielddef *f, upb_status *s) { + if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == -1) { + upb_status_seterrliteral(s, "fielddef must have name and number set"); + return false; + } + if (upb_hassubdef(f)) { + if (f->subdef_is_symbolic) { + upb_status_seterrf(s, + "field %s has not been resolved", upb_fielddef_name(f)); + return false; + } else if (upb_fielddef_subdef(f) == NULL) { + upb_status_seterrf(s, + "field is %s missing required subdef", upb_fielddef_name(f)); + return false; + } else if (!upb_def_isfinalized(upb_fielddef_subdef(f))) { + upb_status_seterrf(s, + "field %s subtype is not being finalized", upb_fielddef_name(f)); + return false; + } + } + return true; } -static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { - upb_def_uninit(&def->base); - free(def); +bool upb_finalize(upb_def *const*defs, int n, upb_status *s) { + if (n >= UINT16_MAX - 1) { + upb_status_seterrliteral(s, "too many defs (max is 64k at a time)"); + return false; + } + + // First perform validation, in two passes so we can check that we have a + // transitive closure without needing to search. + for (int i = 0; i < n; i++) { + upb_def *def = defs[i]; + if (upb_def_isfinalized(def)) { + // Could relax this requirement if it's annoying. + upb_status_seterrliteral(s, "def is already finalized"); + goto err; + } else if (def->type == UPB_DEF_FIELD) { + upb_status_seterrliteral(s, "standalone fielddefs can not be finalized"); + goto err; + } else { + // Set now to detect transitive closure in the second pass. + def->is_finalized = true; + } + } + + for (int i = 0; i < n; i++) { + upb_msgdef *m = upb_dyncast_msgdef(defs[i]); + if (!m) continue; + upb_inttable_compact(&m->itof); + upb_msg_iter j; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + assert(f->msgdef == m); + if (!upb_validate_field(f, s)) goto err; + } + } + + // Validation all passed, now find strongly-connected components so that + // our refcounting works with cycles. + upb_refcount_findscc((upb_refcount**)defs, n, &upb_def_getsuccessors); + + // Now that ref cycles have been removed it is safe to have each fielddef + // take a ref on its subdef (if any), but only if it's a member of another + // SCC. + for (int i = 0; i < n; i++) { + upb_msgdef *m = upb_dyncast_msgdef(defs[i]); + if (!m) continue; + upb_msg_iter j; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + f->base.is_finalized = true; + // Release the ref taken in upb_msgdef_addfields(). + upb_fielddef_unref(f, m); + if (!upb_hassubdef(f)) continue; + assert(upb_fielddef_subdef(f)); + if (!upb_refcount_merged(&f->base.refcount, &f->sub.def->refcount)) { + // Subdef is part of a different strongly-connected component. + upb_def_ref(f->sub.def, &f->sub.def); + f->subdef_is_owned = true; + } + } + } + + return true; + +err: + for (int i = 0; i < n; i++) { + defs[i]->is_finalized = false; + } + return false; } /* upb_enumdef ****************************************************************/ -upb_enumdef *upb_enumdef_new() { +upb_enumdef *upb_enumdef_new(void *owner) { upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); + if (!e) return NULL; + if (!upb_def_init(&e->base, UPB_DEF_ENUM, owner)) goto err2; + if (!upb_strtable_init(&e->ntoi)) goto err2; + if (!upb_inttable_init(&e->iton)) goto err1; return e; + +err1: + upb_strtable_uninit(&e->ntoi); +err2: + free(e); + return NULL; } static void upb_enumdef_free(upb_enumdef *e) { - upb_enum_iter i; - for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { - // Frees the ref taken when the string was parsed. - free(upb_enum_iter_name(i)); - } - upb_strtable_free(&e->ntoi); - upb_inttable_free(&e->iton); + upb_inttable_iter i; + upb_inttable_begin(&i, &e->iton); + for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) { + // To clean up the strdup() from upb_enumdef_addval(). + free(upb_value_getptr(upb_inttable_iter_value(&i))); + } + upb_strtable_uninit(&e->ntoi); + upb_inttable_uninit(&e->iton); upb_def_uninit(&e->base); free(e); } -upb_enumdef *upb_enumdef_dup(const upb_enumdef *e) { - upb_enumdef *new_e = upb_enumdef_new(); +upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, void *owner) { + upb_enumdef *new_e = upb_enumdef_new(owner); + if (!new_e) return NULL; upb_enum_iter i; - for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { - assert(upb_enumdef_addval(new_e, upb_enum_iter_name(i), - upb_enum_iter_number(i))); + for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { + bool success = upb_enumdef_addval( + new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i)); + if (!success) { + upb_enumdef_unref(new_e, owner); + return NULL; + } } return new_e; } -bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num) { - if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL)) +bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num) { + if (!upb_isident(name, strlen(name), false)) return false; + if (upb_enumdef_ntoi(e, name, NULL)) + return false; + if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) + return false; + if (!upb_inttable_lookup(&e->iton, num) && + !upb_inttable_insert(&e->iton, num, upb_value_ptr(strdup(name)))) return false; - upb_iton_ent ent = {0, strdup(name)}; - upb_strtable_insert(&e->ntoi, name, &num); - upb_inttable_insert(&e->iton, num, &ent); return true; } @@ -181,42 +296,70 @@ void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) { e->defaultval = val; } -upb_enum_iter upb_enum_begin(const upb_enumdef *e) { - // We could iterate over either table here; the choice is arbitrary. - return upb_inttable_begin(&e->iton); +void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) { + // We iterate over the ntoi table, to account for duplicate numbers. + upb_strtable_begin(i, &e->ntoi); } -upb_enum_iter upb_enum_next(const upb_enumdef *e, upb_enum_iter iter) { - return upb_inttable_next(&e->iton, iter); -} +void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); } +bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); } -const char *upb_enumdef_iton(upb_enumdef *def, int32_t num) { - upb_iton_ent *e = upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); - return e ? e->str : NULL; -} - -bool upb_enumdef_ntoil(upb_enumdef *def, const char *name, size_t len, int32_t *num) { - upb_ntoi_ent *e = upb_strtable_lookupl(&def->ntoi, name, len); - if (!e) return false; - if (num) *num = e->value; +bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, int32_t *num) { + const upb_value *v = upb_strtable_lookup(&def->ntoi, name); + if (!v) return false; + if (num) *num = upb_value_getint32(*v); return true; } -bool upb_enumdef_ntoi(upb_enumdef *e, const char *name, int32_t *num) { - return upb_enumdef_ntoil(e, name, strlen(name), num); +const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { + const upb_value *v = upb_inttable_lookup32(&def->iton, num); + return v ? upb_value_getptr(*v) : NULL; } /* upb_fielddef ***************************************************************/ +#define alignof(t) offsetof(struct { char c; t x; }, x) +#define TYPE_INFO(ctype, inmemory_type) \ + {alignof(ctype), sizeof(ctype), UPB_CTYPE_ ## inmemory_type} + +const upb_typeinfo upb_types[UPB_NUM_TYPES] = { + // END_GROUP is not real, but used to signify the pseudo-field that + // ends a group from within the group. + TYPE_INFO(void*, PTR), // ENDGROUP + TYPE_INFO(double, DOUBLE), // DOUBLE + TYPE_INFO(float, FLOAT), // FLOAT + TYPE_INFO(int64_t, INT64), // INT64 + TYPE_INFO(uint64_t, UINT64), // UINT64 + TYPE_INFO(int32_t, INT32), // INT32 + TYPE_INFO(uint64_t, UINT64), // FIXED64 + TYPE_INFO(uint32_t, UINT32), // FIXED32 + TYPE_INFO(bool, BOOL), // BOOL + TYPE_INFO(void*, BYTEREGION), // STRING + TYPE_INFO(void*, PTR), // GROUP + TYPE_INFO(void*, PTR), // MESSAGE + TYPE_INFO(void*, BYTEREGION), // BYTES + TYPE_INFO(uint32_t, UINT32), // UINT32 + TYPE_INFO(uint32_t, INT32), // ENUM + TYPE_INFO(int32_t, INT32), // SFIXED32 + TYPE_INFO(int64_t, INT64), // SFIXED64 + TYPE_INFO(int32_t, INT32), // SINT32 + TYPE_INFO(int64_t, INT64), // SINT64 +}; + static void upb_fielddef_init_default(upb_fielddef *f); -upb_fielddef *upb_fielddef_new() { +upb_fielddef *upb_fielddef_new(void *owner) { upb_fielddef *f = malloc(sizeof(*f)); + if (!f) return NULL; + if (!upb_def_init(UPB_UPCAST(f), UPB_DEF_FIELD, owner)) { + free(f); + return NULL; + } f->msgdef = NULL; - f->def = NULL; - upb_atomic_init(&f->refcount, 1); - f->finalized = false; + f->sub.def = NULL; + f->subdef_is_symbolic = false; + f->subdef_is_owned = false; f->label = UPB_LABEL(OPTIONAL); f->hasbit = -1; f->offset = 0; @@ -226,14 +369,68 @@ upb_fielddef *upb_fielddef_new() { // These are initialized to be invalid; the user must set them explicitly. // Could relax this later if it's convenient and non-confusing to have a // defaults for them. - f->name = NULL; - f->type = 0; + f->type = UPB_TYPE_NONE; f->number = 0; upb_fielddef_init_default(f); return f; } +static void upb_fielddef_uninit_default(upb_fielddef *f) { + if (f->default_is_string) + upb_byteregion_free(upb_value_getbyteregion(f->defaultval)); +} + +static void upb_fielddef_free(upb_fielddef *f) { + if (f->subdef_is_owned) + upb_def_unref(f->sub.def, &f->sub.def); + upb_fielddef_uninit_default(f); + upb_def_uninit(UPB_UPCAST(f)); + free(f); +} + +upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, void *owner) { + upb_fielddef *newf = upb_fielddef_new(owner); + if (!newf) return NULL; + upb_fielddef_settype(newf, upb_fielddef_type(f)); + upb_fielddef_setlabel(newf, upb_fielddef_label(f)); + upb_fielddef_setnumber(newf, upb_fielddef_number(f)); + upb_fielddef_setname(newf, upb_fielddef_name(f)); + upb_fielddef_sethasbit(newf, upb_fielddef_hasbit(f)); + upb_fielddef_setoffset(newf, upb_fielddef_offset(f)); + upb_fielddef_setaccessor(newf, upb_fielddef_accessor(f)); + upb_fielddef_setfval(newf, upb_fielddef_fval(f)); + if (f->default_is_string) { + upb_byteregion *r = upb_value_getbyteregion(upb_fielddef_default(f)); + size_t len; + const char *ptr = upb_byteregion_getptr(r, 0, &len); + assert(len == upb_byteregion_len(r)); + upb_fielddef_setdefaultstr(newf, ptr, len); + } else { + upb_fielddef_setdefault(newf, upb_fielddef_default(f)); + } + + const char *srcname; + if (f->subdef_is_symbolic) { + srcname = f->sub.name; // Might be NULL. + } else { + srcname = f->sub.def ? upb_def_fullname(f->sub.def) : NULL; + } + if (srcname) { + char *newname = malloc(strlen(f->sub.def->fullname) + 2); + if (!newname) { + upb_fielddef_unref(newf, owner); + return NULL; + } + strcpy(newname, "."); + strcat(newname, f->sub.def->fullname); + upb_fielddef_setsubtypename(newf, newname); + free(newname); + } + + return newf; +} + static void upb_fielddef_init_default(upb_fielddef *f) { f->default_is_string = false; switch (upb_fielddef_type(f)) { @@ -253,105 +450,62 @@ static void upb_fielddef_init_default(upb_fielddef *f) { case UPB_TYPE(BOOL): upb_value_setbool(&f->defaultval, false); break; case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - f->default_is_string = true; - upb_value_setbyteregion(&f->defaultval, upb_byteregion_new("")); - break; + upb_value_setbyteregion(&f->defaultval, upb_byteregion_new("")); + f->default_is_string = true; + break; case UPB_TYPE(GROUP): case UPB_TYPE(MESSAGE): upb_value_setptr(&f->defaultval, NULL); break; + case UPB_TYPE_ENDGROUP: assert(false); + case UPB_TYPE_NONE: break; } } -static void upb_fielddef_uninit_default(upb_fielddef *f) { - if (f->default_is_string) { - upb_byteregion_free(upb_value_getbyteregion(f->defaultval)); - } -} - -static void upb_fielddef_free(upb_fielddef *f) { - upb_fielddef_uninit_default(f); - if (f->def) { - // We own a ref on the subdef iff we are not part of a msgdef. - if (f->msgdef == NULL) { - if (f->def) upb_downcast_unresolveddef(f->def); // assert() check. - upb_def_unref(f->def); - } - } - free(f->name); - free(f); -} - -void upb_fielddef_ref(upb_fielddef *f) { - // TODO. - (void)f; -} - -void upb_fielddef_unref(upb_fielddef *f) { - // TODO. - (void)f; - if (!f) return; - if (upb_atomic_unref(&f->refcount)) { - if (f->msgdef) { - upb_msgdef_unref(f->msgdef); - // fielddef might be deleted now. - } else { - upb_fielddef_free(f); - } +const upb_def *upb_fielddef_subdef(const upb_fielddef *f) { + if (upb_hassubdef(f) && upb_fielddef_isfinalized(f)) { + assert(f->sub.def); + return f->sub.def; + } else { + return f->subdef_is_symbolic ? NULL : f->sub.def; } } -upb_fielddef *upb_fielddef_dup(upb_fielddef *f) { - upb_fielddef *newf = upb_fielddef_new(); - newf->msgdef = f->msgdef; - newf->type = f->type; - newf->label = f->label; - newf->number = f->number; - newf->name = f->name; - upb_fielddef_settypename(newf, f->def->fqname); - return f; +upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) { + return (upb_def*)upb_fielddef_subdef(f); } -bool upb_fielddef_ismutable(const upb_fielddef *f) { - return !f->msgdef || upb_def_ismutable(UPB_UPCAST(f->msgdef)); +const char *upb_fielddef_subtypename(upb_fielddef *f) { + assert(upb_fielddef_ismutable(f)); + return f->subdef_is_symbolic ? f->sub.name : NULL; } -upb_def *upb_fielddef_subdef(const upb_fielddef *f) { - if (upb_hassubdef(f) && !upb_fielddef_ismutable(f)) - return f->def; - else - return NULL; -} - -static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) { - assert(upb_dyncast_unresolveddef(f->def)); - upb_def_unref(f->def); - f->def = def; - if (f->type == UPB_TYPE(ENUM) && f->default_is_string) { - // Resolve the enum's default from a string to an integer. - upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); - assert(bytes); // Points to either a real default or the empty string. - upb_enumdef *e = upb_downcast_enumdef(f->def); - int32_t val = 0; - // Could do a sanity check that the default value does not have embedded - // NULLs. - if (upb_byteregion_len(bytes) == 0) { - upb_value_setint32(&f->defaultval, e->defaultval); - } else { - size_t len; - // ptr is guaranteed to be NULL-terminated because the byteregion was - // created with upb_byteregion_newl(). - const char *ptr = upb_byteregion_getptr(bytes, 0, &len); - assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk. - bool success = upb_enumdef_ntoi(e, ptr, &val); - if (!success) { - upb_status_seterrf( - s, "Default enum value (%s) is not a member of the enum", ptr); - return false; - } - upb_value_setint32(&f->defaultval, val); +// Could expose this to clients if a client wants to call it independently +// of upb_resolve() for whatever reason. +static bool upb_fielddef_resolvedefault(upb_fielddef *f, upb_status *s) { + if (!f->default_is_string) return true; + // Resolve the enum's default from a string to an integer. + upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); + assert(bytes); // Points to either a real default or the empty string. + upb_enumdef *e = upb_downcast_enumdef(upb_fielddef_subdef_mutable(f)); + int32_t val = 0; + if (upb_byteregion_len(bytes) == 0) { + upb_value_setint32(&f->defaultval, e->defaultval); + } else { + size_t len; + // ptr is guaranteed to be NULL-terminated because the byteregion was + // created with upb_byteregion_newl(). + const char *ptr = upb_byteregion_getptr( + bytes, upb_byteregion_startofs(bytes), &len); + assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk. + bool success = upb_enumdef_ntoi(e, ptr, &val); + if (!success) { + upb_status_seterrf( + s, "Default enum value (%s) is not a member of the enum", ptr); + return false; } - f->default_is_string = false; - upb_byteregion_free(bytes); + upb_value_setint32(&f->defaultval, val); } + f->default_is_string = false; + upb_byteregion_free(bytes); return true; } @@ -361,42 +515,50 @@ bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number) { return true; } -bool upb_fielddef_setname(upb_fielddef *f, const char *name) { - assert(f->msgdef == NULL); - free(f->name); - f->name = strdup(name); - return true; -} - -bool upb_fielddef_settype(upb_fielddef *f, uint8_t type) { - assert(!f->finalized); +bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) { + assert(upb_fielddef_ismutable(f)); upb_fielddef_uninit_default(f); f->type = type; upb_fielddef_init_default(f); return true; } -bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label) { - assert(!f->finalized); +bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) { + assert(upb_fielddef_ismutable(f)); f->label = label; return true; } void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) { - assert(!f->finalized); - assert(!upb_isstring(f)); + assert(upb_fielddef_ismutable(f)); + assert(!upb_isstring(f) && !upb_issubmsg(f)); + if (f->default_is_string) { + upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); + assert(bytes); + upb_byteregion_free(bytes); + } f->defaultval = value; + f->default_is_string = false; } -void upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) { +bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) { assert(upb_isstring(f) || f->type == UPB_TYPE(ENUM)); if (f->default_is_string) { upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); assert(bytes); upb_byteregion_free(bytes); - } - upb_value_setbyteregion(&f->defaultval, upb_byteregion_newl(str, len)); + } else { + assert(f->type == UPB_TYPE(ENUM)); + } + if (f->type == UPB_TYPE(ENUM) && !upb_isident(str, len, false)) return false; + upb_byteregion *r = upb_byteregion_newl(str, len); + upb_value_setbyteregion(&f->defaultval, r); + upb_bytesuccess_t ret = upb_byteregion_fetch(r); + (void)ret; + assert(ret == (len == 0 ? UPB_BYTE_EOF : UPB_BYTE_OK)); + assert(upb_byteregion_available(r, 0) == upb_byteregion_len(r)); f->default_is_string = true; + return true; } void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) { @@ -404,82 +566,106 @@ void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) { } void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) { - assert(!f->finalized); - // TODO: string ownership? + assert(upb_fielddef_ismutable(f)); + // TODO: we need an ownership/freeing mechanism for dynamically-allocated + // fvals. One possibility is to let the user supply a free() function + // and call it when the fval is no longer referenced. Would have to + // ensure that no common use cases need cycles. + // + // For now the fval has no ownership; the caller must simply guarantee + // somehow that it outlives any handlers/plan. f->fval = fval; } -void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl) { - assert(!f->finalized); - f->accessor = vtbl; +void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit) { + assert(upb_fielddef_ismutable(f)); + f->hasbit = hasbit; } -bool upb_fielddef_settypename(upb_fielddef *f, const char *name) { - upb_def_unref(f->def); - f->def = UPB_UPCAST(upb_unresolveddef_new(name)); - return true; +void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset) { + assert(upb_fielddef_ismutable(f)); + f->offset = offset; } -// Returns an ordering of fields based on: -// 1. value size (small to large). -// 2. field number. -static int upb_fielddef_cmpval(const void *_f1, const void *_f2) { - upb_fielddef *f1 = *(void**)_f1; - upb_fielddef *f2 = *(void**)_f2; - size_t size1 = upb_types[f1->type].size; - size_t size2 = upb_types[f2->type].size; - if (size1 != size2) return size1 - size2; - // Otherwise return in number order. - return f1->number - f2->number; +void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *tbl) { + assert(upb_fielddef_ismutable(f)); + f->accessor = tbl; } -// Returns an ordering of all fields based on: -// 1. required/optional (required fields first). -// 2. field number -static int upb_fielddef_cmphasbit(const void *_f1, const void *_f2) { - upb_fielddef *f1 = *(void**)_f1; - upb_fielddef *f2 = *(void**)_f2; - size_t req1 = f1->label == UPB_LABEL(REQUIRED); - size_t req2 = f2->label == UPB_LABEL(REQUIRED); - if (req1 != req2) return req1 - req2; - // Otherwise return in number order. - return f1->number - f2->number; +static bool upb_subtype_typecheck(upb_fielddef *f, const upb_def *subdef) { + if (f->type == UPB_TYPE(MESSAGE) || f->type == UPB_TYPE(GROUP)) + return upb_dyncast_msgdef_const(subdef) != NULL; + else if (f->type == UPB_TYPE(ENUM)) + return upb_dyncast_enumdef_const(subdef) != NULL; + else { + assert(false); + return false; + } +} + +bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef) { + assert(upb_fielddef_ismutable(f)); + assert(upb_hassubdef(f)); + assert(subdef); + if (!upb_subtype_typecheck(f, subdef)) return false; + if (f->subdef_is_symbolic) free(f->sub.name); + f->sub.def = subdef; + f->subdef_is_symbolic = false; + return true; +} + +bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name) { + assert(upb_fielddef_ismutable(f)); + assert(upb_hassubdef(f)); + if (f->subdef_is_symbolic) free(f->sub.name); + f->sub.name = strdup(name); + f->subdef_is_symbolic = true; + return true; } /* upb_msgdef *****************************************************************/ -upb_msgdef *upb_msgdef_new() { +upb_msgdef *upb_msgdef_new(void *owner) { upb_msgdef *m = malloc(sizeof(*m)); - upb_def_init(&m->base, UPB_DEF_MSG); - upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); - upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); + if (!m) return NULL; + if (!upb_def_init(&m->base, UPB_DEF_MSG, owner)) goto err2; + if (!upb_inttable_init(&m->itof)) goto err2; + if (!upb_strtable_init(&m->ntof)) goto err1; m->size = 0; m->hasbit_bytes = 0; m->extstart = 0; m->extend = 0; return m; + +err1: + upb_inttable_uninit(&m->itof); +err2: + free(m); + return NULL; } static void upb_msgdef_free(upb_msgdef *m) { - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) - upb_fielddef_free(upb_msg_iter_field(i)); - upb_strtable_free(&m->ntof); - upb_inttable_free(&m->itof); + upb_strtable_uninit(&m->ntof); + upb_inttable_uninit(&m->itof); upb_def_uninit(&m->base); free(m); } -upb_msgdef *upb_msgdef_dup(const upb_msgdef *m) { - upb_msgdef *newm = upb_msgdef_new(); - newm->size = m->size; - newm->hasbit_bytes = m->hasbit_bytes; - newm->extstart = m->extstart; - newm->extend = m->extend; +upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, void *owner) { + upb_msgdef *newm = upb_msgdef_new(owner); + if (!newm) return NULL; + upb_msgdef_setsize(newm, upb_msgdef_size(m)); + upb_msgdef_sethasbit_bytes(newm, upb_msgdef_hasbit_bytes(m)); + upb_msgdef_setextrange(newm, upb_msgdef_extstart(m), upb_msgdef_extend(m)); + upb_def_setfullname(UPB_UPCAST(newm), upb_def_fullname(UPB_UPCAST(m))); upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_msgdef_addfield(newm, upb_fielddef_dup(upb_msg_iter_field(i))); + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f); + if (!f || !upb_msgdef_addfield(newm, f, &f)) { + upb_msgdef_unref(newm, owner); + return NULL; + } } return newm; } @@ -506,160 +692,69 @@ bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end) { return true; } -bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n) { +bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n, + void *ref_donor) { // Check constraints for all fields before performing any action. for (int i = 0; i < n; i++) { upb_fielddef *f = fields[i]; - assert(upb_atomic_read(&f->refcount) > 0); - if (f->name == NULL || f->number == 0 || - upb_msgdef_itof(m, f->number) || upb_msgdef_ntof(m, f->name)) + if (f->msgdef != NULL || + upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0 || + upb_msgdef_itof(m, upb_fielddef_number(f)) || + upb_msgdef_ntof(m, upb_fielddef_name(f))) return false; } // Constraint checks ok, perform the action. for (int i = 0; i < n; i++) { upb_fielddef *f = fields[i]; - upb_msgdef_ref(m); - assert(f->msgdef == NULL); f->msgdef = m; - upb_itof_ent itof_ent = {0, f}; - upb_inttable_insert(&m->itof, f->number, &itof_ent); - upb_strtable_insert(&m->ntof, f->name, &f); + upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f)); + upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f)); + upb_fielddef_ref(f, m); + if (ref_donor) upb_fielddef_unref(f, ref_donor); } return true; } -static int upb_div_round_up(int numerator, int denominator) { - /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ - return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; -} - -void upb_msgdef_layout(upb_msgdef *m) { - // Create an ordering over the fields, but only include fields with accessors. - upb_fielddef **sorted_fields = - malloc(sizeof(upb_fielddef*) * upb_msgdef_numfields(m)); - int n = 0; - upb_msg_iter i; - for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if (f->accessor) sorted_fields[n++] = f; - } - - m->hasbit_bytes = upb_div_round_up(n, 8); - m->size = m->hasbit_bytes; // + header_size? - - // Assign hasbits. - qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmphasbit); - for (int i = 0; i < n; i++) { - upb_fielddef *f = sorted_fields[i]; - f->hasbit = i; - } - - // Assign value offsets. - qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmpval); - size_t max_align = 0; - for (int i = 0; i < n; i++) { - upb_fielddef *f = sorted_fields[i]; - const upb_type_info *type_info = &upb_types[f->type]; - size_t size = type_info->size; - size_t align = type_info->align; - if (upb_isseq(f)) { - size = sizeof(void*); - align = alignof(void*); - } - - // General alignment rules are: each member must be at an address that is a - // multiple of that type's alignment. Also, the size of the structure as a - // whole must be a multiple of the greatest alignment of any member. - f->offset = upb_align_up(m->size, align); - m->size = f->offset + size; - max_align = UPB_MAX(max_align, align); - } - if (max_align > 0) m->size = upb_align_up(m->size, max_align); - - free(sorted_fields); -} - -upb_msg_iter upb_msg_begin(const upb_msgdef *m) { - return upb_inttable_begin(&m->itof); +void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) { + upb_inttable_begin(iter, &m->itof); } -upb_msg_iter upb_msg_next(const upb_msgdef *m, upb_msg_iter iter) { - return upb_inttable_next(&m->itof, iter); -} +void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); } /* upb_symtab *****************************************************************/ -typedef struct { - upb_def *def; -} upb_symtab_ent; - -// Given a symbol and the base symbol inside which it is defined, find the -// symbol's definition in t. -static upb_symtab_ent *upb_resolve(const upb_strtable *t, - const char *base, const char *sym) { - if(strlen(sym) == 0) return NULL; - if(sym[0] == UPB_SYMBOL_SEPARATOR) { - // Symbols starting with '.' are absolute, so we do a single lookup. - // Slice to omit the leading '.' - return upb_strtable_lookup(t, sym + 1); - } else { - // Remove components from base until we find an entry or run out. - // TODO: This branch is totally broken, but currently not used. - (void)base; - assert(false); - return NULL; - } -} - -static void _upb_symtab_free(upb_strtable *t) { - upb_strtable_iter i; - upb_strtable_begin(&i, t); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { - const upb_symtab_ent *e = upb_strtable_iter_value(&i); - assert(upb_atomic_read(&e->def->refcount) == 0); - upb_def_free(e->def); - } - upb_strtable_free(t); -} - static void upb_symtab_free(upb_symtab *s) { - _upb_symtab_free(&s->symtab); - for (uint32_t i = 0; i < s->olddefs.len; i++) { - upb_def *d = s->olddefs.defs[i]; - assert(upb_atomic_read(&d->refcount) == 0); - upb_def_free(d); - } - upb_rwlock_destroy(&s->lock); - upb_deflist_uninit(&s->olddefs); + upb_strtable_iter i; + upb_strtable_begin(&i, &s->symtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) + upb_def_unref(upb_value_getptr(upb_strtable_iter_value(&i)), s); + upb_strtable_uninit(&s->symtab); free(s); } void upb_symtab_ref(const upb_symtab *_s) { upb_symtab *s = (upb_symtab*)_s; - upb_atomic_ref(&s->refcount); + s->refcount++; } void upb_symtab_unref(const upb_symtab *_s) { upb_symtab *s = (upb_symtab*)_s; - if(s && upb_atomic_unref(&s->refcount)) { + if(s && --s->refcount == 0) { upb_symtab_free(s); } } upb_symtab *upb_symtab_new() { upb_symtab *s = malloc(sizeof(*s)); - upb_atomic_init(&s->refcount, 1); - upb_rwlock_init(&s->lock); - upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent)); - upb_deflist_init(&s->olddefs); + s->refcount = 1; + upb_strtable_init(&s->symtab); return s; } const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count, - upb_deftype_t type) { - upb_rwlock_rdlock(&s->lock); + upb_deftype_t type, void *owner) { int total = upb_strtable_count(&s->symtab); // We may only use part of this, depending on how many symbols are of the // correct type. @@ -668,177 +763,252 @@ const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count, upb_strtable_begin(&iter, &s->symtab); int i = 0; for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { - const upb_symtab_ent *e = upb_strtable_iter_value(&iter); - upb_def *def = e->def; + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); assert(def); if(type == UPB_DEF_ANY || def->type == type) defs[i++] = def; } - upb_rwlock_unlock(&s->lock); *count = i; - for(i = 0; i < *count; i++) upb_def_ref(defs[i]); + if (owner) + for(i = 0; i < *count; i++) upb_def_ref(defs[i], owner); return defs; } -const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) { - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); +const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym, + void *owner) { + const upb_value *v = upb_strtable_lookup(&s->symtab, sym); + upb_def *ret = v ? upb_value_getptr(*v) : NULL; + if (ret) upb_def_ref(ret, owner); return ret; } -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); +const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym, + void *owner) { + const upb_value *v = upb_strtable_lookup(&s->symtab, sym); + upb_def *def = v ? upb_value_getptr(*v) : NULL; upb_msgdef *ret = NULL; - if(e && e->def->type == UPB_DEF_MSG) { - ret = upb_downcast_msgdef(e->def); - upb_def_ref(UPB_UPCAST(ret)); + if(def && def->type == UPB_DEF_MSG) { + ret = upb_downcast_msgdef(def); + upb_def_ref(def, owner); } - upb_rwlock_unlock(&s->lock); return ret; } +// Given a symbol and the base symbol inside which it is defined, find the +// symbol's definition in t. +static upb_def *upb_resolvename(const upb_strtable *t, + const char *base, const char *sym) { + if(strlen(sym) == 0) return NULL; + if(sym[0] == UPB_SYMBOL_SEPARATOR) { + // Symbols starting with '.' are absolute, so we do a single lookup. + // Slice to omit the leading '.' + const upb_value *v = upb_strtable_lookup(t, sym + 1); + return v ? upb_value_getptr(*v) : NULL; + } else { + // Remove components from base until we find an entry or run out. + // TODO: This branch is totally broken, but currently not used. + (void)base; + assert(false); + return NULL; + } +} + const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym) { - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_resolve(&s->symtab, base, sym); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); + const char *sym, void *owner) { + upb_def *ret = upb_resolvename(&s->symtab, base, sym); + if (ret) upb_def_ref(ret, owner); return ret; } -bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n, - upb_strtable *addtab) { - // This linear search makes the DFS O(n^2) in the length of the paths. - // Could make this O(n) with a hash table, but n is small. - for (int i = 0; i < n; i++) { - if (def == open_defs[i]) return false; - } - - bool needcopy = false; - upb_msgdef *m = upb_dyncast_msgdef(def); - if (m) { - upb_msg_iter i; - open_defs[n++] = def; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if (!upb_hassubdef(f)) continue; - needcopy |= upb_symtab_dfs(f->def, open_defs, n, addtab); +// Adds dups of any existing def that can reach a def with the same name as one +// of "defs." This is to provide a consistent output graph as documented in +// the header file. We use a modified depth-first traversal that traverses +// each SCC (which we already computed) as if it were a single node. This +// allows us to traverse the possibly-cyclic graph as if it were a DAG and to +// easily dup the correct set of nodes with O(n) time. +// +// Returns true if defs that can reach "def" need to be duplicated into deftab. +static bool upb_resolve_dfs(const upb_def *def, upb_strtable *deftab, + void *new_owner, upb_inttable *seen, + upb_status *s) { + // Memoize results of this function for efficiency (since we're traversing a + // DAG this is not needed to limit the depth of the search). + upb_value *v = upb_inttable_lookup(seen, (uintptr_t)def); + if (v) return upb_value_getbool(*v); + + // Visit submessages for all messages in the SCC. + bool need_dup = false; + const upb_def *base = def; + do { + assert(upb_def_isfinalized(def)); + if (def->type == UPB_DEF_FIELD) continue; + upb_value *v = upb_strtable_lookup(deftab, upb_def_fullname(def)); + if (v) { + upb_def *add_def = upb_value_getptr(*v); + if (add_def->refcount.next && add_def->refcount.next != &def->refcount) { + upb_status_seterrf(s, "conflicting existing defs for name: '%s'", + upb_def_fullname(def)); + return false; + } + need_dup = true; + } + const upb_msgdef *m = upb_dyncast_msgdef_const(def); + if (m) { + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + if (!upb_hassubdef(f)) continue; + // |= to avoid short-circuit; we need its side-effects. + need_dup |= upb_resolve_dfs( + upb_fielddef_subdef_mutable(f), deftab, new_owner, seen, s); + if (!upb_ok(s)) return false; + } } + } while ((def = (upb_def*)def->refcount.next) != base); + + if (need_dup) { + // Dup any defs that don't already have entries in deftab. + def = base; + do { + if (def->type == UPB_DEF_FIELD) continue; + const char *name = upb_def_fullname(def); + if (upb_strtable_lookup(deftab, name) == NULL) { + upb_def *newdef = upb_def_dup(def, new_owner); + if (!newdef) goto oom; + // We temporarily use this field to track who we were dup'd from. + newdef->refcount.next = (upb_refcount*)def; + if (!upb_strtable_insert(deftab, name, upb_value_ptr(newdef))) + goto oom; + } + } while ((def = (upb_def*)def->refcount.next) != base); } - bool replacing = (upb_strtable_lookup(addtab, m->base.fqname) != NULL); - if (needcopy && !replacing) { - upb_symtab_ent e = {upb_def_dup(def)}; - upb_strtable_insert(addtab, def->fqname, &e); - replacing = true; - } - return replacing; -} + upb_inttable_insert(seen, (uintptr_t)def, upb_value_bool(need_dup)); + return need_dup; -bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status) { - upb_rwlock_wrlock(&s->lock); +oom: + upb_status_seterrliteral(s, "out of memory"); + return false; +} - // Add all defs to a table for resolution. +bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, + upb_status *status) { + upb_def **add_defs = NULL; upb_strtable addtab; - upb_strtable_init(&addtab, n, sizeof(upb_symtab_ent)); + if (!upb_strtable_init(&addtab)) { + upb_status_seterrliteral(status, "out of memory"); + return false; + } + + // Add new defs to table. for (int i = 0; i < n; i++) { upb_def *def = defs[i]; - if (upb_strtable_lookup(&addtab, def->fqname)) { - upb_status_seterrf(status, "Conflicting defs named '%s'", def->fqname); - upb_strtable_free(&addtab); - return false; + assert(upb_def_ismutable(def)); + const char *fullname = upb_def_fullname(def); + if (!fullname) { + upb_status_seterrliteral( + status, "Anonymous defs cannot be added to a symtab"); + goto err; } - upb_strtable_insert(&addtab, def->fqname, &def); + if (upb_strtable_lookup(&addtab, fullname) != NULL) { + upb_status_seterrf(status, "Conflicting defs named '%s'", fullname); + goto err; + } + if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def))) + goto oom_err; + // We temporarily use this field to indicate that we came from the user's + // list rather than being dup'd. + def->refcount.next = NULL; } - // All existing defs that can reach defs that are being replaced must - // themselves be replaced with versions that will point to the new defs. - // Do a DFS -- any path that finds a new def must replace all ancestors. - upb_strtable *symtab = &s->symtab; + // Add dups of any existing def that can reach a def with the same name as + // one of "defs." + upb_inttable seen; + if (!upb_inttable_init(&seen)) goto oom_err; upb_strtable_iter i; - upb_strtable_begin(&i, symtab); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - upb_def *open_defs[UPB_MAX_TYPE_DEPTH]; - const upb_symtab_ent *e = upb_strtable_iter_value(&i); - upb_symtab_dfs(e->def, open_defs, 0, &addtab); + upb_strtable_begin(&i, &s->symtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_resolve_dfs(def, &addtab, ref_donor, &seen, status); + if (!upb_ok(status)) goto err; } + upb_inttable_uninit(&seen); - // Resolve all refs. + // Now using the table, resolve symbolic references. upb_strtable_begin(&i, &addtab); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - const upb_symtab_ent *e = upb_strtable_iter_value(&i); - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_msgdef *m = upb_dyncast_msgdef(def); + if (!m) continue; // Type names are resolved relative to the message in which they appear. - const char *base = m->base.fqname; + const char *base = upb_def_fullname(UPB_UPCAST(m)); upb_msg_iter j; - for(j = upb_msg_begin(m); !upb_msg_done(j); j = upb_msg_next(m, j)) { - upb_fielddef *f = upb_msg_iter_field(j); - if (f->type == 0) { - upb_status_seterrf(status, "Field type was not set."); - return false; - } - - if (!upb_hassubdef(f)) continue; // No resolving necessary. - upb_downcast_unresolveddef(f->def); // Type check. - const char *name = f->def->fqname; - - // Resolve from either the addtab (pending adds) or symtab (existing - // defs). If both exist, prefer the pending add, because it will be - // overwriting the existing def. - upb_symtab_ent *found; - if(!(found = upb_resolve(&addtab, base, name)) && - !(found = upb_resolve(symtab, base, name))) { - upb_status_seterrf(status, "could not resolve symbol '%s' " - "in context '%s'", name, base); - return false; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + const char *name = upb_fielddef_subtypename(f); + if (name) { + upb_def *subdef = upb_resolvename(&addtab, base, name); + if (subdef == NULL) { + upb_status_seterrf( + status, "couldn't resolve name '%s' in message '%s'", name, base); + goto err; + } else if (!upb_fielddef_setsubdef(f, subdef)) { + upb_status_seterrf( + status, "def '%s' had the wrong type for field '%s'", + upb_def_fullname(subdef), upb_fielddef_name(f)); + goto err; + } } - // Check the type of the found def. - upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; - if(found->def->type != expected) { - upb_status_seterrliteral(status, "Unexpected type"); - return false; - } - if (!upb_fielddef_resolve(f, found->def, status)) return false; + if (upb_fielddef_type(f) == UPB_TYPE(ENUM) && upb_fielddef_subdef(f) && + !upb_fielddef_resolvedefault(f, status)) + goto err; } } - // The defs in the transaction have been vetted, and can be moved to the - // symtab without causing errors. + // We need an array of the defs in addtab, for passing to upb_finalize. + add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab)); + if (add_defs == NULL) goto oom_err; upb_strtable_begin(&i, &addtab); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - const upb_symtab_ent *tmptab_e = upb_strtable_iter_value(&i); - upb_def_movetosymtab(tmptab_e->def, s); - upb_symtab_ent *symtab_e = - upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); - if(symtab_e) { - upb_deflist_push(&s->olddefs, symtab_e->def); - symtab_e->def = tmptab_e->def; + for (n = 0; !upb_strtable_done(&i); upb_strtable_next(&i)) + add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&i)); + + // Restore the next pointer that we stole. + for (int i = 0; i < n; i++) + add_defs[i]->refcount.next = &add_defs[i]->refcount; + + if (!upb_finalize(add_defs, n, status)) goto err; + upb_strtable_uninit(&addtab); + + for (int i = 0; i < n; i++) { + upb_def *def = add_defs[i]; + const char *name = upb_def_fullname(def); + upb_def_donateref(def, ref_donor, s); + upb_value *v = upb_strtable_lookup(&s->symtab, name); + if(v) { + upb_def_unref(upb_value_getptr(*v), s); + upb_value_setptr(v, def); } else { - upb_strtable_insert(&s->symtab, tmptab_e->def->fqname, tmptab_e); + upb_strtable_insert(&s->symtab, name, upb_value_ptr(def)); } } - - upb_strtable_free(&addtab); - upb_rwlock_unlock(&s->lock); - upb_symtab_gc(s); + free(add_defs); return true; -} -void upb_symtab_gc(upb_symtab *s) { - (void)s; - // TODO. +oom_err: + upb_status_seterrliteral(status, "out of memory"); +err: { + // Need to unref any defs we dup'd (we can distinguish them from defs that + // the user passed in by their def->refcount.next pointers). + upb_strtable_iter i; + upb_strtable_begin(&i, &addtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + if (def->refcount.next) upb_def_unref(def, s); + } + } + upb_strtable_uninit(&addtab); + free(add_defs); + return false; } @@ -1,17 +1,17 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2009-2011 Google Inc. See LICENSE for details. + * Copyright (c) 2009-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> * - * Provides a mechanism for creating and linking proto definitions. - * These form the protobuf schema, and are used extensively throughout upb: + * Defs are upb's internal representation of the constructs that can appear + * in a .proto file: + * * - upb_msgdef: describes a "message" construct. * - upb_fielddef: describes a message field. * - upb_enumdef: describes an enum. * (TODO: definitions of services). * - * * Defs go through two distinct phases of life: * * 1. MUTABLE: when first created, the properties of the def can be set freely @@ -20,16 +20,15 @@ * not be used for any purpose except to set its properties (it can't be * used to parse anything, create any messages in memory, etc). * - * 2. FINALIZED: after being added to a symtab (which links the defs together) - * the defs become finalized (thread-safe and immutable). Programs may only - * access defs through a CONST POINTER during this stage -- upb_symtab will - * help you out with this requirement by only vending const pointers, but - * you need to make sure not to use any non-const pointers you still have - * sitting around. In practice this means that you may not call any setters - * on the defs (or functions that themselves call the setters). If you want - * to modify an existing immutable def, copy it with upb_*_dup(), modify the - * copy, and add the modified def to the symtab (replacing the existing - * def). + * 2. FINALIZED: the upb_def_finalize() operation finalizes a set of defs, + * which makes them thread-safe and immutable. Finalized defs may only be + * accessed through a CONST POINTER. If you want to modify an existing + * immutable def, copy it with upb_*_dup() and modify and finalize the copy. + * + * The refcounting of defs works properly no matter what state the def is in. + * Once the def is finalized it is guaranteed that any def reachable from a + * live def is also live (so a ref on the base of a message tree keeps the + * whole tree alive). * * You can test for which stage of life a def is in by calling * upb_def_ismutable(). This is particularly useful for dynamic language @@ -46,181 +45,306 @@ #ifndef UPB_DEF_H_ #define UPB_DEF_H_ -#include "upb/atomic.h" +#include "upb/refcount.h" #include "upb/table.h" #ifdef __cplusplus extern "C" { #endif -struct _upb_symtab; -typedef struct _upb_symtab upb_symtab; +/* upb_def: base class for defs **********************************************/ // All the different kind of defs we support. These correspond 1:1 with // declarations in a .proto file. typedef enum { - UPB_DEF_MSG = 1, + UPB_DEF_MSG, + UPB_DEF_FIELD, UPB_DEF_ENUM, UPB_DEF_SERVICE, // Not yet implemented. UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*() - UPB_DEF_UNRESOLVED = 99, // Internal-only. } upb_deftype_t; - -/* upb_def: base class for defs **********************************************/ - -typedef struct { - char *fqname; // Fully qualified. - upb_symtab *symtab; // Def is mutable iff symtab == NULL. - upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0). +typedef struct _upb_def { + upb_refcount refcount; + char *fullname; upb_deftype_t type; + bool is_finalized; } upb_def; +#define UPB_UPCAST(ptr) (&(ptr)->base) + // Call to ref/unref a def. Can be used at any time, but is not thread-safe -// until the def is in a symtab. While a def is in a symtab, everything -// reachable from that def (the symtab and all defs in the symtab) are -// guaranteed to be alive. -void upb_def_ref(const upb_def *def); -void upb_def_unref(const upb_def *def); -upb_def *upb_def_dup(const upb_def *def); - -// A def is mutable until it has been added to a symtab. +// until the def is finalized. While a def is finalized, everything reachable +// from that def is guaranteed to be alive. +void upb_def_ref(const upb_def *def, void *owner); +void upb_def_unref(const upb_def *def, void *owner); +void upb_def_donateref(const upb_def *def, void *from, void *to); +upb_def *upb_def_dup(const upb_def *def, void *owner); + +// A def is mutable until it has been finalized. bool upb_def_ismutable(const upb_def *def); -INLINE const char *upb_def_fqname(const upb_def *def) { return def->fqname; } -bool upb_def_setfqname(upb_def *def, const char *fqname); // Only if mutable. +bool upb_def_isfinalized(const upb_def *def); -#define UPB_UPCAST(ptr) (&(ptr)->base) +// "fullname" is the def's fully-qualified name (eg. foo.bar.Message). +INLINE const char *upb_def_fullname(const upb_def *d) { return d->fullname; } + +// The def must be mutable. Caller retains ownership of fullname. Defs are +// not required to have a name; if a def has no name when it is finalized, it +// will remain an anonymous def. +bool upb_def_setfullname(upb_def *def, const char *fullname); + +// Finalizes the given defs; this validates all constraints and marks the defs +// as finalized (read-only). This will also cause fielddefs to take refs on +// their subdefs so that any reachable def will be kept alive (but this is +// done in a way that correctly handles circular references). +// +// On success, a new list is returned containing the finalized defs and +// ownership of the "defs" list passes to the function. On failure NULL is +// returned and the caller retains ownership of "defs." +// +// Symbolic references to sub-types or enum defaults must have already been +// resolved. "defs" must contain the transitive closure of any mutable defs +// reachable from the any def in the list. In other words, there may not be a +// mutable def which is reachable from one of "defs" that does not appear +// elsewhere in "defs." "defs" may not contain fielddefs, but any fielddefs +// reachable from the given msgdefs will be finalized. +// +// n is currently limited to 64k defs, if more are required break them into +// batches of 64k (or we could raise this limit, at the cost of a bigger +// upb_def structure or complexity in upb_finalize()). +bool upb_finalize(upb_def *const*defs, int n, upb_status *status); /* upb_fielddef ***************************************************************/ -// A upb_fielddef describes a single field in a message. It isn't a full def -// in the sense that it derives from upb_def. It cannot stand on its own; it -// must be part of a upb_msgdef. It is also reference-counted. +// We choose these to match descriptor.proto. Clients may use UPB_TYPE() and +// UPB_LABEL() instead of referencing these directly. +typedef enum { + UPB_TYPE_NONE = -1, // Internal-only, may be removed. + UPB_TYPE_ENDGROUP = 0, // Internal-only, may be removed. + UPB_TYPE_DOUBLE = 1, + UPB_TYPE_FLOAT = 2, + UPB_TYPE_INT64 = 3, + UPB_TYPE_UINT64 = 4, + UPB_TYPE_INT32 = 5, + UPB_TYPE_FIXED64 = 6, + UPB_TYPE_FIXED32 = 7, + UPB_TYPE_BOOL = 8, + UPB_TYPE_STRING = 9, + UPB_TYPE_GROUP = 10, + UPB_TYPE_MESSAGE = 11, + UPB_TYPE_BYTES = 12, + UPB_TYPE_UINT32 = 13, + UPB_TYPE_ENUM = 14, + UPB_TYPE_SFIXED32 = 15, + UPB_TYPE_SFIXED64 = 16, + UPB_TYPE_SINT32 = 17, + UPB_TYPE_SINT64 = 18, +} upb_fieldtype_t; + +#define UPB_NUM_TYPES 19 + +typedef enum { + UPB_LABEL_OPTIONAL = 1, + UPB_LABEL_REQUIRED = 2, + UPB_LABEL_REPEATED = 3, +} upb_label_t; + +// These macros are provided for legacy reasons. +#define UPB_TYPE(type) UPB_TYPE_ ## type +#define UPB_LABEL(type) UPB_LABEL_ ## type + +// Info for a given field type. +typedef struct { + uint8_t align; + uint8_t size; + uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32 +} upb_typeinfo; + +extern const upb_typeinfo upb_types[UPB_NUM_TYPES]; + +// A upb_fielddef describes a single field in a message. It is most often +// found as a part of a upb_msgdef, but can also stand alone to represent +// an extension. typedef struct _upb_fielddef { + upb_def base; struct _upb_msgdef *msgdef; - upb_def *def; // if upb_hasdef(f) - upb_atomic_t refcount; - bool finalized; - - // The following fields may be modified until the def is finalized. - uint8_t type; // Use UPB_TYPE() constants. - uint8_t label; // Use UPB_LABEL() constants. + union { + char *name; // If subdef_is_symbolic. + upb_def *def; // If !subdef_is_symbolic. + } sub; // The msgdef or enumdef for this field, if upb_hassubdef(f). + bool subdef_is_symbolic; + bool default_is_string; + bool subdef_is_owned; + upb_fieldtype_t type; + upb_label_t label; int16_t hasbit; uint16_t offset; - bool default_is_string; - bool active; int32_t number; - char *name; - upb_value defaultval; // Only meaningful for non-repeated scalars and strings. + upb_value defaultval; // Only for non-repeated scalars and strings. upb_value fval; struct _upb_accessor_vtbl *accessor; - const void *default_ptr; const void *prototype; } upb_fielddef; -upb_fielddef *upb_fielddef_new(void); -void upb_fielddef_ref(upb_fielddef *f); -void upb_fielddef_unref(upb_fielddef *f); -upb_fielddef *upb_fielddef_dup(upb_fielddef *f); +// Returns NULL if memory allocation failed. +upb_fielddef *upb_fielddef_new(void *owner); + +INLINE void upb_fielddef_ref(upb_fielddef *f, void *owner) { + upb_def_ref(UPB_UPCAST(f), owner); +} +INLINE void upb_fielddef_unref(upb_fielddef *f, void *owner) { + upb_def_unref(UPB_UPCAST(f), owner); +} + +// Duplicates the given field, returning NULL if memory allocation failed. +// When a fielddef is duplicated, the subdef (if any) is made symbolic if it +// wasn't already. If the subdef is set but has no name (which is possible +// since msgdefs are not required to have a name) the new fielddef's subdef +// will be unset. +upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, void *owner); + +INLINE bool upb_fielddef_ismutable(const upb_fielddef *f) { + return upb_def_ismutable(UPB_UPCAST(f)); +} +INLINE bool upb_fielddef_isfinalized(const upb_fielddef *f) { + return !upb_fielddef_ismutable(f); +} -// A fielddef is mutable until its msgdef has been added to a symtab. -bool upb_fielddef_ismutable(const upb_fielddef *f); +// Simple accessors. /////////////////////////////////////////////////////////// -// Read accessors. May be called any time. -INLINE uint8_t upb_fielddef_type(const upb_fielddef *f) { return f->type; } -INLINE uint8_t upb_fielddef_label(const upb_fielddef *f) { return f->label; } +INLINE upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) { + return f->type; +} +INLINE upb_label_t upb_fielddef_label(const upb_fielddef *f) { + return f->label; +} INLINE int32_t upb_fielddef_number(const upb_fielddef *f) { return f->number; } -INLINE char *upb_fielddef_name(const upb_fielddef *f) { return f->name; } +INLINE uint16_t upb_fielddef_offset(const upb_fielddef *f) { return f->offset; } +INLINE int16_t upb_fielddef_hasbit(const upb_fielddef *f) { return f->hasbit; } +INLINE const char *upb_fielddef_name(const upb_fielddef *f) { + return upb_def_fullname(UPB_UPCAST(f)); +} INLINE upb_value upb_fielddef_fval(const upb_fielddef *f) { return f->fval; } -INLINE bool upb_fielddef_finalized(const upb_fielddef *f) { return f->finalized; } INLINE struct _upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f) { return f->msgdef; } INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(const upb_fielddef *f) { return f->accessor; } -INLINE const char *upb_fielddef_typename(const upb_fielddef *f) { - return f->def ? f->def->fqname : NULL; -} -// Returns the default value for this fielddef, which may either be something -// the client set explicitly or the "default default" (0 for numbers, empty for -// strings). The field's type indicates the type of the returned value, except -// for enums. For enums the default can be set either numerically or -// symbolically -- the upb_fielddef_default_is_symbolic() function below will -// indicate which it is. For string defaults, the value will be a upb_strref -// which is invalidated by any other call on this object. -INLINE upb_value upb_fielddef_default(const upb_fielddef *f) { - return f->defaultval; -} +bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type); +bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label); +void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit); +void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset); +// TODO(haberman): need a way of keeping the fval alive even if some handlers +// outlast the fielddef. +void upb_fielddef_setfval(upb_fielddef *f, upb_value fval); +void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl); -// The results of this function are only meaningful for enum fields, which can -// have a default specified either as an integer or as a string. If this -// returns true, the default returned from upb_fielddef_default() is a string, -// otherwise it is an integer. -INLINE bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) { - return f->default_is_string; +// "Number" and "fullname" must be set before the fielddef is added to a msgdef. +// For the moment we do not allow these to be set once the fielddef is added to +// a msgdef -- this could be relaxed in the future. +bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number); +INLINE bool upb_fielddef_setname(upb_fielddef *f, const char *name) { + return upb_def_setfullname(UPB_UPCAST(f), name); } -// The enum or submessage def for this field, if any. Only meaningful for -// submessage, group, and enum fields (ie. when upb_hassubdef(f) is true). -// Since defs are not linked together until they are in a symtab, this -// will return NULL until the msgdef is in a symtab. -upb_def *upb_fielddef_subdef(const upb_fielddef *f); +// Field type tests. /////////////////////////////////////////////////////////// -// Write accessors. "Number" and "name" must be set before the fielddef is -// added to a msgdef. For the moment we do not allow these to be set once -// the fielddef is added to a msgdef -- this could be relaxed in the future. -bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number); -bool upb_fielddef_setname(upb_fielddef *f, const char *name); +INLINE bool upb_issubmsgtype(upb_fieldtype_t type) { + return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); +} +INLINE bool upb_isstringtype(upb_fieldtype_t type) { + return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); +} +INLINE bool upb_isprimitivetype(upb_fieldtype_t type) { + return !upb_issubmsgtype(type) && !upb_isstringtype(type); +} +INLINE bool upb_issubmsg(const upb_fielddef *f) { + return upb_issubmsgtype(f->type); +} +INLINE bool upb_isstring(const upb_fielddef *f) { + return upb_isstringtype(f->type); +} +INLINE bool upb_isseq(const upb_fielddef *f) { + return f->label == UPB_LABEL(REPEATED); +} -// These writers may be called at any time prior to being put in a symtab. -bool upb_fielddef_settype(upb_fielddef *f, uint8_t type); -bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label); -void upb_fielddef_setfval(upb_fielddef *f, upb_value fval); -void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl); +// Default value. ////////////////////////////////////////////////////////////// -// The name of the message or enum this field is referring to. Must be found -// at name resolution time (when upb_symtab_add() is called). +// Returns the default value for this fielddef, which may either be something +// the client set explicitly or the "default default" (0 for numbers, empty for +// strings). The field's type indicates the type of the returned value, except +// for enum fields that are still mutable. // -// NOTE: May only be called for fields whose type has already been set to -// be a submessage, group, or enum! Also, will be reset to empty if the -// field's type is set again. -bool upb_fielddef_settypename(upb_fielddef *f, const char *name); - -// The default value for the field. For numeric types, use +// For enums the default can be set either numerically or symbolically -- the +// upb_fielddef_default_is_symbolic() function below will indicate which it is. +// For string defaults, the value will be a upb_byteregion which is invalidated +// by any other non-const call on this object. Once the fielddef is finalized, +// symbolic enum defaults are resolved, so finalized enum fielddefs always have +// a default of type int32. +INLINE upb_value upb_fielddef_default(const upb_fielddef *f) { + return f->defaultval; +} +// Sets default value for the field. For numeric types, use // upb_fielddef_setdefault(), and "value" must match the type of the field. -// For string/bytes types, use upb_fielddef_setdefaultstr(). -// Enum types may use either, since the default may be set either numerically -// or symbolically. +// For string/bytes types, use upb_fielddef_setdefaultstr(). Enum types may +// use either, since the default may be set either numerically or symbolically. // // NOTE: May only be called for fields whose type has already been set. // Also, will be reset to default if the field's type is set again. void upb_fielddef_setdefault(upb_fielddef *f, upb_value value); -void upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len); +bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len); void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str); -// A variety of tests about the type of a field. -INLINE bool upb_issubmsgtype(upb_fieldtype_t type) { - return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); -} -INLINE bool upb_isstringtype(upb_fieldtype_t type) { - return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); -} -INLINE bool upb_isprimitivetype(upb_fieldtype_t type) { - return !upb_issubmsgtype(type) && !upb_isstringtype(type); +// The results of this function are only meaningful for mutable enum fields, +// which can have a default specified either as an integer or as a string. If +// this returns true, the default returned from upb_fielddef_default() is a +// string, otherwise it is an integer. +INLINE bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) { + assert(f->type == UPB_TYPE(ENUM)); + return f->default_is_string; } -INLINE bool upb_issubmsg(const upb_fielddef *f) { return upb_issubmsgtype(f->type); } -INLINE bool upb_isstring(const upb_fielddef *f) { return upb_isstringtype(f->type); } -INLINE bool upb_isseq(const upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); } -// Does the type of this field imply that it should contain an associated def? +// Subdef. ///////////////////////////////////////////////////////////////////// + +// Submessage and enum fields must reference a "subdef", which is the +// upb_msgdef or upb_enumdef that defines their type. Note that when the +// fielddef is mutable it may not have a subdef *yet*, but this function still +// returns true to indicate that the field's type requires a subdef. INLINE bool upb_hassubdef(const upb_fielddef *f) { return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); } +// Before a fielddef is finalized, its subdef may be set either directly (with +// a upb_def*) or symbolically. Symbolic refs must be resolved before the +// containing msgdef can be finalized (see upb_resolve() above). The client is +// responsible for making sure that "subdef" lives until this fielddef is +// finalized or deleted. +// +// Both methods require that upb_hassubdef(f) (so the type must be set prior +// to calling these methods). Returns false if this is not the case, or if +// the given subdef is not of the correct type. The subtype is reset if the +// field's type is changed. +bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef); +bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name); + +// Returns the enum or submessage def or symbolic name for this field, if any. +// Requires that upb_hassubdef(f). Returns NULL if the subdef has not been set +// or if you ask for a subtype name when the subtype is currently set +// symbolically (or vice-versa). To access the subtype's name for a linked +// fielddef, use upb_def_fullname(upb_fielddef_subdef(f)). +// +// Caller does *not* own a ref on the returned def or string. +// upb_fielddef_subtypename() is non-const because finalized defs will never +// have a symbolic reference (they must be resolved before the msgdef can be +// finalized). +upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f); +const upb_def *upb_fielddef_subdef(const upb_fielddef *f); +const char *upb_fielddef_subtypename(upb_fielddef *f); + /* upb_msgdef *****************************************************************/ @@ -232,31 +356,31 @@ typedef struct _upb_msgdef { upb_inttable itof; // int to field upb_strtable ntof; // name to field - // The following fields may be modified until finalized. + // The following fields may be modified while mutable. uint16_t size; uint8_t hasbit_bytes; // The range of tag numbers used to store extensions. uint32_t extstart, extend; + // Used for proto2 integration. + const void *prototype; } upb_msgdef; -// Hash table entries for looking up fields by name or number. -typedef struct { - bool junk; - upb_fielddef *f; -} upb_itof_ent; -typedef struct { - upb_fielddef *f; -} upb_ntof_ent; +// Returns NULL if memory allocation failed. +upb_msgdef *upb_msgdef_new(void *owner); -upb_msgdef *upb_msgdef_new(void); -INLINE void upb_msgdef_unref(const upb_msgdef *md) { upb_def_unref(UPB_UPCAST(md)); } -INLINE void upb_msgdef_ref(const upb_msgdef *md) { upb_def_ref(UPB_UPCAST(md)); } +INLINE void upb_msgdef_unref(const upb_msgdef *md, void *owner) { + upb_def_unref(UPB_UPCAST(md), owner); +} +INLINE void upb_msgdef_ref(const upb_msgdef *md, void *owner) { + upb_def_ref(UPB_UPCAST(md), owner); +} // Returns a new msgdef that is a copy of the given msgdef (and a copy of all // the fields) but with any references to submessages broken and replaced with -// just the name of the submessage. This can be put back into another symtab -// and the names will be re-resolved in the new context. -upb_msgdef *upb_msgdef_dup(const upb_msgdef *m); +// just the name of the submessage. Returns NULL if memory allocation failed. +// This can be put back into another symtab and the names will be re-resolved +// in the new context. +upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, void *owner); // Read accessors. May be called at any time. INLINE size_t upb_msgdef_size(const upb_msgdef *m) { return m->size; } @@ -271,38 +395,35 @@ void upb_msgdef_setsize(upb_msgdef *m, uint16_t size); void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes); bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end); -// Adds a set of fields (upb_fielddef objects) to a msgdef. Caller retains its -// ref on the fielddef. May only be done before the msgdef is in a symtab -// (requires upb_def_ismutable(m) for the msgdef). The fielddef's name and -// number must be set, and the message may not already contain any field with -// this name or number, and this fielddef may not be part of another message, -// otherwise false is returned and no action is performed. -bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *f, int n); -INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) { - return upb_msgdef_addfields(m, &f, 1); -} - -// Sets the layout of all fields according to default rules: -// 1. Hasbits for required fields come first, then optional fields. -// 2. Values are laid out in a way that respects alignment rules. -// 3. The order is chosen to minimize memory usage. -// This should only be called once all fielddefs have been added. -// TODO: will likely want the ability to exclude strings/submessages/arrays. -// TODO: will likely want the ability to define a header size. -void upb_msgdef_layout(upb_msgdef *m); +// Adds a set of fields (upb_fielddef objects) to a msgdef. Requires that the +// msgdef and all the fielddefs are mutable. The fielddef's name and number +// must be set, and the message may not already contain any field with this +// name or number, and this fielddef may not be part of another message. In +// error cases false is returned and the msgdef is unchanged. +// +// On success, the msgdef takes a ref on the fielddef so the caller needn't +// worry about continuing to keep it alive (however the reverse is not true; +// refs on the fielddef will *not* keep the msgdef alive). If ref_donor is +// non-NULL, caller passes a ref on the fielddef from ref_donor to the msgdef, +// otherwise caller retains its reference(s) on the defs in f. +bool upb_msgdef_addfields( + upb_msgdef *m, upb_fielddef *const *f, int n, void *ref_donor); +INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, + void *ref_donor) { + return upb_msgdef_addfields(m, &f, 1, ref_donor); +} // Looks up a field by name or number. While these are written to be as fast // as possible, it will still be faster to cache the results of this lookup if // possible. These return NULL if no such field is found. INLINE upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { - upb_itof_ent *e = (upb_itof_ent*) - upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent)); - return e ? e->f : NULL; + const upb_value *val = upb_inttable_lookup32(&m->itof, i); + return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL; } INLINE upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name) { - upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); - return e ? e->f : NULL; + const upb_value *val = upb_strtable_lookup(&m->ntof, name); + return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL; } INLINE int upb_msgdef_numfields(const upb_msgdef *m) { @@ -313,20 +434,19 @@ INLINE int upb_msgdef_numfields(const upb_msgdef *m) { // TODO: the iteration should be in field order. // Iterators are invalidated when a field is added or removed. // upb_msg_iter i; -// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { -// upb_fielddef *f = upb_msg_iter_field(i); +// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { +// upb_fielddef *f = upb_msg_iter_field(&i); // // ... // } typedef upb_inttable_iter upb_msg_iter; -upb_msg_iter upb_msg_begin(const upb_msgdef *m); -upb_msg_iter upb_msg_next(const upb_msgdef *m, upb_msg_iter iter); -INLINE bool upb_msg_done(upb_msg_iter iter) { return upb_inttable_done(iter); } +void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m); +void upb_msg_next(upb_msg_iter *iter); +INLINE bool upb_msg_done(upb_msg_iter *iter) { return upb_inttable_done(iter); } // Iterator accessor. -INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) { - upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_iter_value(iter); - return ent->f; +INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter *iter) { + return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter)); } @@ -339,84 +459,75 @@ typedef struct _upb_enumdef { int32_t defaultval; } upb_enumdef; -typedef struct { - uint32_t value; -} upb_ntoi_ent; - -typedef struct { - bool junk; - char *str; -} upb_iton_ent; - -upb_enumdef *upb_enumdef_new(void); -INLINE void upb_enumdef_ref(const upb_enumdef *e) { upb_def_ref(UPB_UPCAST(e)); } -INLINE void upb_enumdef_unref(const upb_enumdef *e) { upb_def_unref(UPB_UPCAST(e)); } -upb_enumdef *upb_enumdef_dup(const upb_enumdef *e); +// Returns NULL if memory allocation failed. +upb_enumdef *upb_enumdef_new(void *owner); +INLINE void upb_enumdef_ref(const upb_enumdef *e, void *owner) { + upb_def_ref(&e->base, owner); +} +INLINE void upb_enumdef_unref(const upb_enumdef *e, void *owner) { + upb_def_unref(&e->base, owner); +} +upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, void *owner); -INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; } +INLINE int32_t upb_enumdef_default(const upb_enumdef *e) { + return e->defaultval; +} // May only be set if upb_def_ismutable(e). void upb_enumdef_setdefault(upb_enumdef *e, int32_t val); -// Adds a value to the enumdef. Requires that no existing val has this -// name or number (returns false and does not add if there is). May only -// be called before the enumdef is in a symtab. -bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num); +// Returns the number of values currently defined in the enum. Note that +// multiple names can refer to the same number, so this may be greater than the +// total number of unique numbers. +INLINE int upb_enumdef_numvals(const upb_enumdef *e) { + return upb_strtable_count(&e->ntoi); +} + +// Adds a value to the enumdef. Requires that no existing val has this name, +// but duplicate numbers are allowed. May only be called if the enumdef is +// mutable. Returns false if the existing name is used, or if "name" is not a +// valid label, or on memory allocation failure (we may want to distinguish +// these failure cases in the future). +bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num); -// Lookups from name to integer and vice-versa. -bool upb_enumdef_ntoil(upb_enumdef *e, const char *name, size_t len, int32_t *num); -bool upb_enumdef_ntoi(upb_enumdef *e, const char *name, int32_t *num); -// Caller does not own the returned string. -const char *upb_enumdef_iton(upb_enumdef *e, int32_t num); +// Lookups from name to integer, returning true if found. +bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, int32_t *num); + +// Finds the name corresponding to the given number, or NULL if none was found. +// If more than one name corresponds to this number, returns the first one that +// was added. +const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num); // Iteration over name/value pairs. The order is undefined. // Adding an enum val invalidates any iterators. // upb_enum_iter i; -// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { +// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { // // ... // } -typedef upb_inttable_iter upb_enum_iter; +typedef upb_strtable_iter upb_enum_iter; -upb_enum_iter upb_enum_begin(const upb_enumdef *e); -upb_enum_iter upb_enum_next(const upb_enumdef *e, upb_enum_iter iter); -INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); } +void upb_enum_begin(upb_enum_iter *iter, const upb_enumdef *e); +void upb_enum_next(upb_enum_iter *iter); +bool upb_enum_done(upb_enum_iter *iter); // Iterator accessors. -INLINE char *upb_enum_iter_name(upb_enum_iter iter) { - upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter); - return e->str; +INLINE const char *upb_enum_iter_name(upb_enum_iter *iter) { + return upb_strtable_iter_key(iter); } -INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { - return upb_inttable_iter_key(iter); +INLINE int32_t upb_enum_iter_number(upb_enum_iter *iter) { + return upb_value_getint32(upb_strtable_iter_value(iter)); } -/* upb_deflist ****************************************************************/ - -// upb_deflist is an internal-only dynamic array for storing a growing list of -// upb_defs. -typedef struct { - upb_def **defs; - uint32_t len; - uint32_t size; -} upb_deflist; - -void upb_deflist_init(upb_deflist *l); -void upb_deflist_uninit(upb_deflist *l); -void upb_deflist_push(upb_deflist *l, upb_def *d); - - /* upb_symtab *****************************************************************/ -// A symtab (symbol table) is where upb_defs live. It is empty when first -// constructed. Clients add definitions to the symtab (or replace existing -// definitions) by calling upb_symtab_add(). -struct _upb_symtab { - upb_atomic_t refcount; - upb_rwlock_t lock; // Protects all members except the refcount. - upb_strtable symtab; // The symbol table. - upb_deflist olddefs; -}; +// A symtab (symbol table) stores a name->def map of upb_defs. Clients could +// always create such tables themselves, but upb_symtab has logic for resolving +// symbolic references, which is nontrivial. +typedef struct { + uint32_t refcount; + upb_strtable symtab; +} upb_symtab; upb_symtab *upb_symtab_new(void); void upb_symtab_ref(const upb_symtab *s); @@ -430,33 +541,47 @@ void upb_symtab_unref(const upb_symtab *s); // within this message are searched, then within the parent, on up to the // root namespace). // -// If a def is found, the caller owns one ref on the returned def. Otherwise -// returns NULL. +// If a def is found, the caller owns one ref on the returned def, owned by +// owner. Otherwise returns NULL. const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym); + const char *sym, void *owner); -// Find an entry in the symbol table with this exact name. If a def is found, -// the caller owns one ref on the returned def. Otherwise returns NULL. -const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym); -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym); +// Finds an entry in the symbol table with this exact name. If a def is found, +// the caller owns one ref on the returned def, owned by owner. Otherwise +// returns NULL. +const upb_def *upb_symtab_lookup( + const upb_symtab *s, const char *sym, void *owner); +const upb_msgdef *upb_symtab_lookupmsg( + const upb_symtab *s, const char *sym, void *owner); // Gets an array of pointers to all currently active defs in this symtab. The // caller owns the returned array (which is of length *count) as well as a ref -// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are -// returned, otherwise only defs of the required type are returned. -const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *n, upb_deftype_t type); - -// Adds the given defs to the symtab, resolving all symbols. Only one def per -// name may be in the list, but defs can replace existing defs in the symtab. +// to each symbol inside (owned by owner). If type is UPB_DEF_ANY then defs of +// all types are returned, otherwise only defs of the required type are +// returned. +const upb_def **upb_symtab_getdefs( + const upb_symtab *s, int *n, upb_deftype_t type, void *owner); + +// Adds the given defs to the symtab, resolving all symbols (including enum +// default values) and finalizing the defs. Only one def per name may be in +// the list, but defs can replace existing defs in the symtab. All defs must +// have a name -- anonymous defs are not allowed. Anonymous defs can still be +// finalized by calling upb_def_finalize() directly. +// +// Any existing defs that can reach defs that are being replaced will +// themselves be replaced also, so that the resulting set of defs is fully +// consistent. +// +// This logic implemented in this method is a convenience; ultimately it calls +// some combination of upb_fielddef_setsubdef(), upb_def_dup(), and +// upb_finalize(), any of which the client could call themself. However, since +// the logic for doing so is nontrivial, we provide it here. +// // The entire operation either succeeds or fails. If the operation fails, the // symtab is unchanged, false is returned, and status indicates the error. The -// caller retains its ref on all defs in all cases. -bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status); - -// Frees defs that are no longer active in the symtab and are no longer -// reachable. Such defs are not freed when they are replaced in the symtab -// if they are still reachable from defs that are still referenced. -void upb_symtab_gc(upb_symtab *s); +// caller passes a ref on all defs to the symtab (even if the operation fails). +bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, + upb_status *status); /* upb_def casts **************************************************************/ @@ -483,9 +608,9 @@ void upb_symtab_gc(upb_symtab *s); return (const struct _upb_ ## lower*)def; \ } UPB_DEF_CASTS(msgdef, MSG); +UPB_DEF_CASTS(fielddef, FIELD); UPB_DEF_CASTS(enumdef, ENUM); UPB_DEF_CASTS(svcdef, SERVICE); -UPB_DEF_CASTS(unresolveddef, UNRESOLVED); #undef UPB_DEF_CASTS #ifdef __cplusplus diff --git a/upb/descriptor_const.h b/upb/descriptor/descriptor_const.h index 20058e4..52ca803 100644 --- a/upb/descriptor_const.h +++ b/upb/descriptor/descriptor_const.h @@ -9,79 +9,47 @@ extern "C" { /* Enums. */ -typedef enum google_protobuf_FieldOptions_CType { - GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0, - GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1, - GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2 -} google_protobuf_FieldOptions_CType; - typedef enum google_protobuf_FieldDescriptorProto_Type { - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FLOAT = 2, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT32 = 5, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED32 = 7, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_MESSAGE = 11, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ENUM = 14, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED64 = 16, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18 + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17 } google_protobuf_FieldDescriptorProto_Type; typedef enum google_protobuf_FieldDescriptorProto_Label { - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REQUIRED = 2, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3 + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1 } google_protobuf_FieldDescriptorProto_Label; +typedef enum google_protobuf_FieldOptions_CType { + GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1, + GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0, + GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2 +} google_protobuf_FieldOptions_CType; + typedef enum google_protobuf_FileOptions_OptimizeMode { - GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1, GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_CODE_SIZE = 2, + GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1, GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_LITE_RUNTIME = 3 } google_protobuf_FileOptions_OptimizeMode; /* Constants for field names and numbers. */ -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNUM 1 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNAME "path" #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDTYPE 5 @@ -106,6 +74,10 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNAME "negative_int_value" #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDTYPE 3 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9 + #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNUM 6 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNAME "double_value" #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDTYPE 1 @@ -114,10 +86,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNAME "string_value" #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDTYPE 12 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9 - #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNUM 1 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNAME "name" #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDTYPE 9 @@ -138,14 +106,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type" #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11 - #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNUM 8 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 @@ -154,6 +114,14 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNAME "source_code_info" #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDTYPE 11 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11 + #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNUM 1 #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNAME "name" #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDTYPE 9 @@ -170,53 +138,13 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value" -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - #define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 #define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" #define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number" -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2 -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method" -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNUM 1 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNAME "location" @@ -230,6 +158,18 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNAME "end" #define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDTYPE 5 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name" +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9 + +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number" +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5 + +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 + #define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNUM 1 #define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNAME "ctype" #define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDTYPE 14 @@ -254,18 +194,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNAME "java_package" #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDTYPE 9 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9 -#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for" -#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8 - #define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNUM 16 #define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNAME "cc_generic_services" #define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDTYPE 8 @@ -286,17 +214,69 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" #define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format" -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname" +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor" -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8 +#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9 +#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for" +#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files" +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8 + +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name" +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9 + +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value" +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name" +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9 + +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method" +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 #define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 #define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" @@ -322,6 +302,10 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNAME "type" #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE 14 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 + #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM 6 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNAME "type_name" #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE 9 @@ -330,18 +314,34 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNAME "default_value" #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE 9 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - #define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 #define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" #define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format" +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8 + +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor" +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8 + +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 + #define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 #define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" #define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9 + +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8 + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/upb/descriptor.c b/upb/descriptor/reader.c index 0c589f2..8177560 100644 --- a/upb/descriptor.c +++ b/upb/descriptor/reader.c @@ -8,13 +8,14 @@ #include <stdlib.h> #include <errno.h> #include "upb/def.h" -#include "upb/descriptor.h" +#include "upb/descriptor/descriptor_const.h" +#include "upb/descriptor/reader.h" // Returns a newly allocated string that joins input strings together, for example: // join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" // join("", "Baz") -> "Baz" // Caller owns a ref on the returned string. */ -static char *upb_join(char *base, char *name) { +static char *upb_join(const char *base, const char *name) { if (!base || strlen(base) == 0) { return strdup(name); } else { @@ -27,6 +28,36 @@ static char *upb_join(char *base, char *name) { } } +void upb_deflist_init(upb_deflist *l) { + l->size = 8; + l->defs = malloc(l->size * sizeof(void*)); + l->len = 0; + l->owned = true; +} + +void upb_deflist_uninit(upb_deflist *l) { + if (l->owned) + for(size_t i = 0; i < l->len; i++) + upb_def_unref(l->defs[i], &l->defs); + free(l->defs); +} + +void upb_deflist_push(upb_deflist *l, upb_def *d) { + if(l->len == l->size) { + l->size *= 2; + l->defs = realloc(l->defs, l->size * sizeof(void*)); + } + l->defs[l->len++] = d; +} + +void upb_deflist_donaterefs(upb_deflist *l, void *owner) { + assert(l->owned); + for (size_t i = 0; i < l->len; i++) + upb_def_donateref(l->defs[i], &l->defs, owner); + l->owned = false; +} + + /* upb_descreader ************************************************************/ static upb_def *upb_deflist_last(upb_deflist *l) { @@ -37,8 +68,8 @@ static upb_def *upb_deflist_last(upb_deflist *l) { static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) { for(uint32_t i = start; i < l->len; i++) { upb_def *def = l->defs[i]; - char *name = def->fqname; - def->fqname = upb_join(str, name); + char *name = upb_join(str, upb_def_fullname(def)); + upb_def_setfullname(def, name); free(name); } } @@ -66,9 +97,9 @@ void upb_descreader_uninit(upb_descreader *r) { } } -upb_def **upb_descreader_getdefs(upb_descreader *r, int *n) { +upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) { *n = r->defs.len; - r->defs.len = 0; + upb_deflist_donaterefs(&r->defs, owner); return r->defs.defs; } @@ -204,7 +235,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r, return; } upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); - if (upb_inttable_count(&e->iton) == 0) { + if (upb_enumdef_numvals(e) == 0) { // The default value of an enum (in the absence of an explicit default) is // its first listed value. upb_enumdef_setdefault(e, r->number); @@ -236,18 +267,18 @@ static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto( // google.protobuf.EnumDescriptorProto. static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) { upb_descreader *r = _r; - upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new())); + upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new(&r->defs))); return UPB_CONTINUE; } static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); - if (upb_descreader_last((upb_descreader*)_r)->fqname == NULL) { + if (upb_def_fullname(upb_descreader_last((upb_descreader*)_r)) == NULL) { upb_status_seterrliteral(status, "Enum had no name."); return; } - if (upb_inttable_count(&e->iton) == 0) { + if (upb_enumdef_numvals(e) == 0) { upb_status_seterrliteral(status, "Enum had no values."); return; } @@ -258,9 +289,9 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r, upb_value val) { (void)fval; upb_descreader *r = _r; - upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); - free(e->base.fqname); - e->base.fqname = upb_byteregion_strdup(upb_value_getbyteregion(val)); + char *fullname = upb_byteregion_strdup(upb_value_getbyteregion(val)); + upb_def_setfullname(upb_descreader_last(r), fullname); + free(fullname); return UPB_CONTINUE; } @@ -284,7 +315,7 @@ static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) static upb_flow_t upb_fielddef_startmsg(void *_r) { upb_descreader *r = _r; - r->f = upb_fielddef_new(); + r->f = upb_fielddef_new(&r->defs); free(r->default_string); r->default_string = NULL; return UPB_CONTINUE; @@ -370,13 +401,12 @@ static void upb_fielddef_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; upb_fielddef *f = r->f; // TODO: verify that all required fields were present. - assert(f->number != -1 && f->name != NULL); - assert((f->def != NULL) == upb_hassubdef(f)); + assert(f->number != -1 && upb_fielddef_name(f) != NULL); + assert((upb_fielddef_subtypename(f) != NULL) == upb_hassubdef(f)); // Field was successfully read, add it as a field of the msgdef. upb_msgdef *m = upb_descreader_top(r); - upb_msgdef_addfield(m, f); - upb_fielddef_unref(f); + upb_msgdef_addfield(m, f, &r->defs); r->f = NULL; if (r->default_string) { @@ -435,7 +465,7 @@ static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval, (void)fval; upb_descreader *r = _r; char *name = upb_byteregion_strdup(upb_value_getbyteregion(val)); - upb_fielddef_settypename(r->f, name); + upb_fielddef_setsubtypename(r->f, name); free(name); return UPB_CONTINUE; } @@ -479,7 +509,7 @@ static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto( // google.protobuf.DescriptorProto. static upb_flow_t upb_msgdef_startmsg(void *_r) { upb_descreader *r = _r; - upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new())); + upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new(&r->defs))); upb_descreader_startcontainer(r); return UPB_CONTINUE; } @@ -487,7 +517,7 @@ static upb_flow_t upb_msgdef_startmsg(void *_r) { static void upb_msgdef_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; upb_msgdef *m = upb_descreader_top(r); - if(!m->base.fqname) { + if(!upb_def_fullname(UPB_UPCAST(m))) { upb_status_seterrliteral(status, "Encountered message with no name."); return; } @@ -497,11 +527,10 @@ static void upb_msgdef_endmsg(void *_r, upb_status *status) { static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) { (void)fval; upb_descreader *r = _r; - assert(val.type == UPB_TYPE(STRING)); upb_msgdef *m = upb_descreader_top(r); - free(m->base.fqname); - m->base.fqname = upb_byteregion_strdup(upb_value_getbyteregion(val)); - upb_descreader_setscopename(r, strdup(m->base.fqname)); + char *name = upb_byteregion_strdup(upb_value_getbyteregion(val)); + upb_def_setfullname(UPB_UPCAST(m), name); + upb_descreader_setscopename(r, name); // Passes ownership of name. return UPB_CONTINUE; } @@ -530,4 +559,3 @@ static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) { } #undef FNUM #undef FTYPE - diff --git a/upb/descriptor.h b/upb/descriptor/reader.h index 21099b3..0e1bfa0 100644 --- a/upb/descriptor.h +++ b/upb/descriptor/reader.h @@ -4,9 +4,9 @@ * Copyright (c) 2011 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> * - * Routines for building defs by parsing descriptors in descriptor.proto format. - * This only needs to use the public API of upb_symtab. Later we may also - * add routines for dumping a symtab to a descriptor. + * upb_descreader provides a set of sink handlers that will build defs from a + * data source that uses the descriptor.proto schema (like a protobuf binary + * descriptor). */ #ifndef UPB_DESCRIPTOR_H @@ -18,6 +18,20 @@ extern "C" { #endif +/* upb_deflist ****************************************************************/ + +// upb_deflist is an internal-only dynamic array for storing a growing list of +// upb_defs. +typedef struct { + upb_def **defs; + size_t len; + size_t size; + bool owned; +} upb_deflist; + +void upb_deflist_init(upb_deflist *l); +void upb_deflist_uninit(upb_deflist *l); +void upb_deflist_push(upb_deflist *l, upb_def *d); /* upb_descreader ************************************************************/ @@ -56,11 +70,11 @@ void upb_descreader_uninit(upb_descreader *r); upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h); // Gets the array of defs that have been parsed and removes them from the -// descreader. Ownership of the defs is passed to the caller, but the -// ownership of the returned array is retained and is invalidated by any other -// call into the descreader. The defs will not have been resolved, and are -// ready to be added to a symtab. -upb_def **upb_descreader_getdefs(upb_descreader *r, int *n); +// descreader. Ownership of the defs is passed to the caller using the given +// owner), but the ownership of the returned array is retained and is +// invalidated by any other call into the descreader. The defs will not have +// been resolved, and are ready to be added to a symtab. +upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n); #ifdef __cplusplus } /* extern "C" */ diff --git a/upb/handlers.c b/upb/handlers.c index 1ccaf8d..ea5a054 100644 --- a/upb/handlers.c +++ b/upb/handlers.c @@ -13,7 +13,7 @@ static upb_mhandlers *upb_mhandlers_new() { upb_mhandlers *m = malloc(sizeof(*m)); - upb_inttable_init(&m->fieldtab, 8, sizeof(upb_itofhandlers_ent)); + upb_inttable_init(&m->fieldtab); m->startmsg = NULL; m->endmsg = NULL; m->is_group = false; @@ -26,20 +26,19 @@ static upb_mhandlers *upb_mhandlers_new() { static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, upb_fieldtype_t type, bool repeated) { - upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, n); + const upb_value *v = upb_inttable_lookup(&m->fieldtab, n); // TODO: design/refine the API for changing the set of fields or modifying // existing handlers. - if (e) return NULL; - upb_fhandlers new_f = {type, repeated, UPB_ATOMIC_INIT(0), + if (v) return NULL; + upb_fhandlers new_f = {type, repeated, 0, n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL, #ifdef UPB_USE_JIT_X64 0, 0, 0, #endif - NULL}; + }; upb_fhandlers *ptr = malloc(sizeof(*ptr)); memcpy(ptr, &new_f, sizeof(upb_fhandlers)); - upb_itofhandlers_ent ent = {false, ptr}; - upb_inttable_insert(&m->fieldtab, n, &ent); + upb_inttable_insert(&m->fieldtab, n, upb_value_ptr(ptr)); return ptr; } @@ -64,12 +63,17 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n, return f; } +upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n) { + const upb_value *v = upb_inttable_lookup(&m->fieldtab, n); + return v ? upb_value_getptr(*v) : NULL; +} + /* upb_handlers ***************************************************************/ upb_handlers *upb_handlers_new() { upb_handlers *h = malloc(sizeof(*h)); - upb_atomic_init(&h->refcount, 1); + h->refcount = 1; h->msgs_len = 0; h->msgs_size = 4; h->msgs = malloc(h->msgs_size * sizeof(*h->msgs)); @@ -77,19 +81,18 @@ upb_handlers *upb_handlers_new() { return h; } -void upb_handlers_ref(upb_handlers *h) { upb_atomic_ref(&h->refcount); } +void upb_handlers_ref(upb_handlers *h) { h->refcount++; } void upb_handlers_unref(upb_handlers *h) { - if (upb_atomic_unref(&h->refcount)) { + if (--h->refcount == 0) { for (int i = 0; i < h->msgs_len; i++) { upb_mhandlers *mh = h->msgs[i]; - for(upb_inttable_iter j = upb_inttable_begin(&mh->fieldtab); - !upb_inttable_done(j); - j = upb_inttable_next(&mh->fieldtab, j)) { - upb_itofhandlers_ent *e = upb_inttable_iter_value(j); - free(e->f); + upb_inttable_iter j; + upb_inttable_begin(&j, &mh->fieldtab); + for(; !upb_inttable_done(&j); upb_inttable_next(&j)) { + free(upb_value_getptr(upb_inttable_iter_value(&j))); } - upb_inttable_free(&mh->fieldtab); + upb_inttable_uninit(&mh->fieldtab); #ifdef UPB_USE_JIT_X64 free(mh->tablearray); #endif @@ -110,31 +113,28 @@ upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) { return mh; } -typedef struct { - upb_mhandlers *mh; -} upb_mtab_ent; - static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, const upb_msgdef *m, upb_onmsgreg *msgreg_cb, upb_onfieldreg *fieldreg_cb, void *closure, upb_strtable *mtab) { upb_mhandlers *mh = upb_handlers_newmhandlers(h); - upb_mtab_ent e = {mh}; - upb_strtable_insert(mtab, m->base.fqname, &e); + upb_strtable_insert(mtab, upb_def_fullname(UPB_UPCAST(m)), upb_value_ptr(mh)); if (msgreg_cb) msgreg_cb(closure, mh, m); upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); upb_fhandlers *fh; if (upb_issubmsg(f)) { upb_mhandlers *sub_mh; - upb_mtab_ent *subm_ent; + const upb_value *subm_ent; // The table lookup is necessary to break the DFS for type cycles. - if ((subm_ent = upb_strtable_lookup(mtab, f->def->fqname)) != NULL) { - sub_mh = subm_ent->mh; + const char *subname = upb_def_fullname(upb_fielddef_subdef(f)); + if ((subm_ent = upb_strtable_lookup(mtab, subname)) != NULL) { + sub_mh = upb_value_getptr(*subm_ent); } else { - sub_mh = upb_regmsg_dfs(h, upb_downcast_msgdef(f->def), msgreg_cb, - fieldreg_cb, closure, mtab); + sub_mh = upb_regmsg_dfs( + h, upb_downcast_msgdef_const(upb_fielddef_subdef(f)), + msgreg_cb, fieldreg_cb, closure, mtab); } fh = upb_mhandlers_newfhandlers_subm( mh, f->number, f->type, upb_isseq(f), sub_mh); @@ -151,10 +151,10 @@ upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m, upb_onfieldreg *fieldreg_cb, void *closure) { upb_strtable mtab; - upb_strtable_init(&mtab, 8, sizeof(upb_mtab_ent)); + upb_strtable_init(&mtab); upb_mhandlers *ret = upb_regmsg_dfs(h, m, msgreg_cb, fieldreg_cb, closure, &mtab); - upb_strtable_free(&mtab); + upb_strtable_uninit(&mtab); return ret; } @@ -212,6 +212,7 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure); if (f->startseq) sflow = f->startseq(d->top->closure, f->fval); + _upb_dispatcher_sethas(d->top->closure, f->hasbit); if (sflow.flow != UPB_CONTINUE) { _upb_dispatcher_abortjmp(d); } @@ -247,6 +248,7 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure); if (f->startsubmsg) sflow = f->startsubmsg(d->top->closure, f->fval); + _upb_dispatcher_sethas(d->top->closure, f->hasbit); if (sflow.flow != UPB_CONTINUE) { _upb_dispatcher_abortjmp(d); } diff --git a/upb/handlers.h b/upb/handlers.h index 9ed02c1..9083a2e 100644 --- a/upb/handlers.h +++ b/upb/handlers.h @@ -9,6 +9,10 @@ * for each message and/or field as the data is being parsed or iterated over, * without having to know the source format that we are parsing from. This * decouples the parsing logic from the processing logic. + * + * TODO: should we allow handlers to longjmp()? Would be necessary to eg. let + * a Lua handler "yield" from the current coroutine. I *think* everything + * would "just work" with our current decoder. */ #ifndef UPB_HANDLERS_H @@ -141,9 +145,9 @@ struct _upb_mhandlers; typedef struct _upb_fieldent { upb_fieldtype_t type; bool repeated; - upb_atomic_t refcount; + uint32_t refcount; uint32_t number; - int32_t valuehasbit; + int32_t hasbit; struct _upb_mhandlers *msg; struct _upb_mhandlers *submsg; // Set iff upb_issubmsgtype(type) == true. upb_value fval; @@ -157,14 +161,8 @@ typedef struct _upb_fieldent { uint32_t jit_pclabel_notypecheck; uint32_t jit_submsg_done_pclabel; #endif - void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f); } upb_fhandlers; -typedef struct { - bool junk; // Stolen by table impl; see table.h for details. - upb_fhandlers *f; -} upb_itofhandlers_ent; - // fhandlers are created as part of a upb_handlers instance, but can be ref'd // and unref'd to prolong the life of the handlers. void upb_fhandlers_ref(upb_fhandlers *m); @@ -174,6 +172,8 @@ void upb_fhandlers_unref(upb_fhandlers *m); #define UPB_FHANDLERS_ACCESSORS(name, type) \ INLINE void upb_fhandlers_set ## name(upb_fhandlers *f, type v){f->name = v;} \ INLINE type upb_fhandlers_get ## name(const upb_fhandlers *f) { return f->name; } +// TODO(haberman): need a way of keeping the fval alive even if a plan outlasts +// the handlers. UPB_FHANDLERS_ACCESSORS(fval, upb_value) UPB_FHANDLERS_ACCESSORS(value, upb_value_handler*) UPB_FHANDLERS_ACCESSORS(startsubmsg, upb_startfield_handler*) @@ -182,11 +182,13 @@ UPB_FHANDLERS_ACCESSORS(startseq, upb_startfield_handler*) UPB_FHANDLERS_ACCESSORS(endseq, upb_endfield_handler*) UPB_FHANDLERS_ACCESSORS(msg, struct _upb_mhandlers*) UPB_FHANDLERS_ACCESSORS(submsg, struct _upb_mhandlers*) -// If set to >= 0, the hasbit will automatically be set after the corresponding -// callback is called (when a JIT is enabled, this can be significantly more -// efficient than setting the hasbit yourself inside the callback). Could add -// this for seq and submsg also, but doesn't look like a win at the moment. -UPB_FHANDLERS_ACCESSORS(valuehasbit, int32_t) +// If set to >= 0, the hasbit will automatically be set when the corresponding +// field is parsed (when a JIT is enabled, this can be significantly more +// efficient than setting the hasbit yourself inside the callback). For values +// it is undefined whether the hasbit is set before or after the callback is +// called. For seq and submsg, the hasbit is set *after* the start handler is +// called, but before any of the handlers for the submsg or sequence. +UPB_FHANDLERS_ACCESSORS(hasbit, int32_t) /* upb_mhandlers **************************************************************/ @@ -195,7 +197,7 @@ UPB_FHANDLERS_ACCESSORS(valuehasbit, int32_t) // message in the graph of messages. typedef struct _upb_mhandlers { - upb_atomic_t refcount; + uint32_t refcount; upb_startmsg_handler *startmsg; upb_endmsg_handler *endmsg; upb_inttable fieldtab; // Maps field number -> upb_fhandlers. @@ -203,6 +205,7 @@ typedef struct _upb_mhandlers { #ifdef UPB_USE_JIT_X64 // Used inside the JIT to track labels (jmp targets) in the generated code. uint32_t jit_startmsg_pclabel; // Starting a parse of this (sub-)message. + uint32_t jit_afterstartmsg_pclabel; // After calling the startmsg handler. uint32_t jit_endofbuf_pclabel; // ptr hitend, but delim_end or jit_end? uint32_t jit_endofmsg_pclabel; // Done parsing this (sub-)message. uint32_t jit_dyndispatch_pclabel; // Dispatch by table lookup. @@ -240,11 +243,14 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n, UPB_MHANDLERS_ACCESSORS(startmsg, upb_startmsg_handler*); UPB_MHANDLERS_ACCESSORS(endmsg, upb_endmsg_handler*); +// Returns fhandlers for the given field, or NULL if none. +upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n); + /* upb_handlers ***************************************************************/ struct _upb_handlers { - upb_atomic_t refcount; + uint32_t refcount; upb_mhandlers **msgs; // Array of msgdefs, [0]=toplevel. int msgs_len, msgs_size; bool should_jit; @@ -267,8 +273,10 @@ upb_mhandlers *upb_handlers_getmhandlers(upb_handlers *h, int index); // with "fieldreg_cb" // // See upb_handlers_reghandlerset() below for an example. -typedef void upb_onmsgreg(void *closure, upb_mhandlers *mh, const upb_msgdef *m); -typedef void upb_onfieldreg(void *closure, upb_fhandlers *mh, const upb_fielddef *m); +typedef void upb_onmsgreg( + void *closure, upb_mhandlers *mh, const upb_msgdef *m); +typedef void upb_onfieldreg( + void *closure, upb_fhandlers *fh, const upb_fielddef *f); upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m, upb_onmsgreg *msgreg_cb, upb_onfieldreg *fieldreg_cb, @@ -305,8 +313,8 @@ INLINE void upb_onfreg_hset(void *c, upb_fhandlers *fh, const upb_fielddef *f) { upb_value_setfielddef(&val, f); upb_fhandlers_setfval(fh, val); } -INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, const upb_msgdef *m, - upb_handlerset *hs) { +INLINE upb_mhandlers *upb_handlers_reghandlerset( + upb_handlers *h, const upb_msgdef *m, upb_handlerset *hs) { return upb_handlers_regmsgdef(h, m, &upb_onmreg_hset, &upb_onfreg_hset, hs); } @@ -373,7 +381,7 @@ INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f, upb_value val) { upb_flow_t flow = UPB_CONTINUE; if (f->value) flow = f->value(d->top->closure, f->fval, val); - _upb_dispatcher_sethas(d->top->closure, f->valuehasbit); + _upb_dispatcher_sethas(d->top->closure, f->hasbit); if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d); } void upb_dispatch_startmsg(upb_dispatcher *d); @@ -381,7 +389,8 @@ void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status); upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fhandlers *f); upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d); -upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f); +upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, + upb_fhandlers *f); upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d); #ifdef __cplusplus @@ -4,101 +4,12 @@ * Copyright (c) 2010 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> * - * Data structure for storing a message of protobuf data. */ #include "upb/upb.h" #include "upb/msg.h" -void upb_msg_clear(void *msg, const upb_msgdef *md) { - assert(msg != NULL); - memset(msg, 0, md->hasbit_bytes); - // TODO: set primitive fields to defaults? -} - -void *upb_stdarray_append(upb_stdarray *a, size_t type_size) { - assert(a != NULL); - assert(a->len <= a->size); - if (a->len == a->size) { - size_t old_size = a->size; - a->size = old_size == 0 ? 8 : (old_size * 2); - a->ptr = realloc(a->ptr, a->size * type_size); - memset(&a->ptr[old_size * type_size], 0, (a->size - old_size) * type_size); - } - return &a->ptr[a->len++ * type_size]; -} - -#if 0 -static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md, - upb_dispatcher *d); - -static upb_flow_t upb_msg_pushval(upb_value val, upb_fielddef *f, - upb_dispatcher *d, upb_fhandlers *hf) { - if (upb_issubmsg(f)) { - upb_msg *msg = upb_value_getmsg(val); - upb_dispatch_startsubmsg(d, hf); - upb_msg_dispatch(msg, upb_downcast_msgdef(f->def), d); - upb_dispatch_endsubmsg(d); - } else { - upb_dispatch_value(d, hf, val); - } - return UPB_CONTINUE; -} - -static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md, - upb_dispatcher *d) { - upb_msg_iter i; - for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if (!upb_msg_has(msg, f)) continue; - upb_fhandlers *hf = upb_dispatcher_lookup(d, f->number); - if (!hf) continue; - upb_value val = upb_msg_get(msg, f); - if (upb_isarray(f)) { - upb_array *arr = upb_value_getarr(val); - for (uint32_t j = 0; j < upb_array_len(arr); ++j) { - upb_msg_pushval(upb_array_get(arr, f, j), f, d, hf); - } - } else { - upb_msg_pushval(val, f, d, hf); - } - } - return UPB_CONTINUE; -} - -void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h, - void *closure, upb_status *status) { - upb_dispatcher d; - upb_dispatcher_init(&d, h, NULL, NULL, NULL); - upb_dispatcher_reset(&d, closure); - - upb_dispatch_startmsg(&d); - upb_msg_dispatch(msg, md, &d); - upb_dispatch_endmsg(&d, status); - - upb_dispatcher_uninit(&d); -} -#endif - -/* Standard writers. **********************************************************/ - -void upb_stdmsg_sethas(void *_m, upb_value fval) { - assert(_m != NULL); - char *m = _m; - const upb_fielddef *f = upb_value_getfielddef(fval); - if (f->hasbit >= 0) - m[(uint32_t)f->hasbit / 8] |= (1 << ((uint32_t)f->hasbit % 8)); -} - -bool upb_stdmsg_has(const void *_m, upb_value fval) { - assert(_m != NULL); - const char *m = _m; - const upb_fielddef *f = upb_value_getfielddef(fval); - return f->hasbit < 0 || - (m[(uint32_t)f->hasbit / 8] & (1 << ((uint32_t)f->hasbit % 8))); -} - -#define UPB_ACCESSORS(type, ctype) \ +#define UPB_ACCESSOR(type, ctype) \ upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval, \ upb_value val) { \ assert(_m != NULL); \ @@ -108,230 +19,17 @@ bool upb_stdmsg_has(const void *_m, upb_value fval) { *(ctype*)&m[f->offset] = upb_value_get ## type(val); \ return UPB_CONTINUE; \ } \ - \ - upb_flow_t upb_stdmsg_set ## type ## _r(void *a, upb_value _fval, \ - upb_value val) { \ - (void)_fval; \ - assert(a != NULL); \ - ctype *p = upb_stdarray_append((upb_stdarray*)a, sizeof(ctype)); \ - *p = upb_value_get ## type(val); \ - return UPB_CONTINUE; \ - } \ - \ - upb_value upb_stdmsg_get ## type(const void *_m, upb_value fval) { \ - assert(_m != NULL); \ - const uint8_t *m = _m; \ - const upb_fielddef *f = upb_value_getfielddef(fval); \ - upb_value ret; \ - upb_value_set ## type(&ret, *(ctype*)&m[f->offset]); \ - return ret; \ - } \ - upb_value upb_stdmsg_seqget ## type(const void *i) { \ - assert(i != NULL); \ - upb_value val; \ - upb_value_set ## type(&val, *(ctype*)i); \ - return val; \ - } -UPB_ACCESSORS(double, double) -UPB_ACCESSORS(float, float) -UPB_ACCESSORS(int32, int32_t) -UPB_ACCESSORS(int64, int64_t) -UPB_ACCESSORS(uint32, uint32_t) -UPB_ACCESSORS(uint64, uint64_t) -UPB_ACCESSORS(bool, bool) -UPB_ACCESSORS(ptr, void*) +UPB_ACCESSOR(double, double) +UPB_ACCESSOR(float, float) +UPB_ACCESSOR(int32, int32_t) +UPB_ACCESSOR(int64, int64_t) +UPB_ACCESSOR(uint32, uint32_t) +UPB_ACCESSOR(uint64, uint64_t) +UPB_ACCESSOR(bool, bool) +UPB_ACCESSOR(ptr, void*) #undef UPB_ACCESSORS -static void _upb_stdmsg_setstr(void *_dst, upb_value src) { - upb_stdarray **dstp = _dst; - upb_stdarray *dst = *dstp; - if (!dst) { - dst = malloc(sizeof(*dst)); - dst->size = 0; - dst->ptr = NULL; - *dstp = dst; - } - dst->len = 0; - const upb_byteregion *bytes = upb_value_getbyteregion(src); - uint32_t len = upb_byteregion_len(bytes); - if (len > dst->size) { - dst->size = len; - dst->ptr = realloc(dst->ptr, dst->size); - } - dst->len = len; - upb_byteregion_copyall(bytes, dst->ptr); -} - -upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) { - assert(_m != NULL); - char *m = _m; - const upb_fielddef *f = upb_value_getfielddef(fval); - // Hasbit automatically set by the handlers. - _upb_stdmsg_setstr(&m[f->offset], val); - return UPB_CONTINUE; -} - -upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) { - assert(a != NULL); - (void)fval; - _upb_stdmsg_setstr(upb_stdarray_append((upb_stdarray*)a, sizeof(void*)), val); - return UPB_CONTINUE; -} - -upb_value upb_stdmsg_getstr(const void *m, upb_value fval) { - assert(m != NULL); - return upb_stdmsg_getptr(m, fval); -} - -upb_value upb_stdmsg_seqgetstr(const void *i) { - assert(i != NULL); - return upb_stdmsg_seqgetptr(i); -} - -void *upb_stdmsg_new(const upb_msgdef *md) { - void *m = malloc(md->size); - memset(m, 0, md->size); - upb_msg_clear(m, md); - return m; -} - -void upb_stdseq_free(void *s, upb_fielddef *f) { - upb_stdarray *a = s; - if (upb_issubmsg(f) || upb_isstring(f)) { - void **p = (void**)a->ptr; - for (uint32_t i = 0; i < a->size; i++) { - if (upb_issubmsg(f)) { - upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def)); - } else { - upb_stdarray *str = p[i]; - free(str->ptr); - free(str); - } - } - } - free(a->ptr); - free(a); -} - -void upb_stdmsg_free(void *m, const upb_msgdef *md) { - if (m == NULL) return; - upb_msg_iter i; - for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if (!upb_isseq(f) && !upb_issubmsg(f) && !upb_isstring(f)) continue; - void *subp = upb_value_getptr(upb_stdmsg_getptr(m, f->fval)); - if (subp == NULL) continue; - if (upb_isseq(f)) { - upb_stdseq_free(subp, f); - } else if (upb_issubmsg(f)) { - upb_stdmsg_free(subp, upb_downcast_msgdef(f->def)); - } else { - upb_stdarray *str = subp; - free(str->ptr); - free(str); - } - } - free(m); -} - -upb_sflow_t upb_stdmsg_startseq(void *_m, upb_value fval) { - char *m = _m; - const upb_fielddef *f = upb_value_getfielddef(fval); - upb_stdarray **arr = (void*)&m[f->offset]; - if (!upb_stdmsg_has(_m, fval)) { - if (!*arr) { - *arr = malloc(sizeof(**arr)); - (*arr)->size = 0; - (*arr)->ptr = NULL; - } - (*arr)->len = 0; - upb_stdmsg_sethas(m, fval); - } - return UPB_CONTINUE_WITH(*arr); -} - -void upb_stdmsg_recycle(void **m, const upb_msgdef *md) { - if (*m) - upb_msg_clear(*m, md); - else - *m = upb_stdmsg_new(md); -} - -upb_sflow_t upb_stdmsg_startsubmsg(void *_m, upb_value fval) { - assert(_m != NULL); - char *m = _m; - const upb_fielddef *f = upb_value_getfielddef(fval); - void **subm = (void*)&m[f->offset]; - if (!upb_stdmsg_has(m, fval)) { - upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def)); - upb_stdmsg_sethas(m, fval); - } - return UPB_CONTINUE_WITH(*subm); -} - -upb_sflow_t upb_stdmsg_startsubmsg_r(void *a, upb_value fval) { - assert(a != NULL); - const upb_fielddef *f = upb_value_getfielddef(fval); - void **subm = upb_stdarray_append((upb_stdarray*)a, sizeof(void*)); - upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def)); - return UPB_CONTINUE_WITH(*subm); -} - -const void *upb_stdmsg_seqbegin(const void *_a) { - const upb_stdarray *a = _a; - return a->len > 0 ? a->ptr : NULL; -} - -#define NEXTFUNC(size) \ - const void *upb_stdmsg_ ## size ## byte_seqnext(const void *_a, const void *iter) {\ - const upb_stdarray *a = _a; \ - const void *next = (char*)iter + size; \ - return (char*)next < (char*)a->ptr + (a->len * size) ? next : NULL; \ - } - -NEXTFUNC(8) -NEXTFUNC(4) -NEXTFUNC(1) - -#define STDMSG(type, size) { static upb_accessor_vtbl vtbl = { \ - &upb_stdmsg_startsubmsg, \ - &upb_stdmsg_set ## type, \ - &upb_stdmsg_startseq, \ - &upb_stdmsg_startsubmsg_r, \ - &upb_stdmsg_set ## type ## _r, \ - &upb_stdmsg_has, \ - &upb_stdmsg_getptr, \ - &upb_stdmsg_get ## type, \ - &upb_stdmsg_seqbegin, \ - &upb_stdmsg_ ## size ## byte_seqnext, \ - &upb_stdmsg_seqget ## type}; \ - return &vtbl; } - -upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f) { - switch (f->type) { - case UPB_TYPE(DOUBLE): STDMSG(double, 8) - case UPB_TYPE(FLOAT): STDMSG(float, 4) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): STDMSG(uint64, 8) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): STDMSG(int64, 8) - case UPB_TYPE(INT32): - case UPB_TYPE(SINT32): - case UPB_TYPE(ENUM): - case UPB_TYPE(SFIXED32): STDMSG(int32, 4) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): STDMSG(uint32, 4) - case UPB_TYPE(BOOL): STDMSG(bool, 1) - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - case UPB_TYPE(GROUP): - case UPB_TYPE(MESSAGE): STDMSG(str, 8) // TODO: 32-bit - } - return NULL; -} - static void upb_accessors_onfreg(void *c, upb_fhandlers *fh, const upb_fielddef *f) { (void)c; @@ -344,7 +42,7 @@ static void upb_accessors_onfreg(void *c, upb_fhandlers *fh, } else { upb_fhandlers_setvalue(fh, f->accessor->set); upb_fhandlers_setstartsubmsg(fh, f->accessor->startsubmsg); - upb_fhandlers_setvaluehasbit(fh, f->hasbit); + upb_fhandlers_sethasbit(fh, f->hasbit); } } } @@ -68,34 +68,18 @@ typedef struct _upb_accessor_vtbl { upb_seqget_handler *seqget; } upb_accessor_vtbl; -// Registers handlers for writing into a message of the given type. +// Registers handlers for writing into a message of the given type using +// whatever accessors it has defined. upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, const upb_msgdef *m); -// Returns an stdmsg accessor for the given fielddef. -upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f); - - -/* upb_msg/upb_seq ************************************************************/ - -// upb_msg and upb_seq allow for generic access to a message through its -// accessor vtable. Note that these do *not* allow you to create, destroy, or -// take references on the objects -- these operations are specifically outside -// the scope of what the accessors define. - -// Clears all hasbits. -// TODO: Add a separate function for setting primitive values back to their -// defaults (but not strings, submessages, or arrays). -void upb_msg_clear(void *msg, const upb_msgdef *md); - INLINE void upb_msg_clearbit(void *msg, const upb_fielddef *f) { ((char*)msg)[f->hasbit / 8] &= ~(1 << (f->hasbit % 8)); } -// Could add a method that recursively clears submessages, strings, and -// arrays if desired. This could be a win if you wanted to merge without -// needing hasbits, because during parsing you would never clear submessages -// or arrays. Also this could be desired to provide proto2 operations on -// generated messages. +/* upb_msg/upb_seq ************************************************************/ + +// These accessor functions are simply convenience methods for reading or +// writing to a message through its accessors. INLINE bool upb_msg_has(const void *m, const upb_fielddef *f) { return f->accessor && f->accessor->has(m, f->fval); @@ -148,65 +132,11 @@ INLINE bool upb_msg_get_named(const void *m, const upb_msgdef *md, return true; } - -/* upb_msgvisitor *************************************************************/ - -// A upb_msgvisitor reads data from an in-memory structure using its accessors, -// pushing the results to a given set of upb_handlers. -// TODO: not yet implemented. - -typedef struct { - upb_fhandlers *fh; - upb_fielddef *f; - uint16_t msgindex; // Only when upb_issubmsg(f). -} upb_msgvisitor_field; - -typedef struct { - upb_msgvisitor_field *fields; - int fields_len; -} upb_msgvisitor_msg; - -typedef struct { - uint16_t msgindex; - uint16_t fieldindex; - uint32_t arrayindex; // UINT32_MAX if not an array frame. -} upb_msgvisitor_frame; - -typedef struct { - upb_msgvisitor_msg *messages; - int messages_len; - upb_dispatcher dispatcher; -} upb_msgvisitor; - -// Initializes a msgvisitor that will push data from messages of the given -// msgdef to the given set of handlers. -void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h); -void upb_msgvisitor_uninit(upb_msgvisitor *v); - -void upb_msgvisitor_reset(upb_msgvisitor *v, void *m); -void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status); - - -/* Standard writers. **********************************************************/ - -// Allocates a new stdmsg. -void *upb_stdmsg_new(const upb_msgdef *md); - -// Recursively frees any strings or submessages that the message refers to. -void upb_stdmsg_free(void *m, const upb_msgdef *md); - -void upb_stdmsg_sethas(void *_m, upb_value fval); - -// "hasbit" must be <= UPB_MAX_FIELDS. If it is <0, this field has no hasbit. -upb_value upb_stdmsg_packfval(int16_t hasbit, uint16_t value_offset); -upb_value upb_stdmsg_packfval_subm(int16_t hasbit, uint16_t value_offset, - uint16_t subm_size, uint8_t subm_setbytes); - // Value writers for every in-memory type: write the data to a known offset -// from the closure "c" and set the hasbit (if any). -// TODO: can we get away with having only one for int64, uint64, double, etc? -// The main thing in the way atm is that the upb_value is strongly typed. -// in debug mode. +// from the closure "c." +// +// TODO(haberman): instead of having standard writer functions, should we have +// a bool in the accessor that says "write raw value to the field's offset"? upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val); upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val); upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val); @@ -216,94 +146,6 @@ upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val); upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val); upb_flow_t upb_stdmsg_setptr(void *c, upb_value fval, upb_value val); -// Value writers for repeated fields: the closure points to a standard array -// struct, appends the value to the end of the array, resizing with realloc() -// if necessary. -typedef struct { - char *ptr; - uint32_t len; // Number of elements present. - uint32_t size; // Number of elements allocated. -} upb_stdarray; - -void *upb_stdarray_append(upb_stdarray *a, size_t type_size); - -upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setint32_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setuint64_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setuint32_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setdouble_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setfloat_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setbool_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setptr_r(void *c, upb_value fval, upb_value val); - -// Writers for C strings (NULL-terminated): we can find a char* at a known -// offset from the closure "c". Calls realloc() on the pointer to allocate -// the memory (TODO: investigate whether checking malloc_usable_size() would -// be cheaper than realloc()). Also sets the hasbit, if any. -// -// Since the string is NULL terminated and does not store an explicit length, -// these are not suitable for binary data that can contain NULLs. -upb_flow_t upb_stdmsg_setcstr(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setcstr_r(void *c, upb_value fval, upb_value val); - -// Writers for length-delimited strings: we explicitly store the length, so -// the data can contain NULLs. Stores the data using upb_stdarray -// which is located at a known offset from the closure "c" (note that it -// is included inline rather than pointed to). Also sets the hasbit, if any. -upb_flow_t upb_stdmsg_setstr(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setstr_r(void *c, upb_value fval, upb_value val); - -// Writers for startseq and startmsg which allocate (or reuse, if possible) -// a sub data structure (upb_stdarray or a submessage, respectively), -// setting the hasbit. If the hasbit is already set, the existing data -// structure is used verbatim. If the hasbit is not already set, the pointer -// is checked for NULL. If it is NULL, a new substructure is allocated, -// cleared, and used. If it is not NULL, the existing substructure is -// cleared and reused. -// -// If there is no hasbit, we always behave as if the hasbit was not set, -// so any existing data for this array or submessage is cleared. In most -// cases this will be fine since each array or non-repeated submessage should -// occur at most once in the stream. But if the client is using "concatenation -// as merging", it will want to make sure hasbits are allocated so merges can -// happen appropriately. -// -// If there was a demand for the behavior that absence of a hasbit acts as if -// the bit was always set, we could provide that also. But Clear() would need -// to act recursively, which is less efficient since it requires an extra pass -// over the tree. -upb_sflow_t upb_stdmsg_startseq(void *c, upb_value fval); -upb_sflow_t upb_stdmsg_startsubmsg(void *c, upb_value fval); -upb_sflow_t upb_stdmsg_startsubmsg_r(void *c, upb_value fval); - - -/* Standard readers. **********************************************************/ - -bool upb_stdmsg_has(const void *c, upb_value fval); -const void *upb_stdmsg_seqbegin(const void *c); - -upb_value upb_stdmsg_getint64(const void *c, upb_value fval); -upb_value upb_stdmsg_getint32(const void *c, upb_value fval); -upb_value upb_stdmsg_getuint64(const void *c, upb_value fval); -upb_value upb_stdmsg_getuint32(const void *c, upb_value fval); -upb_value upb_stdmsg_getdouble(const void *c, upb_value fval); -upb_value upb_stdmsg_getfloat(const void *c, upb_value fval); -upb_value upb_stdmsg_getbool(const void *c, upb_value fval); -upb_value upb_stdmsg_getptr(const void *c, upb_value fval); - -const void *upb_stdmsg_8byte_seqnext(const void *c, const void *iter); -const void *upb_stdmsg_4byte_seqnext(const void *c, const void *iter); -const void *upb_stdmsg_1byte_seqnext(const void *c, const void *iter); - -upb_value upb_stdmsg_seqgetint64(const void *c); -upb_value upb_stdmsg_seqgetint32(const void *c); -upb_value upb_stdmsg_seqgetuint64(const void *c); -upb_value upb_stdmsg_seqgetuint32(const void *c); -upb_value upb_stdmsg_seqgetdouble(const void *c); -upb_value upb_stdmsg_seqgetfloat(const void *c); -upb_value upb_stdmsg_seqgetbool(const void *c); -upb_value upb_stdmsg_seqgetptr(const void *c); - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 06125dd..b0e2392 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -13,6 +13,33 @@ #include "upb/pb/decoder.h" #include "upb/pb/varint.h" +typedef struct { + uint8_t native_wire_type; + bool is_numeric; +} upb_decoder_typeinfo; + +static const upb_decoder_typeinfo upb_decoder_types[] = { + {UPB_WIRE_TYPE_END_GROUP, false}, // ENDGROUP + {UPB_WIRE_TYPE_64BIT, true}, // DOUBLE + {UPB_WIRE_TYPE_32BIT, true}, // FLOAT + {UPB_WIRE_TYPE_VARINT, true}, // INT64 + {UPB_WIRE_TYPE_VARINT, true}, // UINT64 + {UPB_WIRE_TYPE_VARINT, true}, // INT32 + {UPB_WIRE_TYPE_64BIT, true}, // FIXED64 + {UPB_WIRE_TYPE_32BIT, true}, // FIXED32 + {UPB_WIRE_TYPE_VARINT, true}, // BOOL + {UPB_WIRE_TYPE_DELIMITED, false}, // STRING + {UPB_WIRE_TYPE_START_GROUP, false}, // GROUP + {UPB_WIRE_TYPE_DELIMITED, false}, // MESSAGE + {UPB_WIRE_TYPE_DELIMITED, false}, // BYTES + {UPB_WIRE_TYPE_VARINT, true}, // UINT32 + {UPB_WIRE_TYPE_VARINT, true}, // ENUM + {UPB_WIRE_TYPE_32BIT, true}, // SFIXED32 + {UPB_WIRE_TYPE_64BIT, true}, // SFIXED64 + {UPB_WIRE_TYPE_VARINT, true}, // SINT32 + {UPB_WIRE_TYPE_VARINT, true}, // SINT64 +}; + /* upb_decoderplan ************************************************************/ #ifdef UPB_USE_JIT_X64 @@ -32,37 +59,6 @@ #include "upb/pb/decoder_x64.h" #endif -typedef struct { - upb_fhandlers base; - void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f); -#ifdef UPB_USE_JIT_X64 - uint32_t jit_pclabel; - uint32_t jit_pclabel_notypecheck; -#endif -} upb_dplanfield; - -typedef struct { - upb_mhandlers base; -#ifdef UPB_USE_JIT_X64 - uint32_t jit_startmsg_pclabel; - uint32_t jit_endofbuf_pclabel; - uint32_t jit_endofmsg_pclabel; - uint32_t jit_dyndispatch_pclabel; - uint32_t jit_unknownfield_pclabel; - int32_t jit_parent_field_done_pclabel; - uint32_t max_field_number; - // Currently keyed on field number. Could also try keying it - // on encoded or decoded tag, or on encoded field number. - void **tablearray; -#endif -} upb_dplanmsg; - -static void *upb_decoderplan_fptrs[]; - -void upb_decoderplan_initfhandlers(upb_fhandlers *f) { - f->decode = upb_decoderplan_fptrs[f->type]; -} - upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) { upb_decoderplan *p = malloc(sizeof(*p)); p->handlers = h; @@ -72,17 +68,6 @@ upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) { p->jit_code = NULL; if (allowjit) upb_decoderplan_makejit(p); #endif - // Set function pointers for each field's decode function. - for (int i = 0; i < h->msgs_len; i++) { - upb_mhandlers *m = h->msgs[i]; - for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); - !upb_inttable_done(i); - i = upb_inttable_next(&m->fieldtab, i)) { - upb_itofhandlers_ent *e = upb_inttable_iter_value(i); - upb_fhandlers *f = e->f; - upb_decoderplan_initfhandlers(f); - } - } return p; } @@ -396,14 +381,6 @@ static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) { upb_push_msg(d, f, upb_decoder_offset(d) + len); } -#define F(type) &upb_decode_ ## type -static void *upb_decoderplan_fptrs[] = { - &upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64), - F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING), - F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32), - F(SFIXED64), F(SINT32), F(SINT64)}; -#undef F - /* The main decoding loop *****************************************************/ @@ -431,16 +408,18 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { if (!upb_trydecode_varint32(d, &tag)) return NULL; uint8_t wire_type = tag & 0x7; uint32_t fieldnum = tag >> 3; - upb_itofhandlers_ent *e = upb_inttable_fastlookup( - d->dispatch_table, fieldnum, sizeof(upb_itofhandlers_ent)); - upb_fhandlers *f = e ? e->f : NULL; + const upb_value *val = upb_inttable_lookup32(d->dispatch_table, fieldnum); + upb_fhandlers *f = val ? upb_value_getptr(*val) : NULL; + bool is_packed = false; if (f) { // Wire type check. - if (wire_type == upb_types[f->type].native_wire_type || - (wire_type == UPB_WIRE_TYPE_DELIMITED && - upb_types[f->type].is_numeric)) { + if (wire_type == upb_decoder_types[f->type].native_wire_type) { // Wire type is ok. + } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED && + upb_decoder_types[f->type].is_numeric)) { + // Wire type is ok (and packed). + is_packed = true; } else { f = NULL; } @@ -453,19 +432,18 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { if (fr->is_sequence && fr->f != f) { upb_dispatch_endseq(&d->dispatcher); upb_decoder_setmsgend(d); + fr = d->dispatcher.top; } - if (f && f->repeated && (!fr->is_sequence || fr->f != f)) { - uint64_t old_end = d->dispatcher.top->end_ofs; - upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f); - if (wire_type != UPB_WIRE_TYPE_DELIMITED || - upb_issubmsgtype(f->type) || upb_isstringtype(f->type)) { - // Non-packed field -- this tag pertains to only a single message. - fr->end_ofs = old_end; - } else { + if (f && f->repeated && !fr->is_sequence) { + upb_dispatcher_frame *fr2 = upb_dispatch_startseq(&d->dispatcher, f); + if (is_packed) { // Packed primitive field. uint32_t len = upb_decode_varint32(d); - fr->end_ofs = upb_decoder_offset(d) + len; - fr->is_packed = true; + fr2->end_ofs = upb_decoder_offset(d) + len; + fr2->is_packed = true; + } else { + // Non-packed field -- this tag pertains to only a single message. + fr2->end_ofs = fr->end_ofs; } upb_decoder_setmsgend(d); } @@ -513,13 +491,37 @@ upb_success_t upb_decoder_decode(upb_decoder *d) { if (!d->top_is_packed) f = upb_decode_tag(d); if (!f) { // Sucessful EOF. We may need to dispatch a top-level implicit frame. - if (d->dispatcher.top == d->dispatcher.stack + 1) { - assert(d->dispatcher.top->is_sequence); + if (d->dispatcher.top->is_sequence) { + assert(d->dispatcher.top == d->dispatcher.stack + 1); upb_dispatch_endseq(&d->dispatcher); } + assert(d->dispatcher.top == d->dispatcher.stack); + upb_dispatch_endmsg(&d->dispatcher, &d->status); return UPB_OK; } - f->decode(d, f); + + switch (f->type) { + case UPB_TYPE_ENDGROUP: upb_endgroup(d, f); break; + case UPB_TYPE(DOUBLE): upb_decode_DOUBLE(d, f); break; + case UPB_TYPE(FLOAT): upb_decode_FLOAT(d, f); break; + case UPB_TYPE(INT64): upb_decode_INT64(d, f); break; + case UPB_TYPE(UINT64): upb_decode_UINT64(d, f); break; + case UPB_TYPE(INT32): upb_decode_INT32(d, f); break; + case UPB_TYPE(FIXED64): upb_decode_FIXED64(d, f); break; + case UPB_TYPE(FIXED32): upb_decode_FIXED32(d, f); break; + case UPB_TYPE(BOOL): upb_decode_BOOL(d, f); break; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): upb_decode_STRING(d, f); break; + case UPB_TYPE(GROUP): upb_decode_GROUP(d, f); break; + case UPB_TYPE(MESSAGE): upb_decode_MESSAGE(d, f); break; + case UPB_TYPE(UINT32): upb_decode_UINT32(d, f); break; + case UPB_TYPE(ENUM): upb_decode_ENUM(d, f); break; + case UPB_TYPE(SFIXED32): upb_decode_SFIXED32(d, f); break; + case UPB_TYPE(SFIXED64): upb_decode_SFIXED64(d, f); break; + case UPB_TYPE(SINT32): upb_decode_SINT32(d, f); break; + case UPB_TYPE(SINT64): upb_decode_SINT64(d, f); break; + case UPB_TYPE_NONE: assert(false); break; + } upb_decoder_checkpoint(d); } } @@ -542,7 +544,6 @@ void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset) { void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, void *closure) { assert(d->plan); - assert(upb_byteregion_discardofs(input) == upb_byteregion_startofs(input)); upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure, d->plan->handlers->msgs[0]); upb_status_clear(&d->status); diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc index fa984ef..f58e403 100644 --- a/upb/pb/decoder_x64.dasc +++ b/upb/pb/decoder_x64.dasc @@ -9,8 +9,8 @@ |// parsing the specific message and calling specific handlers. |// |// Since the JIT can call other functions (the JIT'ted code is not a leaf -|// function) we must respect alignment rules. On OS X, this means aligning -|// the stack to 16 bytes. +|// function) we must respect alignment rules. All x86-64 systems require +|// 16-byte stack alignment. #include <sys/mman.h> #include "dynasm/dasm_x86.h" @@ -103,7 +103,7 @@ void upb_reg_jit_gdb(upb_decoderplan *plan) { // Has to be a separate function, otherwise GCC will complain about // expressions like (&foo != NULL) because they will never evaluate // to false. -static void upb_assert_notnull(void *addr) { assert(addr != NULL); } +static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } |.arch x64 |.actionlist upb_jit_actionlist @@ -401,45 +401,10 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, } } -#if 0 -// These appear not to speed things up, but keeping around for -// further experimentation. -static void upb_decoderplan_jit_doappend(upb_decoderplan *plan, uint8_t size, - upb_fhandlers *f) { - | mov eax, STDARRAY:ARG1_64->len - | cmp eax, STDARRAY:ARG1_64->size - | jne >2 - // If array is full, fall back to actual function. - | loadfval f - | callp f->value - | jmp >3 - |2: - | mov rcx, STDARRAY:ARG1_64->ptr - | mov esi, eax - | add eax, 1 - - switch (size) { - case 8: - | mov [rcx + rsi * 8], ARG3_64 - break; - - case 4: - | mov [rcx + rsi * 4], ARG3_32 - break; - - case 1: - | mov [rcx + rsi * 4], ARG3_8 - break; - } - - | mov STDARRAY:ARG1_64->len, eax - |3: -} -#endif - static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, upb_fhandlers *f) { - // Call callbacks. + // Call callbacks. Specializing the append accessors didn't yield a speed + // increase in benchmarks. if (upb_issubmsgtype(f->type)) { if (f->type == UPB_TYPE(MESSAGE)) { | mov rsi, PTR @@ -457,7 +422,10 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, | mov ARG1_64, CLOSURE | loadfval f | callp f->startsubmsg + | sethas CLOSURE, f->hasbit | mov CLOSURE, rdx + } else { + | sethas CLOSURE, f->hasbit } | mov qword FRAME->closure, CLOSURE // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK @@ -465,6 +433,7 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f); | call =>sub_m->jit_startmsg_pclabel; + | popframe upb_fhandlers_getmsg(f) // Call endsubmsg handler (if any). if (f->endsubmsg) { @@ -473,7 +442,6 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, | loadfval f | callp f->endsubmsg } - | popframe upb_fhandlers_getmsg(f) // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK | mov DECODER->ptr, PTR } else { @@ -494,21 +462,6 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, } else if (f->value == &upb_stdmsg_setbool) { const upb_fielddef *fd = upb_value_getfielddef(f->fval); | mov [ARG1_64 + fd->offset], ARG3_8 -#if 0 - // These appear not to speed things up, but keeping around for - // further experimentation. - } else if (f->value == &upb_stdmsg_setint64_r || - f->value == &upb_stdmsg_setuint64_r || - f->value == &upb_stdmsg_setptr_r || - f->value == &upb_stdmsg_setdouble_r) { - upb_decoderplan_jit_doappend(plan, 8, f); - } else if (f->value == &upb_stdmsg_setint32_r || - f->value == &upb_stdmsg_setuint32_r || - f->value == &upb_stdmsg_setfloat_r) { - upb_decoderplan_jit_doappend(plan, 4, f); - } else if (f->value == &upb_stdmsg_setbool_r) { - upb_decoderplan_jit_doappend(plan, 1, f); -#endif } else if (f->value) { // Load closure and fval into arg registers. ||#ifndef NDEBUG @@ -520,16 +473,26 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, | loadfval f | callp f->value } - | sethas CLOSURE, f->valuehasbit + | sethas CLOSURE, f->hasbit // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK | mov DECODER->ptr, PTR } } +static uint64_t upb_get_encoded_tag(upb_fhandlers *f) { + uint32_t tag = (f->number << 3) | upb_decoder_types[f->type].native_wire_type; + uint64_t encoded_tag = upb_vencode32(tag); + // No tag should be greater than 5 bytes. + assert(encoded_tag <= 0xffffffffff); + return encoded_tag; +} + // PTR should point to the beginning of the tag. -static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag, - uint64_t next_tag, upb_mhandlers *m, +static void upb_decoderplan_jit_field(upb_decoderplan *plan, upb_mhandlers *m, upb_fhandlers *f, upb_fhandlers *next_f) { + uint64_t tag = upb_get_encoded_tag(f); + uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0; + // PC-label for the dispatch table. // We check the wire type (which must be loaded in edx) because the // table is keyed on field number, not type. @@ -541,10 +504,13 @@ static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag, | mov rsi, FRAME->end_ofs | pushframe f, rsi, true if (f->startseq) { - | mov ARG1_64, CLOSURE + | mov ARG1_64, CLOSURE | loadfval f - | callp f->startseq - | mov CLOSURE, rdx + | callp f->startseq + | sethas CLOSURE, f->hasbit + | mov CLOSURE, rdx + } else { + | sethas CLOSURE, f->hasbit } | mov qword FRAME->closure, CLOSURE } @@ -590,6 +556,11 @@ static int upb_compare_uint32(const void *a, const void *b) { } static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) { + |=>m->jit_afterstartmsg_pclabel: + // There was a call to get here, so we need to align the stack. + | sub rsp, 8 + | jmp >1 + |=>m->jit_startmsg_pclabel: // There was a call to get here, so we need to align the stack. | sub rsp, 8 @@ -602,6 +573,7 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) { // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK } + |1: | setmsgend m | check_eob m | mov ecx, dword [PTR] @@ -616,30 +588,19 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) { int num_keys = upb_inttable_count(&m->fieldtab); uint32_t *keys = malloc(num_keys * sizeof(*keys)); int idx = 0; - for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); - !upb_inttable_done(i); - i = upb_inttable_next(&m->fieldtab, i)) { - keys[idx++] = upb_inttable_iter_key(i); + upb_inttable_iter i; + upb_inttable_begin(&i, &m->fieldtab); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + keys[idx++] = upb_inttable_iter_key(&i); } qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32); - upb_fhandlers *last_f = NULL; - uint64_t last_encoded_tag = 0; for(int i = 0; i < num_keys; i++) { - uint32_t fieldnum = keys[i]; - upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, fieldnum); - upb_fhandlers *f = e->f; - assert(f->number == fieldnum); - uint32_t tag = (f->number << 3) | upb_types[f->type].native_wire_type; - uint64_t encoded_tag = upb_vencode32(tag); - // No tag should be greater than 5 bytes. - assert(encoded_tag <= 0xffffffffff); - if (last_f) upb_decoderplan_jit_field( - plan, last_encoded_tag, encoded_tag, m, last_f, f); - last_encoded_tag = encoded_tag; - last_f = f; + upb_fhandlers *f = upb_mhandlers_lookup(m, keys[i]); + upb_fhandlers *next_f = + (i + 1 < num_keys) ? upb_mhandlers_lookup(m, keys[i + 1]) : NULL; + upb_decoderplan_jit_field(plan, m, f, next_f); } - upb_decoderplan_jit_field(plan, last_encoded_tag, 0, m, last_f, NULL); free(keys); @@ -733,18 +694,19 @@ static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f, static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m, uint32_t *pclabel_count) { m->jit_startmsg_pclabel = (*pclabel_count)++; + m->jit_afterstartmsg_pclabel = (*pclabel_count)++; m->jit_endofbuf_pclabel = (*pclabel_count)++; m->jit_endofmsg_pclabel = (*pclabel_count)++; m->jit_dyndispatch_pclabel = (*pclabel_count)++; m->jit_unknownfield_pclabel = (*pclabel_count)++; m->max_field_number = 0; upb_inttable_iter i; - for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i); - i = upb_inttable_next(&m->fieldtab, i)) { - uint32_t key = upb_inttable_iter_key(i); + upb_inttable_begin(&i, &m->fieldtab); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + uint32_t key = upb_inttable_iter_key(&i); m->max_field_number = UPB_MAX(m->max_field_number, key); - upb_itofhandlers_ent *e = upb_inttable_iter_value(i); - upb_decoderplan_jit_assignfieldlabs(e->f, pclabel_count); + upb_fhandlers *f = upb_value_getptr(upb_inttable_iter_value(&i)); + upb_decoderplan_jit_assignfieldlabs(f, pclabel_count); } // TODO: support large field numbers by either using a hash table or // generating code for a binary search. For now large field numbers @@ -784,11 +746,12 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) { // Create dispatch tables. for (int i = 0; i < h->msgs_len; i++) { upb_mhandlers *m = h->msgs[i]; + // We jump to after the startmsg handler since it is called before entering + // the JIT (either by upb_decoder or by a previous call to the JIT). m->jit_func = - plan->jit_code + dasm_getpclabel(plan, m->jit_startmsg_pclabel); + plan->jit_code + dasm_getpclabel(plan, m->jit_afterstartmsg_pclabel); for (uint32_t j = 0; j <= m->max_field_number; j++) { - upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, j); - upb_fhandlers *f = e ? e->f : NULL; + upb_fhandlers *f = upb_mhandlers_lookup(m, j); if (f) { m->tablearray[j] = plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel); diff --git a/upb/pb/glue.c b/upb/pb/glue.c index 4949fe3..40b901d 100644 --- a/upb/pb/glue.c +++ b/upb/pb/glue.c @@ -1,84 +1,17 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2010 Google Inc. See LICENSE for details. + * Copyright (c) 2010-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> */ #include "upb/bytestream.h" -#include "upb/descriptor.h" -#include "upb/msg.h" +#include "upb/descriptor/reader.h" #include "upb/pb/decoder.h" #include "upb/pb/glue.h" -#include "upb/pb/textprinter.h" - -bool upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md, - bool allow_jit, upb_status *status) { - upb_stringsrc strsrc; - upb_stringsrc_init(&strsrc); - upb_stringsrc_reset(&strsrc, str, len); - - upb_decoder d; - upb_handlers *h = upb_handlers_new(); - upb_accessors_reghandlers(h, md); - upb_decoderplan *p = upb_decoderplan_new(h, allow_jit); - upb_decoder_init(&d); - upb_handlers_unref(h); - upb_decoder_resetplan(&d, p, 0); - upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg); - upb_success_t ret = upb_decoder_decode(&d); - // stringsrc and the handlers registered by upb_accessors_reghandlers() - // should not suspend. - assert((ret == UPB_OK) == upb_ok(upb_decoder_status(&d))); - if (status) upb_status_copy(status, upb_decoder_status(&d)); - - upb_stringsrc_uninit(&strsrc); - upb_decoder_uninit(&d); - upb_decoderplan_unref(p); - return ret == UPB_OK; -} - -void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) { - void *msg = upb_stdmsg_new(md); - size_t len; - char *data = upb_readfile(fname, &len); - if (!data) goto err; - upb_strtomsg(data, len, msg, md, false, s); - if (!upb_ok(s)) goto err; - return msg; - -err: - upb_stdmsg_free(msg, md); - return NULL; -} - -#if 0 -void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md, - bool single_line) { - upb_stringsink strsink; - upb_stringsink_init(&strsink); - upb_stringsink_reset(&strsink, str); - - upb_textprinter *p = upb_textprinter_new(); - upb_handlers *h = upb_handlers_new(); - upb_textprinter_reghandlers(h, md); - upb_textprinter_reset(p, upb_stringsink_bytesink(&strsink), single_line); - - upb_status status = UPB_STATUS_INIT; - upb_msg_runhandlers(msg, md, h, p, &status); - // None of {upb_msg_runhandlers, upb_textprinter, upb_stringsink} should be - // capable of returning an error. - assert(upb_ok(&status)); - upb_status_uninit(&status); - - upb_stringsink_uninit(&strsink); - upb_textprinter_free(p); - upb_handlers_unref(h); -} -#endif upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, - upb_status *status) { + void *owner, upb_status *status) { upb_stringsrc strsrc; upb_stringsrc_init(&strsrc); upb_stringsrc_reset(&strsrc, str, len); @@ -104,35 +37,20 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, upb_descreader_uninit(&r); return NULL; } - upb_def **defs = upb_descreader_getdefs(&r, n); + upb_def **defs = upb_descreader_getdefs(&r, owner, n); upb_def **defscopy = malloc(sizeof(upb_def*) * (*n)); memcpy(defscopy, defs, sizeof(upb_def*) * (*n)); upb_descreader_uninit(&r); - // Set default accessors and layouts on all messages. - for(int i = 0; i < *n; i++) { - upb_def *def = defscopy[i]; - upb_msgdef *md = upb_dyncast_msgdef(def); - if (!md) continue; - // For field in msgdef: - upb_msg_iter i; - for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - upb_fielddef_setaccessor(f, upb_stdmsg_accessor(f)); - } - upb_msgdef_layout(md); - } - return defscopy; } bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len, upb_status *status) { int n; - upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, status); + upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, &defs, status); if (!defs) return false; - bool success = upb_symtab_add(s, defs, n, status); - for(int i = 0; i < n; i++) upb_def_unref(defs[i]); + bool success = upb_symtab_add(s, defs, n, &defs, status); free(defs); return success; } diff --git a/upb/pb/glue.h b/upb/pb/glue.h index ff8c85e..6179d8d 100644 --- a/upb/pb/glue.h +++ b/upb/pb/glue.h @@ -1,7 +1,7 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2011 Google Inc. See LICENSE for details. + * Copyright (c) 2011-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> * * upb's core components like upb_decoder and upb_msg are carefully designed to @@ -34,25 +34,12 @@ extern "C" { #endif -// Decodes the given string, which must be in protobuf binary format, to the -// given upb_msg with msgdef "md", storing the status of the operation in "s". -bool upb_strtomsg(const char *str, size_t len, void *msg, - const upb_msgdef *md, bool allow_jit, upb_status *s); - -// Parses the given file into a new message of the given type. Caller owns -// the returned message (or NULL if an error occurred). -void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s); - -//void upb_msgtotext(struct _upb_string *str, void *msg, -// struct _upb_msgdef *md, bool single_line); - - // Loads all defs from the given protobuf binary descriptor, setting default // accessors and a default layout on all messages. The caller owns the // returned array of defs, which will be of length *n. On error NULL is // returned and status is set (if non-NULL). upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, - upb_status *status); + void *owner, upb_status *status); // Like the previous but also adds the loaded defs to the given symtab. bool upb_load_descriptor_into_symtab(upb_symtab *symtab, const char *str, diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c index 3f68f90..0d9c967 100644 --- a/upb/pb/textprinter.c +++ b/upb/pb/textprinter.c @@ -96,7 +96,7 @@ err: const upb_fielddef *f = upb_value_getfielddef(fval); \ uint64_t start_ofs = upb_bytesink_getoffset(p->sink); \ CHECK(upb_textprinter_indent(p)); \ - CHECK(upb_bytesink_writestr(p->sink, f->name)); \ + CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f))); \ CHECK(upb_bytesink_writestr(p->sink, ": ")); \ CHECK(upb_bytesink_printf(p->sink, fmt, upb_value_get ## member(val))); \ CHECK(upb_textprinter_endfield(p)); \ @@ -124,7 +124,8 @@ static upb_flow_t upb_textprinter_putenum(void *_p, upb_value fval, upb_textprinter *p = _p; uint64_t start_ofs = upb_bytesink_getoffset(p->sink); const upb_fielddef *f = upb_value_getfielddef(fval); - upb_enumdef *enum_def = upb_downcast_enumdef(f->def); + const upb_enumdef *enum_def = + upb_downcast_enumdef_const(upb_fielddef_subdef(f)); const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val)); if (label) { CHECK(upb_bytesink_writestr(p->sink, label)); @@ -157,7 +158,7 @@ static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) { uint64_t start_ofs = upb_bytesink_getoffset(p->sink); const upb_fielddef *f = upb_value_getfielddef(fval); CHECK(upb_textprinter_indent(p)); - CHECK(upb_bytesink_printf(p->sink, "%s {", f->name)); + CHECK(upb_bytesink_printf(p->sink, "%s {", upb_fielddef_name(f))); if (!p->single_line) CHECK(upb_bytesink_putc(p->sink, '\n')); p->indent_depth++; diff --git a/upb/pb/varint.h b/upb/pb/varint.h index 815a7a1..c0e0134 100644 --- a/upb/pb/varint.h +++ b/upb/pb/varint.h @@ -19,6 +19,16 @@ extern "C" { #endif +// A list of types as they are encoded on-the-wire. +typedef enum { + UPB_WIRE_TYPE_VARINT = 0, + UPB_WIRE_TYPE_64BIT = 1, + UPB_WIRE_TYPE_DELIMITED = 2, + UPB_WIRE_TYPE_START_GROUP = 3, + UPB_WIRE_TYPE_END_GROUP = 4, + UPB_WIRE_TYPE_32BIT = 5, +} upb_wiretype_t; + // The maximum number of bytes that it takes to encode a 64-bit varint. // Note that with a better encoding this could be 9 (TODO: write up a // wiki document about this). diff --git a/upb/refcount.c b/upb/refcount.c new file mode 100644 index 0000000..a15547a --- /dev/null +++ b/upb/refcount.c @@ -0,0 +1,224 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + */ + +#include <stdlib.h> +#include <limits.h> +#include "upb/refcount.h" + +// TODO(haberman): require client to define these if ref debugging is on. +#ifndef UPB_LOCK +#define UPB_LOCK +#endif + +#ifndef UPB_UNLOCK +#define UPB_UNLOCK +#endif + +/* arch-specific atomic primitives *******************************************/ + +#ifdef UPB_THREAD_UNSAFE ////////////////////////////////////////////////////// + +INLINE void upb_atomic_inc(uint32_t *a) { (*a)++; } +INLINE bool upb_atomic_dec(uint32_t *a) { return --(*a) == 0; } + +#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 /////////////////// + +INLINE void upb_atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); } +INLINE bool upb_atomic_dec(uint32_t *a) { + return __sync_sub_and_fetch(a, 1) == 0; +} + +#elif defined(WIN32) /////////////////////////////////////////////////////////// + +#include <Windows.h> + +INLINE void upb_atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); } +INLINE bool upb_atomic_dec(upb_atomic_t *a) { + return InterlockedDecrement(&a->val) == 0; +} + +#else +#error Atomic primitives not defined for your platform/CPU. \ + Implement them or compile with UPB_THREAD_UNSAFE. +#endif + +// Reserved index values. +#define UPB_INDEX_UNDEFINED UINT16_MAX +#define UPB_INDEX_NOT_IN_STACK (UINT16_MAX - 1) + +static void upb_refcount_merge(upb_refcount *r, upb_refcount *from) { + if (upb_refcount_merged(r, from)) return; + *r->count += *from->count; + free(from->count); + upb_refcount *base = from; + + // Set all refcount pointers in the "from" chain to the merged refcount. + do { from->count = r->count; } while ((from = from->next) != base); + + // Merge the two circularly linked lists by swapping their next pointers. + upb_refcount *tmp = r->next; + r->next = base->next; + base->next = tmp; +} + +// Tarjan's algorithm, see: +// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm + +typedef struct { + int index; + upb_refcount **stack; + int stack_len; + upb_getsuccessors *func; +} upb_tarjan_state; + +static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state); + +void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *_state) { + upb_tarjan_state *state = _state; + if (subobj->index == UPB_INDEX_UNDEFINED) { + // Subdef has not yet been visited; recurse on it. + upb_refcount_dofindscc(subobj, state); + obj->lowlink = UPB_MIN(obj->lowlink, subobj->lowlink); + } else if (subobj->index != UPB_INDEX_NOT_IN_STACK) { + // Subdef is in the stack and hence in the current SCC. + obj->lowlink = UPB_MIN(obj->lowlink, subobj->index); + } +} + +static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state) { + obj->index = state->index; + obj->lowlink = state->index; + state->index++; + state->stack[state->stack_len++] = obj; + + state->func(obj, state); // Visit successors. + + if (obj->lowlink == obj->index) { + upb_refcount *scc_obj; + while ((scc_obj = state->stack[--state->stack_len]) != obj) { + upb_refcount_merge(obj, scc_obj); + scc_obj->index = UPB_INDEX_NOT_IN_STACK; + } + obj->index = UPB_INDEX_NOT_IN_STACK; + } +} + +bool upb_refcount_findscc(upb_refcount **refs, int n, upb_getsuccessors *func) { + // TODO(haberman): allocate less memory. We can't use n as a bound because + // it doesn't include fielddefs. Could either use a dynamically-resizing + // array or think of some other way. + upb_tarjan_state state = {0, malloc(UINT16_MAX * sizeof(void*)), 0, func}; + if (state.stack == NULL) return false; + for (int i = 0; i < n; i++) + if (refs[i]->index == UPB_INDEX_UNDEFINED) + upb_refcount_dofindscc(refs[i], &state); + free(state.stack); + return true; +} + + +/* upb_refcount **************************************************************/ + +bool upb_refcount_init(upb_refcount *r, void *owner) { + r->count = malloc(sizeof(uint32_t)); + if (!r->count) return false; + // Initializing this here means upb_refcount_findscc() can only run once for + // each refcount; may need to revise this to be more flexible. + r->index = UPB_INDEX_UNDEFINED; + r->next = r; +#ifdef UPB_DEBUG_REFS + // We don't detect malloc() failures for UPB_DEBUG_REFS. + upb_inttable_init(&r->refs); + *r->count = 0; + upb_refcount_ref(r, owner); +#else + *r->count = 1; +#endif + return true; +} + +void upb_refcount_uninit(upb_refcount *r) { + (void)r; +#ifdef UPB_DEBUG_REFS + assert(upb_inttable_count(&r->refs) == 0); + upb_inttable_uninit(&r->refs); +#endif +} + +// Moves an existing ref from ref_donor to new_owner, without changing the +// overall ref count. +void upb_refcount_donateref(upb_refcount *r, void *from, void *to) { + (void)r; (void)from; (void)to; + assert(from != to); +#ifdef UPB_DEBUG_REFS + upb_refcount_ref(r, to); + upb_refcount_unref(r, from); +#endif +} + +// Thread-safe operations ////////////////////////////////////////////////////// + +// Ref and unref are thread-safe. +void upb_refcount_ref(upb_refcount *r, void *owner) { + (void)owner; + upb_atomic_inc(r->count); +#ifdef UPB_DEBUG_REFS + UPB_LOCK; + // Caller must not already own a ref. + assert(upb_inttable_lookup(&r->refs, (uintptr_t)owner) == NULL); + + // If a ref is leaked we want to blame the leak on the whoever leaked the + // ref, not on who originally allocated the refcounted object. We accomplish + // this as follows. When a ref is taken in DEBUG_REFS mode, we malloc() some + // memory and arrange setup pointers like so: + // + // upb_refcount + // +----------+ +---------+ + // | count |<-+ | + // +----------+ +----------+ + // | table |---X-->| malloc'd | + // +----------+ | memory | + // +----------+ + // + // Since the "malloc'd memory" is allocated inside of "ref" and free'd in + // unref, it will cause a leak if not unref'd. And since the leaked memory + // points to the object itself, the object will be considered "indirectly + // lost" by tools like Valgrind and not shown unless requested (which is good + // because the object's creator may not be responsible for the leak). But we + // have to hide the pointer marked "X" above from Valgrind, otherwise the + // malloc'd memory will appear to be indirectly leaked and the object itself + // will still be considered the primary leak. We hide this pointer from + // Valgrind (et all) by doing a bitwise not on it. + upb_refcount **target = malloc(sizeof(void*)); + uintptr_t obfuscated = ~(uintptr_t)target; + *target = r; + upb_inttable_insert(&r->refs, (uintptr_t)owner, upb_value_uint64(obfuscated)); + UPB_UNLOCK; +#endif +} + +bool upb_refcount_unref(upb_refcount *r, void *owner) { + (void)owner; + bool ret = upb_atomic_dec(r->count); +#ifdef UPB_DEBUG_REFS + UPB_LOCK; + upb_value v; + bool success = upb_inttable_remove(&r->refs, (uintptr_t)owner, &v); + assert(success); + if (success) { + // Must un-obfuscate the pointer (see above). + free((void*)(~upb_value_getuint64(v))); + } + UPB_UNLOCK; +#endif + if (ret) free(r->count); + return ret; +} + +bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2) { + return r->count == r2->count; +} diff --git a/upb/refcount.h b/upb/refcount.h new file mode 100644 index 0000000..cb2bda9 --- /dev/null +++ b/upb/refcount.h @@ -0,0 +1,70 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * A thread-safe refcount that can optionally track references for debugging + * purposes. It helps avoid circular references by allowing a + * strongly-connected component in the graph to share a refcount. + * + * This interface is internal to upb. + */ + +#ifndef UPB_REFCOUNT_H_ +#define UPB_REFCOUNT_H_ + +#include <stdbool.h> +#include <stdint.h> +#include "upb/table.h" + +#ifndef NDEBUG +#define UPB_DEBUG_REFS +#endif + +typedef struct _upb_refcount { + uint32_t *count; + struct _upb_refcount *next; // Circularly-linked list of this SCC. + uint16_t index; // For SCC algorithm. + uint16_t lowlink; // For SCC algorithm. +#ifdef UPB_DEBUG_REFS + upb_inttable refs; +#endif +} upb_refcount; + +// NON THREAD SAFE operations ////////////////////////////////////////////////// + +// Initializes the refcount with a single ref for the given owner. Returns +// NULL if memory could not be allocated. +bool upb_refcount_init(upb_refcount *r, void *owner); + +// Uninitializes the refcount. May only be called after unref() returns true. +void upb_refcount_uninit(upb_refcount *r); + +// Moves an existing ref from ref_donor to new_owner, without changing the +// overall ref count. +void upb_refcount_donateref(upb_refcount *r, void *from, void *to); + +// Finds strongly-connected components among some set of objects and merges all +// refcounts that share a SCC. The given function will be called when the +// algorithm needs to visit children of a particular object; the function +// should call upb_refcount_visit() once for each child obj. +// +// Returns false if memory allocation failed. +typedef void upb_getsuccessors(upb_refcount *obj, void*); +bool upb_refcount_findscc(upb_refcount **objs, int n, upb_getsuccessors *func); +void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *closure); + +// Thread-safe operations ////////////////////////////////////////////////////// + +// Increases the ref count, the new ref is owned by "owner" which must not +// already own a ref. Circular reference chains are not allowed. +void upb_refcount_ref(upb_refcount *r, void *owner); + +// Release a ref owned by owner, returns true if that was the last ref. +bool upb_refcount_unref(upb_refcount *r, void *owner); + +// Returns true if these two objects share a refcount. +bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2); + +#endif // UPB_REFCOUNT_H_ diff --git a/upb/table.c b/upb/table.c index 31c91b1..4e3544e 100644 --- a/upb/table.c +++ b/upb/table.c @@ -4,8 +4,10 @@ * Copyright (c) 2009 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> * - * There are a few printf's strewn throughout this file, uncommenting them - * can be useful for debugging. + * Implementation is heavily inspired by Lua's ltable.c. + * + * TODO: for table iteration we use (array - 1) in several places; is this + * undefined behavior? If so find a better solution. */ #include "upb/table.h" @@ -14,6 +16,8 @@ #include <stdlib.h> #include <string.h> +#define UPB_MAXARRSIZE 16 // 64k. + static const double MAX_LOAD = 0.85; // The minimum percentage of an array part that we will allow. This is a @@ -21,385 +25,319 @@ static const double MAX_LOAD = 0.85; // cache effects). The lower this is, the more memory we'll use. static const double MIN_DENSITY = 0.1; +int upb_log2(uint64_t v) { +#ifdef __GNUC__ + int ret = 31 - __builtin_clz(v); +#else + int ret = 0; + while (v >>= 1) ret++; +#endif + return UPB_MIN(UPB_MAXARRSIZE, ret); +} + +static upb_tabkey upb_strkey(const char *str) { + upb_tabkey k; + k.str = (char*)str; + return k; +} + static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); +typedef upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key); +typedef bool upb_eqlfunc_t(upb_tabkey k1, upb_tabkey k2); /* Base table (shared code) ***************************************************/ -static uint32_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; } -static size_t upb_table_entrysize(const upb_table *t) { return t->entry_size; } -static size_t upb_table_valuesize(const upb_table *t) { return t->value_size; } +static size_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; } + +static bool upb_table_isfull(upb_table *t) { + return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD; +} -void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) { +static bool upb_table_init(upb_table *t, uint8_t size_lg2) { t->count = 0; - t->entry_size = entry_size; - t->size_lg2 = 1; - while(upb_table_size(t) < size) t->size_lg2++; - size_t bytes = upb_table_size(t) * t->entry_size; + t->size_lg2 = size_lg2; + size_t bytes = upb_table_size(t) * sizeof(upb_tabent); t->mask = upb_table_size(t) - 1; t->entries = malloc(bytes); + if (!t->entries) return false; + memset(t->entries, 0, bytes); + return true; } -void upb_table_free(upb_table *t) { free(t->entries); } +static void upb_table_uninit(upb_table *t) { free(t->entries); } -/* upb_inttable ***************************************************************/ +static bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; } -static upb_inttable_entry *intent(const upb_inttable *t, int32_t i) { - //printf("looking up int entry %d, size of entry: %d\n", i, t->t.entry_size); - return UPB_INDEX(t->t.entries, i, t->t.entry_size); +static upb_tabent *upb_table_emptyent(const upb_table *t) { + upb_tabent *e = t->entries + upb_table_size(t); + while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); } } -static uint32_t upb_inttable_hashtablesize(const upb_inttable *t) { - return upb_table_size(&t->t); +static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key, + upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { + upb_tabent *e = hash(t, key); + if (upb_tabent_isempty(e)) return NULL; + while (1) { + if (eql(e->key, key)) return &e->val; + if ((e = e->next) == NULL) return NULL; + } } -void upb_inttable_sizedinit(upb_inttable *t, uint32_t arrsize, uint32_t hashsize, - uint16_t value_size) { - size_t entsize = _upb_inttable_entrysize(value_size); - upb_table_init(&t->t, hashsize, entsize); - for (uint32_t i = 0; i < upb_table_size(&t->t); i++) { - upb_inttable_entry *e = intent(t, i); - e->hdr.key = 0; - e->hdr.next = UPB_END_OF_CHAIN; - e->val.has_entry = 0; +// The given key must not already exist in the table. +static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val, + upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { + assert(upb_table_lookup(t, key, hash, eql) == NULL); + t->count++; + upb_tabent *mainpos_e = hash(t, key); + upb_tabent *our_e = mainpos_e; + if (!upb_tabent_isempty(mainpos_e)) { // Collision. + upb_tabent *new_e = upb_table_emptyent(t); + upb_tabent *chain = hash(t, mainpos_e->key); // Head of collider's chain. + if (chain == mainpos_e) { + // Existing ent is in its main posisiton (it has the same hash as us, and + // is the head of our chain). Insert to new ent and append to this chain. + new_e->next = mainpos_e->next; + mainpos_e->next = new_e; + our_e = new_e; + } else { + // Existing ent is not in its main position (it is a node in some other + // chain). This implies that no existing ent in the table has our hash. + // Evict it (updating its chain) and use its ent for head of our chain. + *new_e = *mainpos_e; // copies next. + while (chain->next != mainpos_e) chain = chain->next; + chain->next = new_e; + our_e = mainpos_e; + our_e->next = NULL; + } } - t->t.value_size = value_size; - // Always make the array part at least 1 long, so that we know key 0 - // won't be in the hash part (which lets us speed up that code path). - t->array_size = UPB_MAX(1, arrsize); - t->array = malloc(upb_table_valuesize(&t->t) * t->array_size); - t->array_count = 0; - for (uint32_t i = 0; i < t->array_size; i++) { - upb_inttable_value *val = UPB_INDEX(t->array, i, upb_table_valuesize(&t->t)); - val->has_entry = false; + our_e->key = key; + our_e->val = val; + assert(upb_table_lookup(t, key, hash, eql) == &our_e->val); +} + +static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val, + upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { + upb_tabent *chain = hash(t, key); + if (eql(chain->key, key)) { + t->count--; + if (val) *val = chain->val; + if (chain->next) { + upb_tabent *move = chain->next; + *chain = *move; + move->key.num = 0; // Make the slot empty. + } else { + chain->key.num = 0; // Make the slot empty. + } + return true; + } else { + while (chain->next && !eql(chain->next->key, key)) + chain = chain->next; + if (chain->next) { + // Found element to remove. + if (val) *val = chain->next->val; + chain->next->key.num = 0; + chain->next = chain->next->next; + t->count--; + return true; + } else { + return false; + } } } -void upb_inttable_init(upb_inttable *t, uint32_t hashsize, uint16_t value_size) { - upb_inttable_sizedinit(t, 0, hashsize, value_size); +static upb_tabent *upb_table_next(const upb_table *t, upb_tabent *e) { + upb_tabent *end = t->entries + upb_table_size(t); + do { if (++e == end) return NULL; } while(e->key.num == 0); + return e; } -void upb_inttable_free(upb_inttable *t) { - upb_table_free(&t->t); - free(t->array); +static upb_tabent *upb_table_begin(const upb_table *t) { + return upb_table_next(t, t->entries - 1); } -static uint32_t empty_intbucket(upb_inttable *table) -{ - // TODO: does it matter that this is biased towards the front of the table? - for(uint32_t i = 0; i < upb_inttable_hashtablesize(table); i++) { - upb_inttable_entry *e = intent(table, i); - if(!e->val.has_entry) return i; - } - assert(false); - return 0; + +/* upb_strtable ***************************************************************/ + +// A simple "subclass" of upb_table that only adds a hash function for strings. + +static upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) { + // Could avoid the strlen() by using a hash function that terminates on NULL. + return t->entries + (MurmurHash2(key.str, strlen(key.str), 0) & t->mask); } -// The insert routines have a lot more code duplication between int/string -// variants than I would like, but there's just a bit too much that varies to -// parameterize them. -static void intinsert(upb_inttable *t, uint32_t key, const void *val) { - assert(upb_inttable_lookup(t, key) == NULL); - upb_inttable_value *table_val; - if (_upb_inttable_isarrkey(t, key)) { - table_val = UPB_INDEX(t->array, key, upb_table_valuesize(&t->t)); - t->array_count++; - //printf("Inserting key %d to Array part! %p\n", key, table_val); - } else { - t->t.count++; - uint32_t bucket = _upb_inttable_bucket(t, key); - upb_inttable_entry *table_e = intent(t, bucket); - //printf("Hash part! Inserting into bucket %d?\n", bucket); - if(table_e->val.has_entry) { /* Collision. */ - //printf("Collision!\n"); - if(bucket == _upb_inttable_bucket(t, table_e->hdr.key)) { - /* Existing element is in its main posisiton. Find an empty slot to - * place our new element and append it to this key's chain. */ - uint32_t empty_bucket = empty_intbucket(t); - while (table_e->hdr.next != UPB_END_OF_CHAIN) - table_e = intent(t, table_e->hdr.next); - table_e->hdr.next = empty_bucket; - table_e = intent(t, empty_bucket); - } else { - /* Existing element is not in its main position. Move it to an empty - * slot and put our element in its main position. */ - uint32_t empty_bucket = empty_intbucket(t); - uint32_t evictee_bucket = _upb_inttable_bucket(t, table_e->hdr.key); - memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ - upb_inttable_entry *evictee_e = intent(t, evictee_bucket); - while(1) { - assert(evictee_e->val.has_entry); - assert(evictee_e->hdr.next != UPB_END_OF_CHAIN); - if(evictee_e->hdr.next == bucket) { - evictee_e->hdr.next = empty_bucket; - break; - } - evictee_e = intent(t, evictee_e->hdr.next); - } - /* table_e remains set to our mainpos. */ - } - } - //printf("Inserting! to:%p, copying to: %p\n", table_e, &table_e->val); - table_val = &table_e->val; - table_e->hdr.key = key; - table_e->hdr.next = UPB_END_OF_CHAIN; - } - memcpy(table_val, val, upb_table_valuesize(&t->t)); - table_val->has_entry = true; - assert(upb_inttable_lookup(t, key) == table_val); +static bool upb_streql(upb_tabkey k1, upb_tabkey k2) { + return strcmp(k1.str, k2.str) == 0; } -// Insert all elements from src into dest. Caller ensures that a resize will -// not be necessary. -static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) { - for(upb_inttable_iter i = upb_inttable_begin(src); !upb_inttable_done(i); - i = upb_inttable_next(src, i)) { - //printf("load check: %d %d\n", upb_table_count(&dst->t), upb_inttable_hashtablesize(dst)); - assert((double)(upb_table_count(&dst->t)) / - upb_inttable_hashtablesize(dst) <= MAX_LOAD); - intinsert(dst, upb_inttable_iter_key(i), upb_inttable_iter_value(i)); - } +bool upb_strtable_init(upb_strtable *t) { return upb_table_init(&t->t, 4); } + +void upb_strtable_uninit(upb_strtable *t) { + for (size_t i = 0; i < upb_table_size(&t->t); i++) + free(t->t.entries[i].key.str); + upb_table_uninit(&t->t); } -void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val) { - if((double)(t->t.count + 1) / upb_inttable_hashtablesize(t) > MAX_LOAD) { - //printf("RESIZE!\n"); - // Need to resize. Allocate new table with double the size of however many - // elements we have now, add old elements to it. We create the new hash - // table without an array part, even if the old table had an array part. - // If/when the user calls upb_inttable_compact() again, we'll create an - // array part then. - upb_inttable new_table; - //printf("Old table count=%d, size=%d\n", upb_inttable_count(t), upb_inttable_hashtablesize(t)); - upb_inttable_init(&new_table, upb_inttable_count(t)*2, upb_table_valuesize(&t->t)); - upb_inttable_insertall(&new_table, t); - upb_inttable_free(t); +bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) { + if (upb_table_isfull(&t->t)) { + // Need to resize. New table of double the size, add old elements to it. + upb_strtable new_table; + if (!upb_table_init(&new_table.t, t->t.size_lg2 + 1)) return false; + upb_strtable_iter i; + upb_strtable_begin(&i, t); + for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_strtable_insert( + &new_table, upb_strtable_iter_key(&i), upb_strtable_iter_value(&i)); + } + upb_strtable_uninit(t); *t = new_table; } - intinsert(t, key, val); + if ((k = strdup(k)) == NULL) return false; + upb_table_insert(&t->t, upb_strkey(k), v, &upb_strhash, &upb_streql); + return true; } -void upb_inttable_compact(upb_inttable *t) { - // Find the largest array part we can that satisfies the MIN_DENSITY - // definition. For now we just count down powers of two. - uint32_t largest_key = 0; - for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i); - i = upb_inttable_next(t, i)) { - largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i)); - } - int lg2_array = 0; - while ((1UL << lg2_array) < largest_key) ++lg2_array; - ++lg2_array; // Undo the first iteration. - size_t array_size = 0; - int array_count = 0; - while (lg2_array > 0) { - array_size = (1 << --lg2_array); - //printf("Considering size %d (btw, our table has %d things total)\n", array_size, upb_inttable_count(t)); - if ((double)upb_inttable_count(t) / array_size < MIN_DENSITY) { - // Even if 100% of the keys were in the array pary, an array of this - // size would not be dense enough. - continue; - } - array_count = 0; - for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i); - i = upb_inttable_next(t, i)) { - if (upb_inttable_iter_key(i) < array_size) - array_count++; - } - //printf("There would be %d things in that array\n", array_count); - if ((double)array_count / array_size >= MIN_DENSITY) break; - } - upb_inttable new_table; - int hash_size = (upb_inttable_count(t) - array_count + 1) / MAX_LOAD; - //printf("array_count: %d, array_size: %d, hash_size: %d, table size: %d\n", array_count, array_size, hash_size, upb_inttable_count(t)); - upb_inttable_sizedinit(&new_table, array_size, hash_size, - upb_table_valuesize(&t->t)); - //printf("For %d things, using array size=%d, hash_size = %d\n", upb_inttable_count(t), array_size, hash_size); - upb_inttable_insertall(&new_table, t); - upb_inttable_free(t); - *t = new_table; +upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) { + return upb_table_lookup(&t->t, upb_strkey(key), &upb_strhash, &upb_streql); } -upb_inttable_iter upb_inttable_begin(const upb_inttable *t) { - upb_inttable_iter iter = {-1, NULL, true}; // -1 will overflow to 0 on the first iteration. - return upb_inttable_next(t, iter); +void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) { + i->t = t; + i->e = upb_table_begin(&t->t); } -upb_inttable_iter upb_inttable_next(const upb_inttable *t, - upb_inttable_iter iter) { - const size_t hdrsize = sizeof(upb_inttable_header); - const size_t entsize = upb_table_entrysize(&t->t); - if (iter.array_part) { - while (++iter.key < t->array_size) { - //printf("considering value %d\n", iter.key); - iter.value = UPB_INDEX(t->array, iter.key, t->t.value_size); - if (iter.value->has_entry) return iter; - } - //printf("Done with array part!\n"); - iter.array_part = false; - // Point to the value of the table[-1] entry. - iter.value = UPB_INDEX(intent(t, -1), 1, hdrsize); - } - void *end = intent(t, upb_inttable_hashtablesize(t)); - // Point to the entry for the value that was previously in iter. - upb_inttable_entry *e = UPB_INDEX(iter.value, -1, hdrsize); - do { - e = UPB_INDEX(e, 1, entsize); - //printf("considering value %p (val: %p)\n", e, &e->val); - if(e == end) { - //printf("No values.\n"); - iter.value = NULL; - return iter; - } - } while(!e->val.has_entry); - //printf("USING VALUE! %p\n", e); - iter.key = e->hdr.key; - iter.value = &e->val; - return iter; +void upb_strtable_next(upb_strtable_iter *i) { + i->e = upb_table_next(&i->t->t, i->e); } -/* upb_strtable ***************************************************************/ +/* upb_inttable ***************************************************************/ -static upb_strtable_entry *strent(const upb_strtable *t, int32_t i) { - //fprintf(stderr, "i: %d, table_size: %d\n", i, upb_table_size(&t->t)); - assert(i <= (int32_t)upb_table_size(&t->t)); - return UPB_INDEX(t->t.entries, i, t->t.entry_size); -} +// For inttables we use a hybrid structure where small keys are kept in an +// array and large keys are put in the hash table. -static uint32_t upb_strtable_size(const upb_strtable *t) { - return upb_table_size(&t->t); +static bool upb_inteql(upb_tabkey k1, upb_tabkey k2) { + return k1.num == k2.num; } -void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t valuesize) { - t->t.value_size = valuesize; - size_t entsize = upb_align_up(sizeof(upb_strtable_header) + valuesize, 8); - upb_table_init(&t->t, size, entsize); - for (uint32_t i = 0; i < upb_table_size(&t->t); i++) { - upb_strtable_entry *e = strent(t, i); - e->hdr.key = NULL; - e->hdr.next = UPB_END_OF_CHAIN; - } +size_t upb_inttable_count(const upb_inttable *t) { + return t->t.count + t->array_count; } -void upb_strtable_free(upb_strtable *t) { - // Free keys from the strtable. - upb_strtable_iter i; - for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) - free((char*)upb_strtable_iter_key(&i)); - upb_table_free(&t->t); +bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) { + if (!upb_table_init(&t->t, hsize_lg2)) return false; + // Always make the array part at least 1 long, so that we know key 0 + // won't be in the hash part, which simplifies things. + t->array_size = UPB_MAX(1, asize); + t->array_count = 0; + size_t array_bytes = t->array_size * sizeof(upb_value); + t->array = malloc(array_bytes); + if (!t->array) { + upb_table_uninit(&t->t); + return false; + } + memset(t->array, 0xff, array_bytes); + return true; } -static uint32_t strtable_bucket(const upb_strtable *t, const char *key) { - uint32_t hash = MurmurHash2(key, strlen(key), 0); - return (hash & t->t.mask); +bool upb_inttable_init(upb_inttable *t) { + return upb_inttable_sizedinit(t, 0, 4); } -void *upb_strtable_lookup(const upb_strtable *t, const char *key) { - uint32_t bucket = strtable_bucket(t, key); - upb_strtable_entry *e; - do { - e = strent(t, bucket); - if(e->hdr.key && strcmp(e->hdr.key, key) == 0) return &e->val; - } while((bucket = e->hdr.next) != UPB_END_OF_CHAIN); - return NULL; +void upb_inttable_uninit(upb_inttable *t) { + upb_table_uninit(&t->t); + free(t->array); } -void *upb_strtable_lookupl(const upb_strtable *t, const char *key, size_t len) { - // TODO: improve. - char *key2 = malloc(len+1); - memcpy(key2, key, len); - key2[len] = '\0'; - void *ret = upb_strtable_lookup(t, key2); - free(key2); - return ret; +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) { + assert(upb_arrhas(val)); + if (key < t->array_size) { + assert(!upb_arrhas(t->array[key])); + t->array_count++; + t->array[key] = val; + } else { + if (upb_table_isfull(&t->t)) { + // Need to resize the hash part, but we re-use the array part. + upb_table new_table; + if (!upb_table_init(&new_table, t->t.size_lg2 + 1)) return false; + upb_tabent *e; + for (e = upb_table_begin(&t->t); e; e = upb_table_next(&t->t, e)) + upb_table_insert(&new_table, e->key, e->val, &upb_inthash, &upb_inteql); + upb_table_uninit(&t->t); + t->t = new_table; + } + upb_table_insert(&t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql); + } + return true; } -static uint32_t empty_strbucket(upb_strtable *table) { - // TODO: does it matter that this is biased towards the front of the table? - for(uint32_t i = 0; i < upb_strtable_size(table); i++) { - upb_strtable_entry *e = strent(table, i); - if(!e->hdr.key) return i; +upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) { + if (key < t->array_size) { + upb_value *v = &t->array[key]; + return upb_arrhas(*v) ? v : NULL; } - assert(false); - return 0; + return upb_table_lookup(&t->t, upb_intkey(key), &upb_inthash, &upb_inteql); } -static void strinsert(upb_strtable *t, const char *key, const void *val) { - assert(upb_strtable_lookup(t, key) == NULL); - t->t.count++; - uint32_t bucket = strtable_bucket(t, key); - upb_strtable_entry *table_e = strent(t, bucket); - if(table_e->hdr.key) { /* Collision. */ - if(bucket == strtable_bucket(t, table_e->hdr.key)) { - /* Existing element is in its main posisiton. Find an empty slot to - * place our new element and append it to this key's chain. */ - uint32_t empty_bucket = empty_strbucket(t); - while (table_e->hdr.next != UPB_END_OF_CHAIN) - table_e = strent(t, table_e->hdr.next); - table_e->hdr.next = empty_bucket; - table_e = strent(t, empty_bucket); +bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { + if (key < t->array_size) { + if (upb_arrhas(t->array[key])) { + t->array_count--; + if (val) *val = t->array[key]; + t->array[key] = upb_value_uint64(-1); + return true; } else { - /* Existing element is not in its main position. Move it to an empty - * slot and put our element in its main position. */ - uint32_t empty_bucket = empty_strbucket(t); - uint32_t evictee_bucket = strtable_bucket(t, table_e->hdr.key); - memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ - upb_strtable_entry *evictee_e = strent(t, evictee_bucket); - while(1) { - assert(evictee_e->hdr.key); - assert(evictee_e->hdr.next != UPB_END_OF_CHAIN); - if(evictee_e->hdr.next == bucket) { - evictee_e->hdr.next = empty_bucket; - break; - } - evictee_e = strent(t, evictee_e->hdr.next); - } - /* table_e remains set to our mainpos. */ + return false; } + } else { + return upb_table_remove( + &t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql); } - //fprintf(stderr, "val: %p\n", val); - //fprintf(stderr, "val size: %d\n", t->t.value_size); - memcpy(&table_e->val, val, t->t.value_size); - table_e->hdr.key = strdup(key); - table_e->hdr.next = UPB_END_OF_CHAIN; - //fprintf(stderr, "Looking up, string=%s...\n", key); - assert(upb_strtable_lookup(t, key) == &table_e->val); - //printf("Yay!\n"); } -void upb_strtable_insert(upb_strtable *t, const char *key, const void *val) { - if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) { - // Need to resize. New table of double the size, add old elements to it. - //printf("RESIZE!!\n"); - upb_strtable new_table; - upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.value_size); - upb_strtable_iter i; - upb_strtable_begin(&i, t); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - strinsert(&new_table, - upb_strtable_iter_key(&i), - upb_strtable_iter_value(&i)); - } - upb_strtable_free(t); - *t = new_table; +void upb_inttable_compact(upb_inttable *t) { + // Find the largest power of two that satisfies the MIN_DENSITY definition. + int counts[UPB_MAXARRSIZE + 1] = {0}; + upb_inttable_iter i; + for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i)) + counts[upb_log2(upb_inttable_iter_key(&i))]++; + int count = upb_inttable_count(t); + int size; + for (size = UPB_MAXARRSIZE; size > 1; size--) { + count -= counts[size]; + if (count >= (1 << size) * MIN_DENSITY) break; } - strinsert(t, key, val); + + // Insert all elements into new, perfectly-sized table. + upb_inttable new_table; + int hashsize = (upb_inttable_count(t) - count + 1) / MAX_LOAD; + upb_inttable_sizedinit(&new_table, size, upb_log2(hashsize) + 1); + for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i)) + upb_inttable_insert( + &new_table, upb_inttable_iter_key(&i), upb_inttable_iter_value(&i)); + upb_inttable_uninit(t); + *t = new_table; } -void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) { - i->e = strent(t, -1); +void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) { i->t = t; - upb_strtable_next(i); + i->arrkey = -1; + i->array_part = true; + upb_inttable_next(i); } -void upb_strtable_next(upb_strtable_iter *i) { - upb_strtable_entry *end = strent(i->t, upb_strtable_size(i->t)); - upb_strtable_entry *cur = i->e; - do { - cur = (void*)((char*)cur + i->t->t.entry_size); - if(cur == end) { i->e = NULL; return; } - } while(cur->hdr.key == NULL); - i->e = cur; +void upb_inttable_next(upb_inttable_iter *iter) { + const upb_inttable *t = iter->t; + if (iter->array_part) { + for (size_t i = iter->arrkey; ++i < t->array_size; ) + if (upb_arrhas(t->array[i])) { + iter->ptr.val = &t->array[i]; + iter->arrkey = i; + return; + } + iter->array_part = false; + iter->ptr.ent = t->t.entries - 1; + } + iter->ptr.ent = upb_table_next(&t->t, iter->ptr.ent); } #ifdef UPB_UNALIGNED_READS_OK @@ -413,8 +351,7 @@ void upb_strtable_next(upb_strtable_iter *i) { // 1. It will not work incrementally. // 2. It will not produce the same results on little-endian and big-endian // machines. -static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) -{ +static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) { // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. const uint32_t m = 0x5bd1e995; @@ -465,8 +402,7 @@ static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } -static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) -{ +static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) { const uint32_t m = 0x5bd1e995; const int32_t r = 24; const uint8_t * data = (const uint8_t *)key; diff --git a/upb/table.h b/upb/table.h index 0c0a785..f6bff66 100644 --- a/upb/table.h +++ b/upb/table.h @@ -4,13 +4,16 @@ * Copyright (c) 2009 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> * - * This file defines very fast int->struct (inttable) and string->struct - * (strtable) hash tables. The struct can be of any size, and it is stored - * in the table itself, for cache-friendly performance. + * This file defines very fast int->upb_value (inttable) and string->upb_value + * (strtable) hash tables. * - * The table uses internal chaining with Brent's variation (inspired by the - * Lua implementation of hash tables). The hash function for strings is - * Austin Appleby's "MurmurHash." + * The table uses chained scatter with Brent's variation (inspired by the Lua + * implementation of hash tables). The hash function for strings is Austin + * Appleby's "MurmurHash." + * + * The inttable uses uintptr_t as its key, which guarantees it can be used to + * store pointers or integers of at least 32 bits (upb isn't really useful on + * systems where sizeof(void*) < 4). * * This header is internal to upb; its interface should not be considered * public or stable. @@ -19,52 +22,30 @@ #ifndef UPB_TABLE_H_ #define UPB_TABLE_H_ -#include <assert.h> #include <stddef.h> +#include <stdint.h> #include "upb.h" #ifdef __cplusplus extern "C" { #endif -#define UPB_END_OF_CHAIN (uint32_t)-1 - -typedef struct { - bool has_entry:1; - // The rest of the bits are the user's. -} upb_inttable_value; - -typedef struct { - uint32_t key; - uint32_t next; // Internal chaining. -} upb_inttable_header; - -typedef struct { - upb_inttable_header hdr; - upb_inttable_value val; -} upb_inttable_entry; - -// TODO: consider storing the hash in the entry. This would avoid the need to -// rehash on table resizes, but more importantly could possibly improve lookup -// performance by letting us compare hashes before comparing lengths or the -// strings themselves. -typedef struct { - char *key; // We own, nullz. TODO: store explicit len? - uint32_t next; // Internal chaining. -} upb_strtable_header; +typedef union { + uintptr_t num; + char *str; // We own, nullz. +} upb_tabkey; -typedef struct { - upb_strtable_header hdr; - uint32_t val; // Val is at least 32 bits. -} upb_strtable_entry; +typedef struct _upb_tabent { + upb_tabkey key; + upb_value val; + struct _upb_tabent *next; // Internal chaining. +} upb_tabent; typedef struct { - void *entries; // Hash table. - uint32_t count; // Number of entries in the hash part. - uint32_t mask; // Mask to turn hash value -> bucket. - uint16_t entry_size; // Size of each entry. - uint16_t value_size; // Size of each value. - uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries. + upb_tabent *entries; // Hash table. + size_t count; // Number of entries in the hash part. + size_t mask; // Mask to turn hash value -> bucket. + uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries. } upb_table; typedef struct { @@ -72,149 +53,124 @@ typedef struct { } upb_strtable; typedef struct { - upb_table t; - void *array; // Array part of the table. - uint32_t array_size; // Array part size. - uint32_t array_count; // Array part number of elements. + upb_table t; // For entries that don't fit in the array part. + upb_value *array; // Array part of the table. + size_t array_size; // Array part size. + size_t array_count; // Array part number of elements. } upb_inttable; -// Initialize and free a table, respectively. Specify the initial size -// with 'size' (the size will be increased as necessary). Value size -// specifies how many bytes each value in the table is. -// -// WARNING! The lowest bit of every entry is reserved by the hash table. -// It will always be overwritten when you insert, and must not be modified -// when looked up! -void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size); -void upb_inttable_free(upb_inttable *table); -void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t value_size); -void upb_strtable_free(upb_strtable *table); - -// Number of values in the hash table. -INLINE uint32_t upb_table_count(const upb_table *t) { return t->count; } -INLINE uint32_t upb_inttable_count(const upb_inttable *t) { - return t->array_count + upb_table_count(&t->t); -} -INLINE uint32_t upb_strtable_count(const upb_strtable *t) { - return upb_table_count(&t->t); +INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; } + +INLINE upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) { + return t->entries + ((uint32_t)key.num & t->mask); } -// Inserts the given key into the hashtable with the given value. The key must -// not already exist in the hash table. The data will be copied from val into -// the hashtable (the amount of data copied comes from value_size when the -// table was constructed). Therefore the data at val may be freed once the -// call returns. For string tables, the table takes ownership of the string. -// -// WARNING: the lowest bit of val is reserved and will be overwritten! -void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val); -// TODO: may want to allow for more complex keys with custom hash/comparison -// functions. -void upb_strtable_insert(upb_strtable *t, const char *key, const void *val); -void upb_inttable_compact(upb_inttable *t); +INLINE bool upb_arrhas(upb_value v) { return v.val.uint64 != (uint64_t)-1; } -INLINE uint32_t _upb_inttable_bucket(const upb_inttable *t, uint32_t k) { - uint32_t bucket = k & t->t.mask; // Identity hash for ints. - assert(bucket != UPB_END_OF_CHAIN); - return bucket; -} +// Initialize and uninitialize a table, respectively. If memory allocation +// failed, false is returned that the table is uninitialized. +bool upb_inttable_init(upb_inttable *table); +bool upb_strtable_init(upb_strtable *table); +void upb_inttable_uninit(upb_inttable *table); +void upb_strtable_uninit(upb_strtable *table); -// Returns true if this key belongs in the array part of the table. -INLINE bool _upb_inttable_isarrkey(const upb_inttable *t, uint32_t k) { - return (k < t->array_size); -} +// Returns the number of values in the table. +size_t upb_inttable_count(const upb_inttable *t); +INLINE size_t upb_strtable_count(const upb_strtable *t) { return t->t.count; } -// Looks up key in this table, returning a pointer to the user's inserted data. -// We have the caller specify the entry_size because fixing this as a literal -// (instead of reading table->entry_size) gives the compiler more ability to -// optimize. +// Inserts the given key into the hashtable with the given value. The key must +// not already exist in the hash table. For string tables, the key must be +// NULL-terminated, and the table will make an internal copy of the key. +// Inttables must not insert a value of UINTPTR_MAX. // -// Note: All returned pointers are invalidated by inserts! -INLINE void *_upb_inttable_fastlookup(const upb_inttable *t, uint32_t key, - size_t entry_size, size_t value_size) { - upb_inttable_value *arrval = - (upb_inttable_value*)UPB_INDEX(t->array, key, value_size); - if (_upb_inttable_isarrkey(t, key)) { - return (arrval->has_entry) ? arrval : NULL; +// If a table resize was required but memory allocation failed, false is +// returned and the table is unchanged. +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val); +bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val); + +// Looks up key in this table, returning a pointer to the table's internal copy +// of the user's inserted data, or NULL if this key is not in the table. The +// user is free to modify the given upb_value, which will be reflected in any +// future lookups of this key. The returned pointer is invalidated by inserts. +upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key); +upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key); + +// Removes an item from the table. Returns true if the remove was successful, +// and stores the removed item in *val if non-NULL. +bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); + +// Optimizes the table for the current set of entries, for both memory use and +// lookup time. Client should call this after all entries have been inserted; +// inserting more entries is legal, but will likely require a table resize. +void upb_inttable_compact(upb_inttable *t); + +// A special-case inlinable version of the lookup routine for 32-bit integers. +INLINE upb_value *upb_inttable_lookup32(const upb_inttable *t, uint32_t key) { + if (key < t->array_size) { + upb_value *v = &t->array[key]; + return upb_arrhas(*v) ? v : NULL; } - uint32_t bucket = _upb_inttable_bucket(t, key); - upb_inttable_entry *e = - (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size); - while (1) { - if (e->hdr.key == key) { - return &e->val; - } - if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL; - e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size); + for (upb_tabent *e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) { + if ((uint32_t)e->key.num == key) return &e->val; + if (e->next == NULL) return NULL; } } -INLINE size_t _upb_inttable_entrysize(size_t value_size) { - return upb_align_up(sizeof(upb_inttable_header) + value_size, 8); -} - -INLINE void *upb_inttable_fastlookup(const upb_inttable *t, uint32_t key, - uint32_t value_size) { - return _upb_inttable_fastlookup( - t, key, _upb_inttable_entrysize(value_size), value_size); -} - -INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) { - return _upb_inttable_fastlookup(t, key, t->t.entry_size, t->t.value_size); -} - -void *upb_strtable_lookupl(const upb_strtable *t, const char *key, size_t len); -void *upb_strtable_lookup(const upb_strtable *t, const char *key); - /* upb_strtable_iter **********************************************************/ // Strtable iteration. Order is undefined. Insertions invalidate iterators. // upb_strtable_iter i; -// for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) { +// upb_strtable_begin(&i, t); +// for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { // const char *key = upb_strtable_iter_key(&i); // const myval *val = upb_strtable_iter_value(&i); // // ... // } typedef struct { const upb_strtable *t; - upb_strtable_entry *e; + upb_tabent *e; } upb_strtable_iter; void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t); void upb_strtable_next(upb_strtable_iter *i); INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; } INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) { - return i->e->hdr.key; + return i->e->key.str; } -INLINE const void *upb_strtable_iter_value(upb_strtable_iter *i) { - return &i->e->val; +INLINE upb_value upb_strtable_iter_value(upb_strtable_iter *i) { + return i->e->val; } /* upb_inttable_iter **********************************************************/ // Inttable iteration. Order is undefined. Insertions invalidate iterators. -// for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i); -// i = upb_inttable_next(t, i)) { +// upb_inttable_iter i; +// upb_inttable_begin(&i, t); +// for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { // // ... // } typedef struct { - uint32_t key; - upb_inttable_value *value; + const upb_inttable *t; + union { + upb_tabent *ent; // For hash iteration. + upb_value *val; // For array iteration. + } ptr; + uintptr_t arrkey; bool array_part; } upb_inttable_iter; -upb_inttable_iter upb_inttable_begin(const upb_inttable *t); -upb_inttable_iter upb_inttable_next(const upb_inttable *t, - upb_inttable_iter iter); -INLINE bool upb_inttable_done(upb_inttable_iter iter) { - return iter.value == NULL; +void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t); +void upb_inttable_next(upb_inttable_iter *i); +INLINE bool upb_inttable_done(upb_inttable_iter *i) { + return i->ptr.ent == NULL; } -INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) { - return iter.key; +INLINE uintptr_t upb_inttable_iter_key(upb_inttable_iter *i) { + return i->array_part ? i->arrkey : i->ptr.ent->key.num; } -INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) { - return iter.value; +INLINE upb_value upb_inttable_iter_value(upb_inttable_iter *i) { + return i->array_part ? *i->ptr.val : i->ptr.ent->val; } #ifdef __cplusplus @@ -1,47 +1,17 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2009 Google Inc. See LICENSE for details. + * Copyright (c) 2009-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> */ #include <errno.h> #include <stdarg.h> #include <stddef.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> -#include "upb/descriptor_const.h" #include "upb/upb.h" -#include "upb/bytestream.h" - -#define alignof(t) offsetof(struct { char c; t x; }, x) -#define TYPE_INFO(wire_type, ctype, inmemory_type, is_numeric) \ - {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), \ - #ctype, is_numeric}, - -const upb_type_info upb_types[] = { - // END_GROUP is not real, but used to signify the pseudo-field that - // ends a group from within the group. - TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE, false) // ENDGROUP - TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE, true) // DOUBLE - TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT, true) // FLOAT - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64, true) // INT64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, UINT64, true) // UINT64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32, true) // INT32 - TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, UINT64, true) // FIXED64 - TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, UINT32, true) // FIXED32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, BOOL, true) // BOOL - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING, false) // STRING - TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, MESSAGE, false) // GROUP - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, MESSAGE, false) // MESSAGE - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING, false) // BYTES - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, UINT32, true) // UINT32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, INT32, true) // ENUM - TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, INT32, true) // SFIXED32 - TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, INT64, true) // SFIXED64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32, true) // SINT32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64, true) // SINT64 -}; #ifdef NDEBUG upb_value UPB_NO_VALUE = {{0}}; @@ -142,8 +112,9 @@ bool upb_errno_is_wouldblock() { bool upb_posix_codetostr(int code, char *buf, size_t len) { if (strerror_r(code, buf, len) == -1) { if (errno == EINVAL) { - int n = snprintf(buf, len, "Invalid POSIX error number %d\n", code); - return n >= (int)len; + size_t actual_len = + snprintf(buf, len, "Invalid POSIX error number %d\n", code); + return actual_len >= len; } else if (errno == ERANGE) { return false; } @@ -15,9 +15,6 @@ #include <stdbool.h> #include <stddef.h> #include <stdint.h> -#include <string.h> -#include "descriptor_const.h" -#include "atomic.h" #ifdef __cplusplus extern "C" { @@ -36,20 +33,6 @@ extern "C" { #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) -#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) - -INLINE void nop_printf(const char *fmt, ...) { (void)fmt; } - -#ifdef NDEBUG -#define DEBUGPRINTF nop_printf -#else -#define DEBUGPRINTF printf -#endif - -// Rounds val up to the next multiple of align. -INLINE uint32_t upb_align_up(uint32_t val, uint32_t align) { - return val % align == 0 ? val : val + align - (val % align); -} // The maximum that any submessages can be nested. Matches proto2's limit. // At the moment this specifies the size of several statically-sized arrays @@ -94,73 +77,46 @@ INLINE uint32_t upb_align_up(uint32_t val, uint32_t align) { #define UPB_MAX_TYPE_DEPTH 64 -/* Fundamental types and type constants. **************************************/ - -// A list of types as they are encoded on-the-wire. -enum upb_wire_type { - UPB_WIRE_TYPE_VARINT = 0, - UPB_WIRE_TYPE_64BIT = 1, - UPB_WIRE_TYPE_DELIMITED = 2, - UPB_WIRE_TYPE_START_GROUP = 3, - UPB_WIRE_TYPE_END_GROUP = 4, - UPB_WIRE_TYPE_32BIT = 5, -}; - -// Type of a field as defined in a .proto file. eg. string, int32, etc. The -// integers that represent this are defined by descriptor.proto. Note that -// descriptor.proto reserves "0" for errors, and we use it to represent -// exceptional circumstances. -typedef uint8_t upb_fieldtype_t; - -// For referencing the type constants tersely. -#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ ## type -#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_ ## type - -// Info for a given field type. -typedef struct { - uint8_t align; - uint8_t size; - uint8_t native_wire_type; - uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32 - const char *ctype; - bool is_numeric; // Only numeric types can be packed. -} upb_type_info; - -// A static array of info about all of the field types, indexed by type number. -extern const upb_type_info upb_types[]; - - /* upb_value ******************************************************************/ +// Clients should not need to access these enum values; they are used internally +// to do typechecks of upb_value accesses. +typedef enum { + UPB_CTYPE_INT32 = 1, + UPB_CTYPE_INT64 = 2, + UPB_CTYPE_UINT32 = 3, + UPB_CTYPE_UINT64 = 4, + UPB_CTYPE_DOUBLE = 5, + UPB_CTYPE_FLOAT = 6, + UPB_CTYPE_BOOL = 7, + UPB_CTYPE_PTR = 8, + UPB_CTYPE_BYTEREGION = 9, + UPB_CTYPE_FIELDDEF = 10, +} upb_ctype_t; + struct _upb_byteregion; struct _upb_fielddef; -// Special constants for the upb_value.type field. These must not conflict -// with any members of FieldDescriptorProto.Type. -#define UPB_TYPE_ENDGROUP 0 -#define UPB_VALUETYPE_FIELDDEF 32 -#define UPB_VALUETYPE_PTR 33 - // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. typedef struct { union { uint64_t uint64; - double _double; - float _float; int32_t int32; int64_t int64; uint32_t uint32; + double _double; + float _float; bool _bool; + void *_void; struct _upb_byteregion *byteregion; const struct _upb_fielddef *fielddef; - void *_void; } val; #ifndef NDEBUG // In debug mode we carry the value type around also so we can check accesses // to be sure the right member is being read. - char type; + upb_ctype_t type; #endif } upb_value; @@ -185,7 +141,7 @@ typedef struct { return val.val.membername; \ } \ INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ - memset(val, 0, sizeof(*val)); \ + val->val.uint64 = 0; \ SET_TYPE(val->type, proto_type); \ val->val.membername = cval; \ } \ @@ -195,21 +151,23 @@ typedef struct { return ret; \ } -UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE)); -UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT)); -UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32)); -UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64)); -UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); -UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); -UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); -UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR); +UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_CTYPE_INT32); +UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_CTYPE_INT64); +UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_CTYPE_UINT32); +UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_CTYPE_UINT64); +UPB_VALUE_ACCESSORS(double, _double, double, UPB_CTYPE_DOUBLE); +UPB_VALUE_ACCESSORS(float, _float, float, UPB_CTYPE_FLOAT); +UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_CTYPE_BOOL); +UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_CTYPE_PTR); UPB_VALUE_ACCESSORS(byteregion, byteregion, struct _upb_byteregion*, - UPB_TYPE(STRING)); + UPB_CTYPE_BYTEREGION); // upb_fielddef should never be modified from a callback // (ie. when they're getting passed through a upb_value). UPB_VALUE_ACCESSORS(fielddef, fielddef, const struct _upb_fielddef*, - UPB_VALUETYPE_FIELDDEF); + UPB_CTYPE_FIELDDEF); + +#undef UPB_VALUE_ACCESSORS extern upb_value UPB_NO_VALUE; @@ -262,7 +220,7 @@ void upb_status_copy(upb_status *to, const upb_status *from); extern upb_errorspace upb_posix_errorspace; void upb_status_fromerrno(upb_status *status); -bool upb_errno_is_wouldblock(void); +bool upb_errno_is_wouldblock(); // Like vasprintf (which allocates a string large enough for the result), but // uses *buf (which can be NULL) as a starting point and reallocates it only if |