summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoshua Haberman <jhaberman@gmail.com>2011-06-17 10:34:29 -0700
committerJoshua Haberman <jhaberman@gmail.com>2011-06-17 10:34:29 -0700
commit559e23c796f973a65d05c76e211835b126ee8ac8 (patch)
treeec359628f3b93b88718cc34e5ec86ca6e6c6c1e2 /src
parenta503b8859c37906ab5012db163daca43bfe393bb (diff)
Major refactoring: abandon upb_msg, add upb_accessors.
Next on the chopping block is upb_string.
Diffstat (limited to 'src')
-rw-r--r--src/upb.h7
-rw-r--r--src/upb_decoder.c10
-rw-r--r--src/upb_decoder.h12
-rw-r--r--src/upb_def.c1458
-rw-r--r--src/upb_def.h371
-rw-r--r--src/upb_descriptor.c548
-rw-r--r--src/upb_descriptor.h67
-rw-r--r--src/upb_glue.c49
-rw-r--r--src/upb_glue.h8
-rw-r--r--src/upb_handlers.c4
-rw-r--r--src/upb_msg.c611
-rw-r--r--src/upb_msg.h429
-rw-r--r--src/upb_string.h6
-rw-r--r--src/upb_table.h6
14 files changed, 1806 insertions, 1780 deletions
diff --git a/src/upb.h b/src/upb.h
index d3e7b34..59429f4 100644
--- a/src/upb.h
+++ b/src/upb.h
@@ -146,6 +146,7 @@ typedef uint8_t upb_valuetype_t;
#define UPB_VALUETYPE_BYTESRC 32
#define UPB_VALUETYPE_RAW 33
#define UPB_VALUETYPE_FIELDDEF 34
+#define UPB_VALUETYPE_PTR 35
// A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use.
@@ -196,11 +197,9 @@ UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64));
UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32));
UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64));
UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL));
-UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING));
-UPB_VALUE_ACCESSORS(msg, msg, upb_msg*, UPB_TYPE(MESSAGE));
-UPB_VALUE_ACCESSORS(arr, arr, upb_array*, UPB_VALUETYPE_ARRAY);
-UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC);
+UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); // Marked for destruction.
UPB_VALUE_ACCESSORS(fielddef, fielddef, upb_fielddef*, UPB_VALUETYPE_FIELDDEF);
+UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR);
extern upb_value UPB_NO_VALUE;
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 34cd811..a44b561 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -11,6 +11,7 @@
#include "upb_bytestream.h"
#include "upb_decoder.h"
#include "upb_varint.h"
+#include "upb_msg.h"
// Used for frames that have no specific end offset: groups, repeated primitive
// fields inside groups, and the top-level message.
@@ -346,7 +347,7 @@ static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
d->ptr = d->buf + bottom->end_offset;
}
-void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
+void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
upb_dispatcher_init(
&d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d);
#ifdef UPB_USE_JIT_X64
@@ -388,6 +389,13 @@ void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
}
}
+// Initializes decoder "d" for msgdef "m": creates a new handlers object,
+// registers the accessor handlers for "m" on it, and initializes the decoder
+// with those handlers.
+void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) {
+ upb_handlers *h = upb_handlers_new();
+ upb_accessors_reghandlers(h, m);
+ upb_decoder_initforhandlers(d, h);
+ // Release the local ref. NOTE(review): this relies on
+ // upb_decoder_initforhandlers() having taken its own ref on "h", as the
+ // comment in upb_decoder.h states -- confirm.
+ upb_handlers_unref(h);
+}
+
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED;
d->bytesrc = bytesrc;
diff --git a/src/upb_decoder.h b/src/upb_decoder.h
index a98b235..e9bc0b4 100644
--- a/src/upb_decoder.h
+++ b/src/upb_decoder.h
@@ -91,8 +91,16 @@ typedef struct {
struct _upb_decoder;
typedef struct _upb_decoder upb_decoder;
-// Allocates and frees a upb_decoder, respectively.
-void upb_decoder_init(upb_decoder *d, upb_handlers *handlers);
+// Initializes/uninitializes a decoder for calling into the given handlers
+// (or to write into the given msgdef, given its accessors). Takes a ref
+// on the handlers or msgdef.
+void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *h);
+
+// Equivalent to (for a newly created upb_handlers "h"):
+// upb_accessors_reghandlers(h, m);
+// upb_decoder_initforhandlers(d, h);
+// except possibly more efficient, by using cached state in the msgdef.
+void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m);
void upb_decoder_uninit(upb_decoder *d);
// Resets the internal state of an already-allocated decoder. This puts it in a
diff --git a/src/upb_def.c b/src/upb_def.c
index 791b885..45e7f73 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -7,30 +7,10 @@
#include <stdlib.h>
#include <stddef.h>
-#include <errno.h>
#include "upb_def.h"
-#include "upb_msg.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
-static int upb_div_round_up(int numerator, int denominator) {
- /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
- return numerator > 0 ? (numerator - 1) / denominator + 1 : 0;
-}
-
-/* Joins strings together, for example:
- * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
- * join("", "Baz") -> "Baz"
- * Caller owns a ref on the returned string. */
-static upb_string *upb_join(upb_string *base, upb_string *name) {
- if (!base || upb_string_len(base) == 0) {
- return upb_string_getref(name);
- } else {
- return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT,
- UPB_STRARG(base), UPB_STRARG(name));
- }
-}
-
/* Search for a character in a string, in reverse. */
static int my_memrchr(char *data, char c, size_t len)
{
@@ -39,181 +19,18 @@ static int my_memrchr(char *data, char c, size_t len)
return off;
}
-/* upb_def ********************************************************************/
-
-// Defs are reference counted, but can have cycles when types are
-// self-recursive or mutually recursive, so we need to be capable of collecting
-// the cycles. In our situation defs are immutable (so cycles cannot be
-// created or destroyed post-initialization). We need to be thread-safe but
-// want to avoid locks if at all possible and rely only on atomic operations.
-//
-// Our scheme is as follows. First we give each def a flag indicating whether
-// it is part of a cycle or not. Because defs are immutable, this flag will
-// never change. For acyclic defs, we can use a naive algorithm and avoid the
-// overhead of dealing with cycles. Most defs will be acyclic, and most cycles
-// will be very short.
-//
-// For defs that participate in cycles we keep two reference counts. One
-// tracks references that come from outside the cycle (we call these external
-// references), and is incremented and decremented like a regular refcount.
-// The other is a cycle refcount, and works as follows. Every cycle is
-// considered distinct, even if two cycles share members. For example, this
-// graph has two distinct cycles:
-//
-// A-->B-->C
-// ^ | |
-// +---+---+
-//
-// The cycles in this graph are AB and ABC. When A's external refcount
-// transitions from 0->1, we say that A takes "cycle references" on both
-// cycles. Taking a cycle reference means incrementing the cycle refcount of
-// all defs in the cycle. Since A and B are common to both cycles, A and B's
-// cycle refcounts will be incremented by two, and C's will be incremented by
-// one. Likewise, when A's external refcount transitions from 1->0, we
-// decrement A and B's cycle refcounts by two and C's by one. We collect a
-// cyclic type when its cycle refcount drops to zero. A precondition for this
-// is that the external refcount has dropped to zero also.
-//
-// This algorithm is relatively cheap, since it only requires extra work when
-// the external refcount on a cyclic type transitions from 0->1 or 1->0.
-
-static void upb_msgdef_free(upb_msgdef *m);
-static void upb_enumdef_free(upb_enumdef *e);
-static void upb_unresolveddef_free(struct _upb_unresolveddef *u);
-
-static void upb_def_free(upb_def *def)
-{
- switch(def->type) {
- case UPB_DEF_MSG:
- upb_msgdef_free(upb_downcast_msgdef(def));
- break;
- case UPB_DEF_ENUM:
- upb_enumdef_free(upb_downcast_enumdef(def));
- break;
- case UPB_DEF_SVC:
- assert(false); /* Unimplemented. */
- break;
- case UPB_DEF_UNRESOLVED:
- upb_unresolveddef_free(upb_downcast_unresolveddef(def));
- break;
- default:
- assert(false);
- }
-}
-
-// Depth-first search for all cycles that include cycle_base. Returns the
-// number of paths from def that lead to cycle_base, which is equivalent to the
-// number of cycles def is in that include cycle_base.
-//
-// open_defs tracks the set of nodes that are currently being visited in the
-// search so we can stop the search if we detect a cycles that do not involve
-// cycle_base. We can't color the nodes as we go by writing to a member of the
-// def, because another thread could be performing the search concurrently.
-static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base,
- upb_msgdef **open_defs, int num_open_defs,
- bool ref) {
- bool found = false;
- for(int i = 0; i < num_open_defs; i++) {
- if(open_defs[i] == m) {
- // We encountered a cycle that did not involve cycle_base.
- found = true;
- break;
- }
- }
-
- if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) {
- return 0;
- } else if(m == cycle_base) {
- return 1;
- } else {
- int path_count = 0;
- if(cycle_base == NULL) {
- cycle_base = m;
- } else {
- open_defs[num_open_defs++] = m;
- }
- upb_msg_iter iter = upb_msg_begin(m);
- for(; !upb_msg_done(iter); iter = upb_msg_next(m, iter)) {
- upb_fielddef *f = upb_msg_iter_field(iter);
- upb_def *def = f->def;
- if(upb_issubmsg(f) && def->is_cyclic) {
- upb_msgdef *sub_m = upb_downcast_msgdef(def);
- path_count += upb_cycle_ref_or_unref(sub_m, cycle_base, open_defs,
- num_open_defs, ref);
- }
- }
- if(ref) {
- upb_atomic_add(&m->cycle_refcount, path_count);
- } else {
- if(upb_atomic_add(&m->cycle_refcount, -path_count))
- upb_def_free(UPB_UPCAST(m));
- }
- return path_count;
- }
-}
-
-void _upb_def_reftozero(upb_def *def) {
- if(def->is_cyclic) {
- upb_msgdef *m = upb_downcast_msgdef(def);
- upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
- upb_cycle_ref_or_unref(m, NULL, open_defs, 0, false);
- } else {
- upb_def_free(def);
- }
-}
-
-void _upb_def_cyclic_ref(upb_def *def) {
- upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
- upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true);
-}
-
-static void upb_def_init(upb_def *def, upb_deftype type) {
- def->type = type;
- def->is_cyclic = 0; // We detect this later, after resolving refs.
- def->search_depth = 0;
- def->fqname = NULL;
- upb_atomic_init(&def->refcount, 1);
-}
-
-static void upb_def_uninit(upb_def *def) {
- upb_string_unref(def->fqname);
-}
-
-
-/* upb_defbuilder ************************************************************/
-
-// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in
-// the format defined in descriptor.proto. The output of a upb_defbuilder is
-// a list of upb_def* that possibly contain unresolved references.
-//
-// We use a separate object (upb_defbuilder) instead of having the defs handle
-// the parse themselves because we need to store state that is only necessary
-// during the building process itself.
-//
-// All of the handlers registration in this file must be done using the
-// low-level upb_register_typed_* interface, since we might not have a msgdef
-// yet (in the case of bootstrapping). This makes it more laborious than it
-// will be for real users.
-
-// upb_deflist: A little dynamic array for storing a growing list of upb_defs.
-typedef struct {
- upb_def **defs;
- uint32_t len;
- uint32_t size;
-} upb_deflist;
-
-static void upb_deflist_init(upb_deflist *l) {
+void upb_deflist_init(upb_deflist *l) {
l->size = 8;
l->defs = malloc(l->size * sizeof(void*));
l->len = 0;
}
-static void upb_deflist_uninit(upb_deflist *l) {
+void upb_deflist_uninit(upb_deflist *l) {
for(uint32_t i = 0; i < l->len; i++) upb_def_unref(l->defs[i]);
free(l->defs);
}
-static void upb_deflist_push(upb_deflist *l, upb_def *d) {
+void upb_deflist_push(upb_deflist *l, upb_def *d) {
if(l->len == l->size) {
l->size *= 2;
l->defs = realloc(l->defs, l->size * sizeof(void*));
@@ -221,179 +38,74 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) {
l->defs[l->len++] = d;
}
-static upb_def *upb_deflist_last(upb_deflist *l) {
- return l->defs[l->len-1];
-}
-
-// Qualify the defname for all defs starting with offset "start" with "str".
-static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) {
- for(uint32_t i = start; i < l->len; i++) {
- upb_def *def = l->defs[i];
- upb_string *name = def->fqname;
- def->fqname = upb_join(str, name);
- upb_string_unref(name);
- }
-}
-
-// We keep a stack of all the messages scopes we are currently in, as well as
-// the top-level file scope. This is necessary to correctly qualify the
-// definitions that are contained inside. "name" tracks the name of the
-// message or package (a bare name -- not qualified by any enclosing scopes).
-typedef struct {
- upb_string *name;
- // Index of the first def that is under this scope. For msgdefs, the
- // msgdef itself is at start-1.
- int start;
-} upb_defbuilder_frame;
-
-struct _upb_defbuilder {
- upb_deflist defs;
- upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH];
- int stack_len;
- upb_status status;
- upb_symtab *symtab;
-
- uint32_t number;
- upb_string *name;
- bool saw_number;
- bool saw_name;
-
- upb_string *default_string;
-
- upb_fielddef *f;
-};
-
-// Forward declares for top-level file descriptors.
-static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h);
-static upb_mhandlers * upb_enumdef_register_EnumDescriptorProto(upb_handlers *h);
-
-upb_defbuilder *upb_defbuilder_new(upb_symtab *s) {
- upb_defbuilder *b = malloc(sizeof(*b));
- upb_deflist_init(&b->defs);
- upb_status_init(&b->status);
- b->symtab = s;
- b->stack_len = 0;
- b->name = NULL;
- b->default_string = NULL;
- return b;
-}
-
-static void upb_defbuilder_free(upb_defbuilder *b) {
- upb_string_unref(b->name);
- upb_status_uninit(&b->status);
- upb_deflist_uninit(&b->defs);
- upb_string_unref(b->default_string);
- while (b->stack_len > 0) {
- upb_defbuilder_frame *f = &b->stack[--b->stack_len];
- upb_string_unref(f->name);
- }
- free(b);
-}
-
-static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) {
- if (b->stack_len <= 1) return NULL;
- int index = b->stack[b->stack_len-1].start - 1;
- assert(index >= 0);
- return upb_downcast_msgdef(b->defs.defs[index]);
-}
-static upb_def *upb_defbuilder_last(upb_defbuilder *b) {
- return upb_deflist_last(&b->defs);
-}
-
-// Start/end handlers for FileDescriptorProto and DescriptorProto (the two
-// entities that have names and can contain sub-definitions.
-void upb_defbuilder_startcontainer(upb_defbuilder *b) {
- upb_defbuilder_frame *f = &b->stack[b->stack_len++];
- f->start = b->defs.len;
- f->name = NULL;
-}
+/* upb_def ********************************************************************/
-void upb_defbuilder_endcontainer(upb_defbuilder *b) {
- upb_defbuilder_frame *f = &b->stack[--b->stack_len];
- upb_deflist_qualify(&b->defs, f->name, f->start);
- upb_string_unref(f->name);
-}
+static void upb_msgdef_free(upb_msgdef *m);
+static void upb_enumdef_free(upb_enumdef *e);
+static void upb_unresolveddef_free(struct _upb_unresolveddef *u);
-void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) {
- upb_defbuilder_frame *f = &b->stack[b->stack_len-1];
- upb_string_unref(f->name);
- f->name = upb_string_getref(str);
-}
+#ifndef NDEBUG
+static bool upb_def_ismutable(upb_def *def) { return def->symtab == NULL; }
+#endif
-// Handlers for google.protobuf.FileDescriptorProto.
-static upb_flow_t upb_defbuilder_FileDescriptorProto_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_defbuilder_startcontainer(b);
- return UPB_CONTINUE;
+// Frees "def" by dispatching on def->type to the matching type-specific free
+// function (msgdef, enumdef or unresolveddef). Any other type trips the
+// assert; with asserts compiled out nothing is freed for such a def.
+static void upb_def_free(upb_def *def) {
+ switch (def->type) {
+ case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break;
+ case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break;
+ case UPB_DEF_UNRESOLVED:
+ upb_unresolveddef_free(upb_downcast_unresolveddef(def)); break;
+ default:
+ assert(false);
+ }
}
-static void upb_defbuilder_FileDescriptorProto_endmsg(void *_b,
- upb_status *status) {
- (void)status;
- upb_defbuilder *b = _b;
- upb_defbuilder_endcontainer(b);
+// Duplicates "def" by dispatching on def->type to upb_msgdef_dup() or
+// upb_enumdef_dup() and upcasting the result. Other def types (including
+// unresolved defs) are not supported: they trip the assert, and NULL is
+// returned when asserts are compiled out.
+upb_def *upb_def_dup(upb_def *def) {
+ switch (def->type) {
+ case UPB_DEF_MSG: return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef(def)));
+ case UPB_DEF_ENUM: return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef(def)));
+ default: assert(false); return NULL;
+ }
}
-static upb_flow_t upb_defbuilder_FileDescriptorProto_package(void *_b,
- upb_value fval,
- upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- upb_defbuilder_setscopename(b, upb_value_getstr(val));
- return UPB_CONTINUE;
+// Prior to being in a symtab, the def's refcount controls the lifetime of the
+// def itself. If the refcount falls to zero, the def is deleted. Once the
+// def belongs to a symtab, the def is owned by the symtab and its refcount
+// determines whether the def owns a ref on the symtab or not.
+//
+// Takes a ref on "def". When upb_atomic_ref() returns true and the def
+// belongs to a symtab, a ref is also taken on that symtab.
+// NOTE(review): presumably upb_atomic_ref() returns true only on the 0 -> 1
+// transition -- confirm against upb_atomic.h.
+void upb_def_ref(upb_def *def) {
+ if (upb_atomic_ref(&def->refcount) && def->symtab)
+ upb_symtab_ref(def->symtab);
}
-static upb_mhandlers *upb_defbuilder_register_FileDescriptorProto(
- upb_handlers *h) {
- upb_mhandlers *m = upb_handlers_newmhandlers(h);
- upb_mhandlers_setstartmsg(m, &upb_defbuilder_FileDescriptorProto_startmsg);
- upb_mhandlers_setendmsg(m, &upb_defbuilder_FileDescriptorProto_endmsg);
-
-#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM
-#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE
- upb_fhandlers *f =
- upb_mhandlers_newfhandlers(m, FNUM(PACKAGE), FTYPE(PACKAGE), false);
- upb_fhandlers_setvalue(f, &upb_defbuilder_FileDescriptorProto_package);
-
- upb_mhandlers_newfhandlers_subm(m, FNUM(MESSAGE_TYPE), FTYPE(MESSAGE_TYPE), true,
- upb_msgdef_register_DescriptorProto(h));
- upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true,
- upb_enumdef_register_EnumDescriptorProto(h));
- // TODO: services, extensions
- return m;
+// Hands def "d" over to symtab "s". The def must currently have a positive
+// refcount. Records "s" as the def's symtab and drops one ref on the def; if
+// upb_atomic_unref() returns false, a ref is taken on the symtab.
+// NOTE(review): presumably upb_atomic_unref() returns true when the count
+// reaches zero, so the symtab ref is taken exactly when other refs on the def
+// remain -- confirm against upb_atomic.h.
+static void upb_def_movetosymtab(upb_def *d, upb_symtab *s) {
+ assert(upb_atomic_read(&d->refcount) > 0);
+ d->symtab = s;
+ if (!upb_atomic_unref(&d->refcount)) upb_symtab_ref(s);
+ // For msgdefs, compact the field-number table (itof).
+ upb_msgdef *m = upb_dyncast_msgdef(d);
+ if (m) upb_inttable_compact(&m->itof);
}
-#undef FNUM
-#undef FTYPE
-// Handlers for google.protobuf.FileDescriptorSet.
-static bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs,
- bool allow_redef, upb_status *status);
-
-static void upb_defbuilder_FileDescriptorSet_onendmsg(void *_b,
- upb_status *status) {
- upb_defbuilder *b = _b;
- if (upb_ok(status))
- upb_symtab_add_defs(b->symtab, b->defs.defs, b->defs.len, false, status);
- upb_defbuilder_free(b);
+// Releases a ref on "def"; a NULL "def" is a no-op. When upb_atomic_unref()
+// returns true, a def that belongs to a symtab releases a ref on that symtab,
+// while a def with no symtab is freed directly.
+void upb_def_unref(upb_def *def) {
+ if (!def) return;
+ if (upb_atomic_unref(&def->refcount)) {
+ if (def->symtab) {
+ upb_symtab_unref(def->symtab);
+ // Def might be deleted now.
+ } else {
+ upb_def_free(def);
+ }
+ }
}
-static upb_mhandlers *upb_defbuilder_register_FileDescriptorSet(upb_handlers *h) {
- upb_mhandlers *m = upb_handlers_newmhandlers(h);
- upb_mhandlers_setendmsg(m, upb_defbuilder_FileDescriptorSet_onendmsg);
-
-#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM
-#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE
- upb_mhandlers_newfhandlers_subm(m, FNUM(FILE), FTYPE(FILE), true,
- upb_defbuilder_register_FileDescriptorProto(h));
- return m;
+// Initializes the common def header: sets the type, clears the fully-qualified
+// name and symtab pointer, and starts the refcount at 1.
+static void upb_def_init(upb_def *def, upb_deftype_t type) {
+ def->type = type;
+ def->fqname = NULL;
+ def->symtab = NULL;
+ upb_atomic_init(&def->refcount, 1);
}
-#undef FNUM
-#undef FTYPE
-upb_mhandlers *upb_defbuilder_reghandlers(upb_handlers *h) {
- h->should_jit = false;
- return upb_defbuilder_register_FileDescriptorSet(h);
+// Releases the resources held by the common def header: drops the ref on the
+// def's fully-qualified name.
+static void upb_def_uninit(upb_def *def) {
+ upb_string_unref(def->fqname);
}
@@ -428,6 +140,14 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) {
/* upb_enumdef ****************************************************************/
+// Allocates a new, empty enumdef. The def header is initialized by
+// upb_def_init() (refcount 1, no name, no symtab), both lookup tables
+// (name->number and number->name) start empty, and the default value is 0
+// until upb_enumdef_setdefault() is called.
+upb_enumdef *upb_enumdef_new() {
+  upb_enumdef *e = malloc(sizeof(*e));
+  upb_def_init(&e->base, UPB_DEF_ENUM);
+  upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent));
+  upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent));
+  // malloc() does not zero memory; give the default a defined value so that
+  // reading it before upb_enumdef_setdefault() is not an uninitialized read.
+  e->defaultval = 0;
+  return e;
+}
+
static void upb_enumdef_free(upb_enumdef *e) {
upb_enum_iter i;
for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
@@ -440,129 +160,29 @@ static void upb_enumdef_free(upb_enumdef *e) {
free(e);
}
-// google.protobuf.EnumValueDescriptorProto.
-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- b->saw_number = false;
- b->saw_name = false;
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_b,
- upb_value fval,
- upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- upb_string_unref(b->name);
- b->name = upb_string_getref(upb_value_getstr(val));
- b->saw_name = true;
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_number(void *_b,
- upb_value fval,
- upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- b->number = upb_value_getint32(val);
- b->saw_number = true;
- return UPB_CONTINUE;
-}
-
-static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b,
- upb_status *status) {
- upb_defbuilder *b = _b;
- if(!b->saw_number || !b->saw_name) {
- upb_seterr(status, UPB_ERROR, "Enum value missing name or number.");
- return;
- }
- upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b));
- if (upb_inttable_count(&e->iton) == 0) {
- // The default value of an enum (in the absence of an explicit default) is
- // its first listed value.
- e->default_value = b->number;
- }
- upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number};
- upb_iton_ent iton_ent = {0, b->name};
- upb_strtable_insert(&e->ntoi, &ntoi_ent.e);
- upb_inttable_insert(&e->iton, b->number, &iton_ent);
- // We don't unref "name" because we pass our ref to the iton entry of the
- // table. strtables can ref their keys, but the inttable doesn't know that
- // the value is a string.
- b->name = NULL;
-}
-
-static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto(
- upb_handlers *h) {
- upb_mhandlers *m = upb_handlers_newmhandlers(h);
- upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumValueDescriptorProto_startmsg);
- upb_mhandlers_setendmsg(m, &upb_enumdef_EnumValueDescriptorProto_endmsg);
-
-#define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM
-#define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE
- upb_fhandlers *f;
- f = upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false);
- upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_name);
-
- f = upb_mhandlers_newfhandlers(m, FNUM(NUMBER), FTYPE(NUMBER), false);
- upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_number);
- return m;
-}
-#undef FNUM
-#undef FTYPE
-
-// google.protobuf.EnumDescriptorProto.
-static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_enumdef *e = malloc(sizeof(*e));
- upb_def_init(&e->base, UPB_DEF_ENUM);
- upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent));
- upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent));
- upb_deflist_push(&b->defs, UPB_UPCAST(e));
- return UPB_CONTINUE;
-}
-
-static void upb_enumdef_EnumDescriptorProto_endmsg(void *_b, upb_status *status) {
- upb_defbuilder *b = _b;
- upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b));
- if (upb_defbuilder_last((upb_defbuilder*)_b)->fqname == NULL) {
- upb_seterr(status, UPB_ERROR, "Enum had no name.");
- return;
- }
- if (upb_inttable_count(&e->iton) == 0) {
- upb_seterr(status, UPB_ERROR, "Enum had no values.");
- return;
+// Returns a new enumdef holding the same name/number pairs as "e".
+// NOTE(review): the fully-qualified name and default value are not copied
+// here -- confirm whether callers are expected to set them.
+upb_enumdef *upb_enumdef_dup(upb_enumdef *e) {
+  upb_enumdef *new_e = upb_enumdef_new();
+  upb_enum_iter i;
+  for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
+    // The insertion must happen outside assert(): with NDEBUG defined the
+    // assert expression is not evaluated, which would leave the copy empty.
+    bool ok = upb_enumdef_addval(new_e, upb_enum_iter_name(i),
+                                 upb_enum_iter_number(i));
+    assert(ok);
+    (void)ok;  // Silence the unused-variable warning in NDEBUG builds.
}
+  return new_e;
}
-static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_b,
- upb_value fval,
- upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b));
- upb_string_unref(e->base.fqname);
- e->base.fqname = upb_string_getref(upb_value_getstr(val));
- return UPB_CONTINUE;
+// Adds the value "name" = "num" to enumdef "e". Returns false, without
+// modifying the enum, if either the number or the name is already present;
+// otherwise inserts into both lookup tables and returns true.
+bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num) {
+ if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL)) return false;
+ upb_ntoi_ent ntoi_ent = {{name, 0}, num};
+ upb_iton_ent iton_ent = {0, name};
+ upb_strtable_insert(&e->ntoi, &ntoi_ent.e);
+ upb_inttable_insert(&e->iton, num, &iton_ent); // Uses strtable's ref on name
+ return true;
}
-static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) {
- upb_mhandlers *m = upb_handlers_newmhandlers(h);
- upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumDescriptorProto_startmsg);
- upb_mhandlers_setendmsg(m, &upb_enumdef_EnumDescriptorProto_endmsg);
-
-#define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM
-#define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE
- upb_fhandlers *f =
- upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false);
- upb_fhandlers_setvalue(f, &upb_enumdef_EnumDescriptorProto_name);
-
- upb_mhandlers_newfhandlers_subm(m, FNUM(VALUE), FTYPE(VALUE), true,
- upb_enumdef_register_EnumValueDescriptorProto(h));
- return m;
+// Sets the default value of enumdef "e" to "val". The def must still be
+// mutable, i.e. not yet owned by a symtab (checked by assert only). The value
+// is not checked against the enum's registered numbers here.
+void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) {
+ assert(upb_def_ismutable(UPB_UPCAST(e)));
+ e->defaultval = val;
}
-#undef FNUM
-#undef FTYPE
upb_enum_iter upb_enum_begin(upb_enumdef *e) {
// We could iterate over either table here; the choice is arbitrary.
@@ -573,59 +193,89 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) {
return upb_inttable_next(&e->iton, iter);
}
-upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) {
+upb_string *upb_enumdef_iton(upb_enumdef *def, int32_t num) {
upb_iton_ent *e =
(upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e));
return e ? e->string : NULL;
}
-bool upb_enumdef_ntoi(upb_enumdef *def, upb_string *name, upb_enumval_t *num) {
+bool upb_enumdef_ntoi(upb_enumdef *def, upb_string *name, int32_t *num) {
upb_ntoi_ent *e = (upb_ntoi_ent*)upb_strtable_lookup(&def->ntoi, name);
if (!e) return false;
- *num = e->value;
+ if (num) *num = e->value;
return true;
}
/* upb_fielddef ***************************************************************/
+// Allocates a new fielddef with refcount 1 that is not attached to any msgdef
+// or def. It starts non-finalized with type 0, an OPTIONAL label, number 0,
+// no name and no accessor. f->fval is set to a upb_value that points back at
+// the fielddef itself.
+// NOTE(review): f->defaultval is not initialized here -- confirm it is always
+// set before being read.
+upb_fielddef *upb_fielddef_new() {
+ upb_fielddef *f = malloc(sizeof(*f));
+ f->msgdef = NULL;
+ f->def = NULL;
+ upb_atomic_init(&f->refcount, 1);
+ f->finalized = false;
+ f->type = 0;
+ f->label = UPB_LABEL(OPTIONAL);
+ f->hasbit = 0;
+ f->offset = 0;
+ f->number = 0; // not a valid field number.
+ f->name = NULL;
+ f->accessor = NULL;
+ upb_value_setfielddef(&f->fval, f);
+ return f;
+}
+
static void upb_fielddef_free(upb_fielddef *f) {
if (upb_isstring(f)) {
- upb_string_unref(upb_value_getstr(f->default_value));
- } else if (upb_issubmsg(f)) {
- upb_msg *m = upb_value_getmsg(f->default_value);
- assert(m);
- // We cheat a bit here. We need to unref msg, but we don't have a reliable
- // way of accessing the msgdef (which is required by upb_msg_unref()),
- // because f->def may have already been collected as part of a cycle if
- // this is an unowned ref. But we know that default messages never contain
- // references to other messages, and their only string references are to
- // the singleton empty string, so we can safely unref+free msg directly.
- if (upb_atomic_unref(&m->refcount)) free(m);
+ upb_string_unref(upb_value_getstr(f->defaultval));
}
upb_string_unref(f->name);
- if(f->owned) {
- upb_def_unref(f->def);
- }
free(f);
}
+// Placeholder: currently does nothing (the refcount is NOT incremented).
+// NOTE(review): upb_fielddef_unref() does decrement the refcount, so ref and
+// unref are not symmetric until this TODO is implemented.
+void upb_fielddef_ref(upb_fielddef *f) {
+ // TODO.
+ (void)f;
+}
+
+// Releases a ref on "f"; a NULL "f" is a no-op. When upb_atomic_unref()
+// returns true, a fielddef that belongs to a msgdef releases a ref on that
+// msgdef, while a fielddef with no msgdef is freed directly.
+// NOTE(review): still marked TODO; upb_fielddef_ref() does not yet increment
+// the refcount, so calls are not balanced.
+void upb_fielddef_unref(upb_fielddef *f) {
+ // TODO.
+ (void)f;
+ if (!f) return;
+ if (upb_atomic_unref(&f->refcount)) {
+ if (f->msgdef) {
+ upb_msgdef_unref(f->msgdef);
+ // fielddef might be deleted now.
+ } else {
+ upb_fielddef_free(f);
+ }
+ }
+}
+
+// Returns a newly allocated copy of "f" carrying the same msgdef pointer,
+// type, label, number, name and (when "f" refers to another def) type name.
+// The copy has its own refcount of 1, set by upb_fielddef_new().
+// NOTE(review): the copy records f->msgdef without being registered in that
+// msgdef; upb_fielddef_unref() then unrefs the msgdef instead of freeing the
+// copy -- confirm this is the intended ownership.
+upb_fielddef *upb_fielddef_dup(upb_fielddef *f) {
+  upb_fielddef *newf = upb_fielddef_new();
+  newf->msgdef = f->msgdef;
+  newf->type = f->type;
+  newf->label = f->label;
+  newf->number = f->number;
+  // upb_fielddef_free() unrefs the name, so the copy must hold its own ref
+  // rather than borrowing the original's.
+  newf->name = f->name ? upb_string_getref(f->name) : NULL;
+  // f->def is NULL for fields that do not refer to another def (see
+  // upb_fielddef_new()), so only copy the type name when there is one.
+  if (f->def) upb_fielddef_settypename(newf, f->def->fqname);
+  // Return the copy; returning "f" would leak newf and alias the original.
+  return newf;
+}
+
static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
- if(f->owned) upb_def_unref(f->def);
+ assert(upb_dyncast_unresolveddef(f->def));
+ upb_def_unref(f->def);
f->def = def;
- // We will later make the ref unowned if it is a part of a cycle.
- f->owned = true;
- upb_def_ref(def);
- if (upb_issubmsg(f)) {
- upb_msgdef *md = upb_downcast_msgdef(def);
- upb_value_setmsg(&f->default_value, upb_msg_getref(md->default_message));
- } else if (f->type == UPB_TYPE(ENUM)) {
- upb_string *str = upb_value_getstr(f->default_value);
+ if (f->type == UPB_TYPE(ENUM)) {
+ // Resolve the enum's default from a string to an integer.
+ upb_string *str = upb_value_getstr(f->defaultval);
assert(str); // Should point to either a real default or the empty string.
upb_enumdef *e = upb_downcast_enumdef(f->def);
- upb_enumval_t val = 0;
+ int32_t val = 0;
if (str == upb_emptystring()) {
- upb_value_setint32(&f->default_value, e->default_value);
+ upb_value_setint32(&f->defaultval, e->defaultval);
} else {
bool success = upb_enumdef_ntoi(e, str, &val);
upb_string_unref(str);
@@ -634,368 +284,201 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
"member of the enum", UPB_STRARG(str));
return false;
}
- upb_value_setint32(&f->default_value, val);
+ upb_value_setint32(&f->defaultval, val);
}
}
return true;
}
-static upb_flow_t upb_fielddef_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_fielddef *f = malloc(sizeof(*f));
- f->number = -1;
- f->name = NULL;
- f->def = NULL;
- f->owned = false;
- f->msgdef = upb_defbuilder_top(b);
- b->f = f;
- return UPB_CONTINUE;
-}
-
-// Converts the default value in string "dstr" into "d". Passes a ref on dstr.
-// Returns true on success.
-static bool upb_fielddef_setdefault(upb_string *dstr, upb_value *d, int type) {
- bool success = true;
- if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
- // We'll keep the ref we had on it. We include enums in this case because
- // we need the enumdef to resolve the name, but we may not have it yet.
- // We'll resolve it later.
- if (dstr) {
- upb_value_setstr(d, dstr);
- } else {
- upb_value_setstr(d, upb_emptystring());
- }
- } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
- // We don't expect to get a default value.
- upb_string_unref(dstr);
- if (dstr != NULL) success = false;
- } else {
- // The strto* functions need the string to be NULL-terminated.
- char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr);
- char *end;
- upb_string_unref(dstr);
- switch (type) {
- case UPB_TYPE(INT32):
- case UPB_TYPE(SINT32):
- case UPB_TYPE(SFIXED32):
- if (strz) {
- long val = strtol(strz, &end, 0);
- if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
- success = false;
- else
- upb_value_setint32(d, val);
- } else {
- upb_value_setint32(d, 0);
- }
- break;
- case UPB_TYPE(INT64):
- case UPB_TYPE(SINT64):
- case UPB_TYPE(SFIXED64):
- if (strz) {
- upb_value_setint64(d, strtoll(strz, &end, 0));
- if (errno == ERANGE || *end) success = false;
- } else {
- upb_value_setint64(d, 0);
- }
- break;
- case UPB_TYPE(UINT32):
- case UPB_TYPE(FIXED32):
- if (strz) {
- unsigned long val = strtoul(strz, &end, 0);
- if (val > UINT32_MAX || errno == ERANGE || *end)
- success = false;
- else
- upb_value_setuint32(d, val);
- } else {
- upb_value_setuint32(d, 0);
- }
- break;
- case UPB_TYPE(UINT64):
- case UPB_TYPE(FIXED64):
- if (strz) {
- upb_value_setuint64(d, strtoull(strz, &end, 0));
- if (errno == ERANGE || *end) success = false;
- } else {
- upb_value_setuint64(d, 0);
- }
- break;
- case UPB_TYPE(DOUBLE):
- if (strz) {
- upb_value_setdouble(d, strtod(strz, &end));
- if (errno == ERANGE || *end) success = false;
- } else {
- upb_value_setdouble(d, 0.0);
- }
- break;
- case UPB_TYPE(FLOAT):
- if (strz) {
- upb_value_setfloat(d, strtof(strz, &end));
- if (errno == ERANGE || *end) success = false;
- } else {
- upb_value_setfloat(d, 0.0);
- }
- break;
- case UPB_TYPE(BOOL):
- if (!strz || strcmp(strz, "false") == 0)
- upb_value_setbool(d, false);
- else if (strcmp(strz, "true") == 0)
- upb_value_setbool(d, true);
- else
- success = false;
- break;
- }
- free(strz);
- }
- return success;
+// Sets the field number of "f".  Asserts that "f" has not been attached to a
+// msgdef yet (upb_msgdef_addfield() inserts the field under its number).
+void upb_fielddef_setnumber(upb_fielddef *f, int32_t number) {
+ assert(f->msgdef == NULL);
+ f->number = number;
}
-static void upb_fielddef_endmsg(void *_b, upb_status *status) {
- upb_defbuilder *b = _b;
- upb_fielddef *f = b->f;
- // TODO: verify that all required fields were present.
- assert(f->number != -1 && f->name != NULL);
- assert((f->def != NULL) == upb_hasdef(f));
+// Sets the name of "f", taking a new ref on "name" and releasing the ref held
+// on any previously set name.  Asserts that "f" has not been attached to a
+// msgdef yet (upb_msgdef_addfield() inserts the field under its name).
+void upb_fielddef_setname(upb_fielddef *f, upb_string *name) {
+  assert(f->msgdef == NULL);
+  // Ref the new name before dropping the old one, in case they are the same
+  // string.
+  upb_string *newname = upb_string_getref(name);
+  upb_string_unref(f->name);
+  f->name = newname;
+}
- // Field was successfully read, add it as a field of the msgdef.
- upb_msgdef *m = upb_defbuilder_top(b);
- upb_itof_ent itof_ent = {0, f->type, upb_types[f->type].native_wire_type, f};
- upb_ntof_ent ntof_ent = {{f->name, 0}, f};
- upb_inttable_insert(&m->itof, f->number, &itof_ent);
- upb_strtable_insert(&m->ntof, &ntof_ent.e);
+// Sets the type of "f" (compared against UPB_TYPE(...) values elsewhere in
+// this file).  Asserts that "f" has not been finalized.
+void upb_fielddef_settype(upb_fielddef *f, uint8_t type) {
+ assert(!f->finalized);
+ f->type = type;
+}
- upb_string *dstr = b->default_string;
- b->default_string = NULL;
- if (!upb_fielddef_setdefault(dstr, &f->default_value, f->type)) {
- // We don't worry too much about giving a great error message since the
- // compiler should have ensured this was correct.
- upb_seterr(status, UPB_ERROR, "Error converting default value.");
- return;
- }
+// Sets the label of "f" (compared against UPB_LABEL(...) values elsewhere in
+// this file).  Asserts that "f" has not been finalized.
+void upb_fielddef_setlabel(upb_fielddef *f, uint8_t label) {
+ assert(!f->finalized);
+ f->label = label;
+}
+// Stores "value" as the default value of "f".  Asserts that "f" has not been
+// finalized.  The value is copied as-is; no string ref is taken here.
+void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) {
+ assert(!f->finalized);
+ // TODO: string ownership?
+ f->defaultval = value;
}
-static upb_flow_t upb_fielddef_ontype(void *_b, upb_value fval, upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- b->f->type = upb_value_getint32(val);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_fielddef_onlabel(void *_b, upb_value fval, upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- b->f->label = upb_value_getint32(val);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_fielddef_onnumber(void *_b, upb_value fval, upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- b->f->number = upb_value_getint32(val);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_fielddef_onname(void *_b, upb_value fval, upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- upb_string_unref(b->f->name);
- b->f->name = upb_string_getref(upb_value_getstr(val));
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_fielddef_ontypename(void *_b, upb_value fval,
- upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- upb_def_unref(b->f->def);
- b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val)));
- b->f->owned = true;
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_fielddef_ondefaultval(void *_b, upb_value fval,
- upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- // Have to convert from string to the correct type, but we might not know the
- // type yet.
- upb_string_unref(b->default_string);
- b->default_string = upb_string_getref(upb_value_getstr(val));
- return UPB_CONTINUE;
-}
-
-static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto(
- upb_handlers *h) {
- upb_mhandlers *m = upb_handlers_newmhandlers(h);
- upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg);
- upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg);
-
-#define FIELD(name, handler) \
- upb_fhandlers_setvalue( \
- upb_mhandlers_newfhandlers(m, \
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \
- false), \
- handler);
- FIELD(TYPE, &upb_fielddef_ontype);
- FIELD(LABEL, &upb_fielddef_onlabel);
- FIELD(NUMBER, &upb_fielddef_onnumber);
- FIELD(NAME, &upb_fielddef_onname);
- FIELD(TYPE_NAME, &upb_fielddef_ontypename);
- FIELD(DEFAULT_VALUE, &upb_fielddef_ondefaultval);
- return m;
+// Stores "fval" in f->fval.  Asserts that "f" has not been finalized.  The
+// value is copied as-is; no string ref is taken here.
+void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) {
+ assert(!f->finalized);
+ // TODO: string ownership?
+ f->fval = fval;
}
-#undef FNUM
-#undef FTYPE
+// Sets the accessor vtable of "f".  Asserts that "f" has not been finalized.
+// upb_msgdef_layout() only assigns hasbits and offsets to fields whose accessor
+// is non-NULL.
+void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl) {
+ assert(!f->finalized);
+ f->accessor = vtbl;
+}
-/* upb_msgdef *****************************************************************/
+// Points "f" at a new unresolved def created from "name", after dropping the
+// ref on whatever def "f" referenced before.
+void upb_fielddef_settypename(upb_fielddef *f, upb_string *name) {
+ upb_def_unref(f->def);
+ f->def = UPB_UPCAST(upb_unresolveddef_new(name));
+}
-static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) {
- // Sort by data size (ascending) to reduce padding.
+// Returns an ordering of fields based on:
+// 1. value size (small to large).
+// 2. field number.
+static int upb_fielddef_cmpval(const void *_f1, const void *_f2) {
+ upb_fielddef *f1 = *(void**)_f1;
+ upb_fielddef *f2 = *(void**)_f2;
size_t size1 = upb_types[f1->type].size;
size_t size2 = upb_types[f2->type].size;
if (size1 != size2) return size1 - size2;
- // Otherwise return in number order (just so we get a reproduceable order.
+ // Otherwise return in number order.
return f1->number - f2->number;
}
-static int upb_compare_fields(const void *f1, const void *f2) {
- return upb_compare_typed_fields(*(void**)f1, *(void**)f2);
+// qsort() comparator over an array of upb_fielddef*.  Orders fields by:
+//   1. required/optional (required fields first).
+//   2. field number (ascending).
+static int upb_fielddef_cmphasbit(const void *_f1, const void *_f2) {
+  upb_fielddef *f1 = *(void**)_f1;
+  upb_fielddef *f2 = *(void**)_f2;
+  int req1 = f1->label == UPB_LABEL(REQUIRED);
+  int req2 = f2->label == UPB_LABEL(REQUIRED);
+  // A required field must compare as "less" so that it sorts first.
+  if (req1 != req2) return req2 - req1;
+  // Otherwise return in number order.
+  return f1->number - f2->number;
}
-// google.protobuf.DescriptorProto.
-static upb_flow_t upb_msgdef_startmsg(void *_b) {
- upb_defbuilder *b = _b;
+
+/* upb_msgdef *****************************************************************/
+
+upb_msgdef *upb_msgdef_new() {
upb_msgdef *m = malloc(sizeof(*m));
upb_def_init(&m->base, UPB_DEF_MSG);
- upb_atomic_init(&m->cycle_refcount, 0);
upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
- m->default_message = NULL;
- upb_deflist_push(&b->defs, UPB_UPCAST(m));
- upb_defbuilder_startcontainer(b);
- return UPB_CONTINUE;
-}
-
-static void upb_msgdef_endmsg(void *_b, upb_status *status) {
- upb_defbuilder *b = _b;
- upb_msgdef *m = upb_defbuilder_top(b);
- if(!m->base.fqname) {
- upb_seterr(status, UPB_ERROR, "Encountered message with no name.");
- return;
+ m->size = 0;
+ m->hasbit_bytes = 0;
+ m->extension_start = 0;
+ m->extension_end = 0;
+ return m;
+}
+
+// Destroys "m": frees every field reachable through the msgdef iterator, then
+// both lookup tables, the base def state, and finally the msgdef itself.
+static void upb_msgdef_free(upb_msgdef *m) {
+  upb_msg_iter it = upb_msg_begin(m);
+  while (!upb_msg_done(it)) {
+    upb_fielddef_free(upb_msg_iter_field(it));
+    it = upb_msg_next(m, it);
+  }
+  upb_strtable_free(&m->ntof);
+  upb_inttable_free(&m->itof);
+  upb_def_uninit(&m->base);
+  free(m);
+}
+
+// Creates a new msgdef with the same size, hasbit_bytes and extension range as
+// "m", then adds a duplicate of each of m's fields to it.  The result of each
+// upb_msgdef_addfield() call is not checked.
+upb_msgdef *upb_msgdef_dup(upb_msgdef *m) {
+  upb_msgdef *copy = upb_msgdef_new();
+  copy->size = m->size;
+  copy->hasbit_bytes = m->hasbit_bytes;
+  copy->extension_start = m->extension_start;
+  copy->extension_end = m->extension_end;
+
+  upb_msg_iter it = upb_msg_begin(m);
+  while (!upb_msg_done(it)) {
+    upb_fielddef *field = upb_fielddef_dup(upb_msg_iter_field(it));
+    upb_msgdef_addfield(copy, field);
+    it = upb_msg_next(m, it);
+  }
+  return copy;
+}
+
+// Sets m->size.  Asserts that the def is still mutable.  Note that
+// upb_msgdef_layout() also computes and overwrites this value.
+void upb_msgdef_setsize(upb_msgdef *m, uint16_t size) {
+ assert(upb_def_ismutable(UPB_UPCAST(m)));
+ m->size = size;
+}
+
+// Sets m->hasbit_bytes.  Asserts that the def is still mutable.  Note that
+// upb_msgdef_layout() also computes and overwrites this value.
+void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes) {
+ assert(upb_def_ismutable(UPB_UPCAST(m)));
+ m->hasbit_bytes = bytes;
+}
+
+// Sets m->extension_start.  Asserts that the def is still mutable.
+void upb_msgdef_setextension_start(upb_msgdef *m, uint32_t start) {
+ assert(upb_def_ismutable(UPB_UPCAST(m)));
+ m->extension_start = start;
+}
+
+// Sets m->extension_end.  Asserts that the def is still mutable.
+void upb_msgdef_setextension_end(upb_msgdef *m, uint32_t end) {
+ assert(upb_def_ismutable(UPB_UPCAST(m)));
+ m->extension_end = end;
+}
+
+// Adds "f" to "m", keyed by both field number and field name, consuming the
+// caller's ref on "f".  "f" must not already belong to a msgdef.
+// Returns false (after releasing the caller's ref) if "m" already has a field
+// with the same number or name; "m" is left unchanged in that case.
+bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
+  assert(upb_atomic_read(&f->refcount) > 0);
+  assert(f->msgdef == NULL);
+  if (upb_msgdef_itof(m, f->number) || upb_msgdef_ntof(m, f->name)) {
+    // Release the caller's ref exactly once.  "f" is unattached, so it is freed
+    // here if that was the last ref.
+    upb_fielddef_unref(f);
+    return false;
}
+  f->msgdef = m;
+  // Only now give up the caller's ref.  If other refs on the field remain, take
+  // a ref on "m"; upb_fielddef_unref() releases it when the field's count runs
+  // out.
+  if (!upb_atomic_unref(&f->refcount)) upb_msgdef_ref(m);
+  upb_itof_ent itof_ent = {0, f};
+  upb_ntof_ent ntof_ent = {{f->name, 0}, f};
+  upb_inttable_insert(&m->itof, f->number, &itof_ent);
+  upb_strtable_insert(&m->ntof, &ntof_ent.e);
+  return true;
+}
+
+// Divides "numerator" by "denominator", rounding the quotient up.  Any
+// numerator <= 0 yields 0.
+static int upb_div_round_up(int numerator, int denominator) {
+  /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
+  if (numerator <= 0) return 0;
+  return (numerator - 1) / denominator + 1;
+}
- upb_inttable_compact(&m->itof);
- // Create an ordering over the fields.
- int n = upb_msgdef_numfields(m);
- upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n);
- int field = 0;
+void upb_msgdef_layout(upb_msgdef *m) {
+ // Create an ordering over the fields, but only include fields with accessors.
+ upb_fielddef **sorted_fields =
+ malloc(sizeof(upb_fielddef*) * upb_msgdef_numfields(m));
+ int n = 0;
upb_msg_iter i;
for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- sorted_fields[field++] = upb_msg_iter_field(i);
+ upb_fielddef *f = upb_msg_iter_field(i);
+ if (f->accessor) sorted_fields[n++] = f;
}
- qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields);
- // Assign offsets in the msg.
- m->set_flags_bytes = upb_div_round_up(n, 8);
- m->size = sizeof(upb_atomic_t) + m->set_flags_bytes;
+ m->hasbit_bytes = upb_div_round_up(n, 8);
+ m->size = m->hasbit_bytes; // + header_size?
+ // Assign hasbits.
+ qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmphasbit);
+ for (int i = 0; i < n; i++) {
+ upb_fielddef *f = sorted_fields[i];
+ f->hasbit = i;
+ }
+
+ // Assign value offsets.
+ qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmpval);
size_t max_align = 0;
for (int i = 0; i < n; i++) {
upb_fielddef *f = sorted_fields[i];
const upb_type_info *type_info = &upb_types[f->type];
-
- // This identifies the set bit. When we implement is_initialized (a
- // general check about whether all required bits are set) we will probably
- // want to use a different ordering that puts all the required bits
- // together.
- f->field_index = i;
- f->set_bit_mask = 1 << (i % 8);
- f->set_bit_offset = i / 8;
-
- size_t size, align;
- if (upb_isarray(f)) {
+ size_t size = type_info->size;
+ size_t align = type_info->align;
+ if (upb_isseq(f)) {
size = sizeof(void*);
align = alignof(void*);
- } else {
- size = type_info->size;
- align = type_info->align;
}
+
// General alignment rules are: each member must be at an address that is a
// multiple of that type's alignment. Also, the size of the structure as a
// whole must be a multiple of the greatest alignment of any member.
- size_t offset = upb_align_up(m->size, align);
- // Offsets are relative to the end of the refcount.
- f->byte_offset = offset - sizeof(upb_atomic_t);
- m->size = offset + size;
+ f->offset = upb_align_up(m->size, align);
+ m->size = f->offset + size;
max_align = UPB_MAX(max_align, align);
}
- free(sorted_fields);
-
if (max_align > 0) m->size = upb_align_up(m->size, max_align);
- // Create default message instance, an immutable message with all default
- // values set (except submessages, which are simply marked as unset). We
- // could alternatively leave all set bits unset, but this would make
- // upb_msg_get() take its unexpected branch more often for no good reason.
- m->default_message = upb_msg_new(m);
- for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- if (!upb_issubmsg(f) && !f->type == UPB_TYPE(ENUM)) {
- upb_msg_set(m->default_message, f, f->default_value);
- }
- }
-
- upb_defbuilder_endcontainer(b);
-}
-
-static upb_flow_t upb_msgdef_onname(void *_b, upb_value fval, upb_value val) {
- (void)fval;
- upb_defbuilder *b = _b;
- assert(val.type == UPB_TYPE(STRING));
- upb_msgdef *m = upb_defbuilder_top(b);
- upb_string_unref(m->base.fqname);
- m->base.fqname = upb_string_getref(upb_value_getstr(val));
- upb_defbuilder_setscopename(b, upb_value_getstr(val));
- return UPB_CONTINUE;
-}
-
-static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) {
- upb_mhandlers *m = upb_handlers_newmhandlers(h);
- upb_mhandlers_setstartmsg(m, &upb_msgdef_startmsg);
- upb_mhandlers_setendmsg(m, &upb_msgdef_endmsg);
-
-#define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM
-#define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE
- upb_fhandlers *f =
- upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false);
- upb_fhandlers_setvalue(f, &upb_msgdef_onname);
-
- upb_mhandlers_newfhandlers_subm(m, FNUM(FIELD), FTYPE(FIELD), true,
- upb_fielddef_register_FieldDescriptorProto(h));
- upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true,
- upb_enumdef_register_EnumDescriptorProto(h));
-
- // DescriptorProto is self-recursive, so we must link the definition.
- upb_mhandlers_newfhandlers_subm(
- m, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, m);
-
- // TODO: extensions.
- return m;
-}
-#undef FNUM
-#undef FTYPE
-
-static void upb_msgdef_free(upb_msgdef *m)
-{
- upb_msg_unref(m->default_message, m);
- upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i))
- upb_fielddef_free(upb_msg_iter_field(i));
- upb_strtable_free(&m->ntof);
- upb_inttable_free(&m->itof);
- upb_def_uninit(&m->base);
- free(m);
+ free(sorted_fields);
}
upb_msg_iter upb_msg_begin(upb_msgdef *m) {
@@ -1006,22 +489,49 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) {
return upb_inttable_next(&m->itof, iter);
}
-/* upb_symtab adding defs *****************************************************/
-// This is a self-contained group of functions that, given a list of upb_defs
-// whose references are not yet resolved, resolves references and adds them
-// atomically to a upb_symtab.
+/* upb_symtabtxn **************************************************************/
typedef struct {
upb_strtable_entry e;
upb_def *def;
} upb_symtab_ent;
+// Initializes the transaction's def table, whose entries are upb_symtab_ent.
+// The 16 is presumably an initial size hint for the table -- TODO confirm.
+void upb_symtabtxn_init(upb_symtabtxn *t) {
+ upb_strtable_init(&t->deftab, 16, sizeof(upb_symtab_ent));
+}
+
+// Tears down a transaction: releases a ref on every def still held in its
+// table, then frees the table itself.
+void upb_symtabtxn_uninit(upb_symtabtxn *txn) {
+  upb_strtable *tab = &txn->deftab;
+  upb_symtab_ent *ent = upb_strtable_begin(tab);
+  while (ent) {
+    upb_def_unref(ent->def);
+    ent = upb_strtable_next(tab, &ent->e);
+  }
+  upb_strtable_free(tab);
+}
+
+// Adds "def" to the transaction under its fully-qualified name.  Returns false
+// and leaves the transaction unchanged if a def with the same name was already
+// added; returns true otherwise.
+// NOTE(review): on failure "def" is not unref'd here, so the caller presumably
+// keeps its ref -- confirm against callers.
+bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def) {
+  // Redefinition within a single transaction is not allowed.
+  if (upb_strtable_lookup(&t->deftab, def->fqname)) return false;
+  upb_symtab_ent e = {{def->fqname, 0}, def};
+  upb_strtable_insert(&t->deftab, &e.e);
+  return true;
+}
+
+#if 0
+err:
+ // We need to free all defs from "tmptab."
+ upb_rwlock_unlock(&s->lock);
+ for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e;
+ e = upb_strtable_next(&tmptab, &e->e)) {
+ upb_def_unref(e->def);
+ }
+ upb_strtable_free(&tmptab);
+ return false;
+#endif
+
// Given a symbol and the base symbol inside which it is defined, find the
// symbol's definition in t.
static upb_symtab_ent *upb_resolve(upb_strtable *t,
- upb_string *base, upb_string *sym)
-{
+ upb_string *base, upb_string *sym) {
if(upb_string_len(sym) == 0) return NULL;
if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) {
// Symbols starting with '.' are absolute, so we do a single lookup.
@@ -1060,212 +570,63 @@ static upb_symtab_ent *upb_resolve(upb_strtable *t,
}
}
-// Performs a pass over the type graph to find all cycles that include m.
-static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status)
-{
- if(depth > UPB_MAX_TYPE_DEPTH) {
- // We have found a non-cyclic path from the base of the type tree that
- // exceeds the maximum allowed depth. There are many situations in upb
- // where we recurse over the type tree (like for example, right now) and an
- // absurdly deep tree could cause us to stack overflow on systems with very
- // limited stacks.
- upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was found at "
- "depth %d in the type graph, which exceeds the maximum type "
- "depth of %d.", UPB_UPCAST(m)->fqname, depth,
- UPB_MAX_TYPE_DEPTH);
- return false;
- } else if(UPB_UPCAST(m)->search_depth == 1) {
- // Cycle!
- int cycle_len = depth - 1;
- if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) {
- upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was involved "
- "in a cycle of length %d, which exceeds the maximum type "
- "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len,
- UPB_MAX_TYPE_CYCLE_LEN);
- return false;
- }
- return true;
- } else if(UPB_UPCAST(m)->search_depth > 0) {
- // This was a cycle, but did not originate from the base of our search tree.
- // We'll find it when we call find_cycles() on this node directly.
- return false;
- } else {
- UPB_UPCAST(m)->search_depth = ++depth;
- bool cycle_found = false;
- upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- if(!upb_issubmsg(f)) continue;
- upb_def *sub_def = f->def;
- upb_msgdef *sub_m = upb_downcast_msgdef(sub_def);
- if(upb_symtab_findcycles(sub_m, depth, status)) {
- cycle_found = true;
- UPB_UPCAST(m)->is_cyclic = true;
- if(f->owned) {
- upb_atomic_unref(&sub_def->refcount);
- f->owned = false;
- }
- }
- }
- UPB_UPCAST(m)->search_depth = 0;
- return cycle_found;
- }
+// Returns an iterator positioned at the first entry of the transaction's def
+// table, as reported by upb_strtable_begin().
+upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t) {
+ return upb_strtable_begin(&t->deftab);
}
-// Given a table of pending defs "tmptab" and a table of existing defs "symtab",
-// resolves all of the unresolved refs for the defs in tmptab. Also resolves
-// default values for enumerations and submessages.
-bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab,
- upb_status *status)
-{
- upb_symtab_ent *e;
- for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) {
- upb_msgdef *m = upb_dyncast_msgdef(e->def);
- if(!m) continue;
- // Type names are resolved relative to the message in which they appear.
- upb_string *base = e->e.key;
-
- upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- if(!upb_hasdef(f)) continue; // No resolving necessary.
- upb_string *name = upb_downcast_unresolveddef(f->def)->name;
-
- // Resolve from either the tmptab (pending adds) or symtab (existing
- // defs). If both exist, prefer the pending add, because it will be
- // overwriting the existing def.
- upb_symtab_ent *found;
- if(!(found = upb_resolve(tmptab, base, name)) &&
- !(found = upb_resolve(symtab, base, name))) {
- upb_seterr(status, UPB_ERROR,
- "could not resolve symbol '" UPB_STRFMT "'"
- " in context '" UPB_STRFMT "'",
- UPB_STRARG(name), UPB_STRARG(base));
- return false;
- }
-
- // Check the type of the found def.
- upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
- if(found->def->type != expected) {
- upb_seterr(status, UPB_ERROR, "Unexpected type");
- return false;
- }
- if (!upb_fielddef_resolve(f, found->def, status)) return false;
- }
- }
+// Advances iterator "i" to the entry that follows it in the transaction's def
+// table, as reported by upb_strtable_next().
+upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i) {
+ return upb_strtable_next(&t->deftab, i);
+}
- // Deal with type cycles.
- for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) {
- upb_msgdef *m = upb_dyncast_msgdef(e->def);
- if(!m) continue;
- // The findcycles() call will decrement the external refcount of the
- upb_symtab_findcycles(m, 0, status);
- upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
- upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true);
- }
+// Returns true once the iterator is exhausted, i.e. when it is NULL.
+bool upb_symtabtxn_done(upb_symtabtxn_iter i) {
+ return i == NULL;
+}
- return true;
+// Returns the def stored in the table entry that "iter" points at.  The
+// iterator is dereferenced unconditionally, so it must not be NULL (done).
+upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter) {
+ upb_symtab_ent *e = iter;
+ return e->def;
}
-// Given a list of defs, a list of extensions (in the future), and a flag
-// indicating whether the new defs can overwrite existing defs in the symtab,
-// attempts to add the given defs to the symtab. The whole operation either
-// succeeds or fails. Ownership of "defs" and "exts" is taken.
-static bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs,
- bool allow_redef, upb_status *status)
-{
- upb_rwlock_wrlock(&s->lock);
- // Build a table of the defs we mean to add, for duplicate detection and name
- // resolution.
- upb_strtable tmptab;
- upb_strtable_init(&tmptab, num_defs, sizeof(upb_symtab_ent));
- for (int i = 0; i < num_defs; i++) {
- upb_def *def = defs[i];
- upb_symtab_ent e = {{def->fqname, 0}, def};
-
- // Redefinition is never allowed within a single FileDescriptorSet.
- // Additionally, we only allow overwriting of an existing definition if
- // allow_redef is set.
- if (upb_strtable_lookup(&tmptab, def->fqname) ||
- (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) {
- upb_seterr(status, UPB_ERROR, "Redefinition of symbol " UPB_STRFMT,
- UPB_STRARG(def->fqname));
- goto err;
- }
+/* upb_symtab public interface ************************************************/
- // Pass ownership from the deflist to the strtable.
- upb_strtable_insert(&tmptab, &e.e);
- defs[i] = NULL;
+// Frees every def stored in table "t" and then the table itself.  Asserts that
+// each def's refcount has already reached zero before freeing it.
+static void _upb_symtab_free(upb_strtable *t) {
+ upb_symtab_ent *e;
+ for (e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) {
+ assert(upb_atomic_read(&e->def->refcount) == 0);
+ upb_def_free(e->def);
}
+ upb_strtable_free(t);
+}
- // TODO: process the list of extensions by modifying entries from
- // tmptab in-place (copying them from the symtab first if necessary).
-
- if (!upb_resolverefs(&tmptab, &s->symtab, status)) goto err;
-
- // The defs in tmptab have been vetted, and can be added to the symtab
- // without causing errors. Now add all tmptab defs to the symtab,
- // overwriting (and releasing a ref on) any existing defs with the same
- // names. Ownership for tmptab defs passes from the tmptab to the symtab.
- upb_symtab_ent *tmptab_e;
- for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e;
- tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) {
- upb_symtab_ent *symtab_e =
- upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
- if(symtab_e) {
- upb_def_unref(symtab_e->def);
- symtab_e->def = tmptab_e->def;
- } else {
- upb_strtable_insert(&s->symtab, &tmptab_e->e);
- }
+// Destroys symtab "s": frees the current defs, then every def on the olddefs
+// list (defs that upb_symtab_commit() replaced), asserting that each has a
+// zero refcount, and finally the lock, the olddefs list and "s" itself.
+static void upb_symtab_free(upb_symtab *s) {
+ _upb_symtab_free(&s->symtab);
+ for (uint32_t i = 0; i < s->olddefs.len; i++) {
+ upb_def *d = s->olddefs.defs[i];
+ assert(upb_atomic_read(&d->refcount) == 0);
+ upb_def_free(d);
}
+ upb_rwlock_destroy(&s->lock);
+ upb_deflist_uninit(&s->olddefs);
+ free(s);
+}
- upb_rwlock_unlock(&s->lock);
- upb_strtable_free(&tmptab);
- return true;
-
-err:
- // We need to free all defs from "tmptab."
- upb_rwlock_unlock(&s->lock);
- for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e;
- e = upb_strtable_next(&tmptab, &e->e)) {
- upb_def_unref(e->def);
+void upb_symtab_unref(upb_symtab *s) {
+ if(s && upb_atomic_unref(&s->refcount)) {
+ upb_symtab_free(s);
}
- upb_strtable_free(&tmptab);
- return false;
}
-
-/* upb_symtab public interface ************************************************/
-
-upb_symtab *upb_symtab_new()
-{
+upb_symtab *upb_symtab_new() {
upb_symtab *s = malloc(sizeof(*s));
upb_atomic_init(&s->refcount, 1);
upb_rwlock_init(&s->lock);
upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent));
- s->fds_msgdef = NULL;
+ upb_deflist_init(&s->olddefs);
return s;
}
-static void upb_free_symtab(upb_strtable *t)
-{
- upb_symtab_ent *e;
- for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e))
- upb_def_unref(e->def);
- upb_strtable_free(t);
-}
-
-void _upb_symtab_free(upb_symtab *s)
-{
- upb_free_symtab(&s->symtab);
- upb_rwlock_destroy(&s->lock);
- free(s);
-}
-
-upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type)
-{
+upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) {
upb_rwlock_rdlock(&s->lock);
int total = upb_strtable_count(&s->symtab);
// We may only use part of this, depending on how many symbols are of the
@@ -1281,13 +642,11 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type)
}
upb_rwlock_unlock(&s->lock);
*count = i;
- for(i = 0; i < *count; i++)
- upb_def_ref(defs[i]);
+ for(i = 0; i < *count; i++) upb_def_ref(defs[i]);
return defs;
}
-upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym)
-{
+upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) {
upb_rwlock_rdlock(&s->lock);
upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym);
upb_def *ret = NULL;
@@ -1299,7 +658,6 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym)
return ret;
}
-
upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) {
upb_rwlock_rdlock(&s->lock);
upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol);
@@ -1311,3 +669,109 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol)
upb_rwlock_unlock(&s->lock);
return ret;
}
+
+// Depth-first walk used by upb_symtab_commit().  Visits "def" and, for message
+// defs, every def referenced by its fields.  Any def that can reach a def
+// present in "txn" is duplicated and the copy is added to "txn".
+//
+// "open_defs" holds the "n" defs on the current DFS path and is used to stop at
+// cycles.  Returns true if "txn" contains (or now contains) a def with the same
+// name as "def", and false otherwise or when "def" is already on the path.
+bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n,
+                    upb_symtabtxn *txn) {
+  // This linear search makes the DFS O(n^2) in the length of the paths.
+  // Could make this O(n) with a hash table, but n is small.
+  for (int i = 0; i < n; i++) {
+    if (def == open_defs[i]) return false;
+  }
+
+  bool needcopy = false;
+  upb_msgdef *m = upb_dyncast_msgdef(def);
+  if (m) {
+    // upb_symtab_commit() passes a path array of UPB_MAX_TYPE_DEPTH entries;
+    // refuse to write past it.
+    assert(n < UPB_MAX_TYPE_DEPTH);
+    open_defs[n++] = def;
+    upb_msg_iter i;
+    for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+      upb_fielddef *f = upb_msg_iter_field(i);
+      if (!upb_hasdef(f)) continue;
+      needcopy |= upb_symtab_dfs(f->def, open_defs, n, txn);
+    }
+  }
+
+  // Look the def up by its own name: "m" is NULL for non-message defs, so it
+  // must not be dereferenced here.
+  bool replacing = (upb_strtable_lookup(&txn->deftab, def->fqname) != NULL);
+  if (needcopy && !replacing) {
+    upb_symtab_ent e = {{def->fqname, 0}, upb_def_dup(def)};
+    upb_strtable_insert(&txn->deftab, &e.e);
+    replacing = true;
+  }
+  return replacing;
+}
+
+// Commits the defs in "txn" to symtab "s" while holding the symtab's write
+// lock:
+//   1. existing defs that can reach a def being replaced are copied into the
+//      transaction (see upb_symtab_dfs()),
+//   2. every unresolved field reference in the transaction is resolved,
+//   3. the transaction's defs are moved into the symtab; replaced defs are
+//      pushed onto s->olddefs.
+// Returns true on success.  On failure returns false with "status" set by the
+// failing step; the write lock is released on every path.
+// NOTE(review): on failure the transaction keeps its defs, including any
+// copies added in step 1 and any fields already resolved.
+bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
+  upb_rwlock_wrlock(&s->lock);
+
+  // All existing defs that can reach defs that are being replaced must
+  // themselves be replaced with versions that will point to the new defs.
+  // Do a DFS -- any path that finds a new def must replace all ancestors.
+  upb_strtable *symtab = &s->symtab;
+  upb_symtab_ent *e;
+  for(e = upb_strtable_begin(symtab); e; e = upb_strtable_next(symtab, &e->e)) {
+    upb_def *open_defs[UPB_MAX_TYPE_DEPTH];
+    upb_symtab_dfs(e->def, open_defs, 0, txn);
+  }
+
+  // Resolve all refs.
+  upb_strtable *txntab = &txn->deftab;
+  for(e = upb_strtable_begin(txntab); e; e = upb_strtable_next(txntab, &e->e)) {
+    upb_msgdef *m = upb_dyncast_msgdef(e->def);
+    if(!m) continue;
+    // Type names are resolved relative to the message in which they appear.
+    upb_string *base = m->base.fqname;
+
+    upb_msg_iter i;
+    for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+      upb_fielddef *f = upb_msg_iter_field(i);
+      if(!upb_hasdef(f)) continue;  // No resolving necessary.
+      upb_string *name = upb_downcast_unresolveddef(f->def)->name;
+
+      // Resolve from either the txntab (pending adds) or symtab (existing
+      // defs).  If both exist, prefer the pending add, because it will be
+      // overwriting the existing def.
+      upb_symtab_ent *found;
+      if(!(found = upb_resolve(txntab, base, name)) &&
+         !(found = upb_resolve(symtab, base, name))) {
+        upb_seterr(status, UPB_ERROR,
+                   "could not resolve symbol '" UPB_STRFMT "'"
+                   " in context '" UPB_STRFMT "'",
+                   UPB_STRARG(name), UPB_STRARG(base));
+        goto err;
+      }
+
+      // Check the type of the found def.
+      upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
+      if(found->def->type != expected) {
+        upb_seterr(status, UPB_ERROR, "Unexpected type");
+        goto err;
+      }
+      if (!upb_fielddef_resolve(f, found->def, status)) goto err;
+    }
+  }
+
+  // The defs in the transaction have been vetted, and can be moved to the
+  // symtab without causing errors.
+  upb_symtab_ent *tmptab_e;
+  for(tmptab_e = upb_strtable_begin(txntab); tmptab_e;
+      tmptab_e = upb_strtable_next(txntab, &tmptab_e->e)) {
+    upb_def_movetosymtab(tmptab_e->def, s);
+    upb_symtab_ent *symtab_e =
+        upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
+    if(symtab_e) {
+      upb_deflist_push(&s->olddefs, symtab_e->def);
+      symtab_e->def = tmptab_e->def;
+    } else {
+      upb_strtable_insert(&s->symtab, &tmptab_e->e);
+    }
+  }
+
+  upb_strtable_clear(txntab);
+  upb_rwlock_unlock(&s->lock);
+  upb_symtab_gc(s);
+  return true;
+
+err:
+  // Never return with the write lock still held.
+  upb_rwlock_unlock(&s->lock);
+  return false;
+}
+
+// Called at the end of a successful upb_symtab_commit().  Not implemented yet:
+// the body is a no-op.  Presumably meant to reclaim the defs queued on
+// s->olddefs -- TODO confirm.
+void upb_symtab_gc(upb_symtab *s) {
+ (void)s;
+ // TODO.
+}
diff --git a/src/upb_def.h b/src/upb_def.h
index 776231a..ca969cb 100644
--- a/src/upb_def.h
+++ b/src/upb_def.h
@@ -4,25 +4,16 @@
* Copyright (c) 2009-2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
- * Provides a mechanism for loading proto definitions from descriptors, and
- * data structures to represent those definitions. These form the protobuf
- * schema, and are used extensively throughout upb:
+ * Provides a mechanism for creating and linking proto definitions.
+ * These form the protobuf schema, and are used extensively throughout upb:
* - upb_msgdef: describes a "message" construct.
* - upb_fielddef: describes a message field.
* - upb_enumdef: describes an enum.
- * (TODO: definitions of extensions and services).
+ * (TODO: definitions of services).
*
- * Defs are obtained from a upb_symtab object. A upb_symtab is empty when
- * constructed, and definitions can be added by supplying descriptors.
- *
- * Defs are immutable and reference-counted. Symbol tables reference any defs
- * that are the "current" definitions. If an extension is loaded that adds a
- * field to an existing message, a new msgdef is constructed that includes the
- * new field and the old msgdef is unref'd. The old msgdef will still be ref'd
- * by messages (if any) that were constructed with that msgdef.
- *
- * This file contains routines for creating and manipulating the definitions
- * themselves. To create and manipulate actual messages, see upb_msg.h.
+ * These defs are mutable (and not thread-safe) when first created.
+ * Once they are added to a symtab (for example by committing a symtabtxn)
+ * they become immutable.
*/
#ifndef UPB_DEF_H_
@@ -35,51 +26,37 @@
extern "C" {
#endif
-/* upb_def: base class for defs **********************************************/
+struct _upb_symtab;
+typedef struct _upb_symtab upb_symtab;
// All the different kind of defs we support. These correspond 1:1 with
// declarations in a .proto file.
typedef enum {
UPB_DEF_MSG = 0,
UPB_DEF_ENUM,
- UPB_DEF_SVC,
- UPB_DEF_EXT,
- // Internal-only, placeholder for a def that hasn't be resolved yet.
- UPB_DEF_UNRESOLVED,
+ UPB_DEF_SERVICE, // Not yet implemented.
- // For specifying that defs of any type are requsted from getdefs.
- UPB_DEF_ANY = -1
-} upb_deftype;
+ UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*()
+ UPB_DEF_UNRESOLVED = 99, // Internal-only.
+} upb_deftype_t;
-// This typedef is more space-efficient than declaring an enum var directly.
-typedef int8_t upb_deftype_t;
+
+/* upb_def: base class for defs **********************************************/
typedef struct {
- upb_string *fqname; // Fully qualified.
- upb_atomic_t refcount;
+ upb_string *fqname; // Fully qualified.
+ upb_symtab *symtab; // Def is mutable iff symtab == NULL.
+ upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0).
upb_deftype_t type;
-
- // The is_cyclic flag could go in upb_msgdef instead of here, because only
- // messages can be involved in cycles. However, putting them here is free
- // from a space perspective because structure alignment will otherwise leave
- // three bytes empty after type. It is also makes ref and unref more
- // efficient, because we don't have to downcast to msgdef before checking the
- // is_cyclic flag.
- bool is_cyclic;
- uint16_t search_depth; // Used during initialization dfs.
} upb_def;
-// These must not be called directly!
-void _upb_def_cyclic_ref(upb_def *def);
-void _upb_def_reftozero(upb_def *def);
-
-// Call to ref/deref a def.
-INLINE void upb_def_ref(upb_def *def) {
- if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def);
-}
-INLINE void upb_def_unref(upb_def *def) {
- if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
-}
+// Call to ref/unref a def. Can be used at any time, but is not thread-safe
+// until the def is in a symtab. While a def is in a symtab, everything
+// reachable from that def (the symtab and all defs in the symtab) are
+// guaranteed to be alive.
+void upb_def_ref(upb_def *def);
+void upb_def_unref(upb_def *def);
+upb_def *upb_def_dup(upb_def *def);
#define UPB_UPCAST(ptr) (&(ptr)->base)
@@ -88,30 +65,66 @@ INLINE void upb_def_unref(upb_def *def) {
// A upb_fielddef describes a single field in a message. It isn't a full def
// in the sense that it derives from upb_def. It cannot stand on its own; it
-// is either a field of a upb_msgdef or contained inside a upb_extensiondef.
-// It is also reference-counted.
+// must be part of a upb_msgdef. It is also reference-counted.
struct _upb_fielddef {
- uint8_t type;
- uint8_t label;
- // True if we own a ref on "def" (above). This is true unless this edge is
- // part of a cycle.
- bool owned;
- uint8_t set_bit_mask;
+ struct _upb_msgdef *msgdef;
+ upb_def *def; // if upb_hasdef(f)
+ upb_atomic_t refcount;
+ bool finalized;
+ // The following fields may be modified until the def is finalized.
+ uint8_t type; // Use UPB_TYPE() constants.
+ uint8_t label; // Use UPB_LABEL() constants.
+ int16_t hasbit;
+ uint16_t offset;
int32_t number;
- int16_t field_index; // Indicates set bit.
+ upb_string *name;
+ upb_value defaultval; // Only meaningful for non-repeated scalars and strings.
+ upb_value fval;
+ struct _upb_accessor_vtbl *accessor;
+};
- uint16_t set_bit_offset;
- uint32_t byte_offset; // Where in a upb_msg to find the data.
+upb_fielddef *upb_fielddef_new();
+void upb_fielddef_ref(upb_fielddef *f);
+void upb_fielddef_unref(upb_fielddef *f);
+upb_fielddef *upb_fielddef_dup(upb_fielddef *f);
+
+// Read accessors. May be called any time.
+INLINE uint8_t upb_fielddef_type(upb_fielddef *f) { return f->type; }
+INLINE uint8_t upb_fielddef_label(upb_fielddef *f) { return f->label; }
+INLINE int32_t upb_fielddef_number(upb_fielddef *f) { return f->number; }
+INLINE upb_string *upb_fielddef_name(upb_fielddef *f) { return f->name; }
+INLINE upb_value upb_fielddef_default(upb_fielddef *f) { return f->defaultval; }
+INLINE upb_value upb_fielddef_fval(upb_fielddef *f) { return f->fval; }
+INLINE bool upb_fielddef_finalized(upb_fielddef *f) { return f->finalized; }
+INLINE struct _upb_msgdef *upb_fielddef_msgdef(upb_fielddef *f) {
+ return f->msgdef;
+}
+INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(upb_fielddef *f) {
+ return f->accessor;
+}
- upb_value default_value;
- upb_string *name;
- struct _upb_msgdef *msgdef;
+// Only meaningful once the def is in a symtab (returns NULL otherwise, or for
+// a fielddef where !upb_hasdef(f)).
+upb_def *upb_fielddef_subdef(upb_fielddef *f);
- // For the case of an enum or a submessage, points to the def for that type.
- upb_def *def;
- upb_atomic_t refcount;
-};
+// upb_fielddef_msgdef() returns NULL until the fielddef has been added to a
+// msgdef.
+
+// Write accessors. "Number" and "name" must be set before the fielddef is
+// added to a msgdef. For the moment we do not allow these to be set once
+// the fielddef is added to a msgdef -- this could be relaxed in the future.
+void upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
+void upb_fielddef_setname(upb_fielddef *f, upb_string *name);
+
+// These writers may be called at any time prior to being put in a symtab.
+void upb_fielddef_settype(upb_fielddef *f, uint8_t type);
+void upb_fielddef_setlabel(upb_fielddef *f, uint8_t label);
+void upb_fielddef_setdefault(upb_fielddef *f, upb_value value);
+void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
+void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
+// The name of the message or enum this field is referring to. Must be found
+// at name resolution time (when the symtabtxn is committed to the symtab).
+void upb_fielddef_settypename(upb_fielddef *f, upb_string *name);
// A variety of tests about the type of a field.
INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
@@ -125,58 +138,35 @@ INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
}
INLINE bool upb_issubmsg(upb_fielddef *f) { return upb_issubmsgtype(f->type); }
INLINE bool upb_isstring(upb_fielddef *f) { return upb_isstringtype(f->type); }
-INLINE bool upb_isarray(upb_fielddef *f) {
- return f->label == UPB_LABEL(REPEATED);
-}
+INLINE bool upb_isseq(upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); }
+
// Does the type of this field imply that it should contain an associated def?
INLINE bool upb_hasdef(upb_fielddef *f) {
return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);
}
-INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) {
- if (upb_isarray(f)) {
- return UPB_VALUETYPE_ARRAY;
- } else {
- return f->type;
- }
-}
-
-INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) {
- assert(upb_isarray(f));
- return f->type;
-}
-
-INLINE bool upb_field_ismm(upb_fielddef *f) {
- return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f);
-}
-
-INLINE bool upb_elem_ismm(upb_fielddef *f) {
- return upb_isstring(f) || upb_issubmsg(f);
-}
/* upb_msgdef *****************************************************************/
// Structure that describes a single .proto message type.
typedef struct _upb_msgdef {
upb_def base;
- upb_atomic_t cycle_refcount;
- uint32_t size;
- uint32_t set_flags_bytes;
// Tables for looking up fields by number and name.
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
- // Immutable msg instance that has all default values set.
- // TODO: need a way of making this immutable!
- struct _upb_msg *default_message;
+ // The following fields may be modified until finalized.
+ uint16_t size;
+ uint8_t hasbit_bytes;
+ // The range of tag numbers used to store extensions.
+ uint32_t extension_start;
+ uint32_t extension_end;
} upb_msgdef;
// Hash table entries for looking up fields by name or number.
typedef struct {
bool junk;
- uint8_t field_type;
- uint8_t native_wire_type;
upb_fielddef *f;
} upb_itof_ent;
typedef struct {
@@ -184,23 +174,56 @@ typedef struct {
upb_fielddef *f;
} upb_ntof_ent;
-INLINE void upb_msgdef_unref(upb_msgdef *md) {
- upb_def_unref(UPB_UPCAST(md));
+upb_msgdef *upb_msgdef_new();
+INLINE void upb_msgdef_unref(upb_msgdef *md) { upb_def_unref(UPB_UPCAST(md)); }
+INLINE void upb_msgdef_ref(upb_msgdef *md) { upb_def_ref(UPB_UPCAST(md)); }
+
+// Returns a new msgdef that is a copy of the given msgdef (and a copy of all
+// the fields) but with any references to submessages broken and replaced with
+// just the name of the submessage. This can be put back into another symtab
+// and the names will be re-resolved in the new context.
+upb_msgdef *upb_msgdef_dup(upb_msgdef *m);
+
+// Read accessors. May be called at any time.
+INLINE uint16_t upb_msgdef_size(upb_msgdef *m) { return m->size; }
+INLINE uint8_t upb_msgdef_hasbit_bytes(upb_msgdef *m) {
+ return m->hasbit_bytes;
+}
+INLINE uint32_t upb_msgdef_extension_start(upb_msgdef *m) {
+ return m->extension_start;
}
-INLINE void upb_msgdef_ref(upb_msgdef *md) {
- upb_def_ref(UPB_UPCAST(md));
+INLINE uint32_t upb_msgdef_extension_end(upb_msgdef *m) {
+ return m->extension_end;
}
+// Write accessors. May only be called before the msgdef is in a symtab.
+void upb_msgdef_setsize(upb_msgdef *m, uint16_t size);
+void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes);
+void upb_msgdef_setextension_start(upb_msgdef *m, uint32_t start);
+void upb_msgdef_setextension_end(upb_msgdef *m, uint32_t end);
+
+// Adds a fielddef to a msgdef, and passes a ref on the field to the msgdef.
+// May only be done before the msgdef is in a symtab. The fielddef's name and
+// number must be set, and the message may not already contain any field with
+// this name or number -- if it does, the fielddef is unref'd and false is
+// returned. The fielddef may not already belong to another message.
+bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f);
+
+// Sets the layout of all fields according to default rules:
+// 1. Hasbits for required fields come first, then optional fields.
+// 2. Values are laid out in a way that respects alignment rules.
+// 3. The order is chosen to minimize memory usage.
+// This should only be called once all fielddefs have been added.
+// TODO: will likely want the ability to exclude strings/submessages/arrays.
+// TODO: will likely want the ability to define a header size.
+void upb_msgdef_layout(upb_msgdef *m);
+
// Looks up a field by name or number. While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible. These return NULL if no such field is found.
-INLINE upb_itof_ent *upb_msgdef_itofent(upb_msgdef *m, uint32_t num) {
- return (upb_itof_ent*)upb_inttable_fastlookup(
- &m->itof, num, sizeof(upb_itof_ent));
-}
-
-INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t num) {
- upb_itof_ent *e = upb_msgdef_itofent(m, num);
+INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t i) {
+ upb_itof_ent *e = (upb_itof_ent*)
+ upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent));
return e ? e->f : NULL;
}
@@ -214,6 +237,7 @@ INLINE int upb_msgdef_numfields(upb_msgdef *m) {
}
// Iteration over fields. The order is undefined.
+// Iterators are invalidated when a field is added or removed.
// upb_msg_iter i;
// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
// upb_fielddef *f = upb_msg_iter_field(i);
@@ -225,6 +249,7 @@ upb_msg_iter upb_msg_begin(upb_msgdef *m);
upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter);
INLINE bool upb_msg_done(upb_msg_iter iter) { return upb_inttable_done(iter); }
+// Iterator accessor.
INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_iter_value(iter);
return ent->f;
@@ -233,13 +258,11 @@ INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
/* upb_enumdef ****************************************************************/
-typedef int32_t upb_enumval_t;
-
typedef struct _upb_enumdef {
upb_def base;
upb_strtable ntoi;
upb_inttable iton;
- upb_enumval_t default_value; // The first value listed in the enum.
+ int32_t defaultval;
} upb_enumdef;
typedef struct {
@@ -252,12 +275,28 @@ typedef struct {
upb_string *string;
} upb_iton_ent;
+upb_enumdef *upb_enumdef_new();
+INLINE void upb_enumdef_ref(upb_enumdef *e) { upb_def_ref(UPB_UPCAST(e)); }
+INLINE void upb_enumdef_unref(upb_enumdef *e) { upb_def_unref(UPB_UPCAST(e)); }
+upb_enumdef *upb_enumdef_dup(upb_enumdef *e);
+
+INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; }
+
+// May only be set before the enumdef is in a symtab.
+void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);
+
+// Adds a value to the enumdef. Requires that no existing val has this
+// name or number (returns false and does not add if there is). May only
+// be called before the enumdef is in a symtab.
+bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num);
+
// Lookups from name to integer and vice-versa.
-bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num);
+bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, int32_t *num);
// Caller does not own a ref on the returned string.
-upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num);
+upb_string *upb_enumdef_iton(upb_enumdef *e, int32_t num);
// Iteration over name/value pairs. The order is undefined.
+// Adding an enum val invalidates any iterators.
// upb_enum_iter i;
// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
// // ...
@@ -268,6 +307,7 @@ upb_enum_iter upb_enum_begin(upb_enumdef *e);
upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); }
+// Iterator accessors.
INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) {
upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter);
return e->string;
@@ -277,28 +317,74 @@ INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
}
+/* upb_symtabtxn **************************************************************/
+
+// A symbol table transaction is a map of defs that can be added to a symtab
+// in one single atomic operation that either succeeds or fails. Mutable defs
+// can be added to this map (and perhaps removed, in the future).
+//
+// A symtabtxn is not thread-safe.
+
+typedef struct {
+ upb_strtable deftab;
+} upb_symtabtxn;
+
+void upb_symtabtxn_init(upb_symtabtxn *t);
+void upb_symtabtxn_uninit(upb_symtabtxn *t);
+
+// Adds a def to the symtabtxn. Caller passes a ref on the def to the symtabtxn.
+// The def's name must be set and there must not be any existing defs in the
+// symtabtxn with this name, otherwise false will be returned and no operation
+// will be performed (and the ref on the def will be released).
+bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def);
+
+// Gets the def (if any) that is associated with this name in the symtabtxn.
+// Caller does *not* inherit a ref on the def.
+upb_def *upb_symtabtxn_get(upb_symtabtxn *t, upb_string *name);
+
+// Iterate over the defs that are part of the transaction.
+// The order is undefined.
+// The iterator is invalidated by upb_symtabtxn_add().
+// upb_symtabtxn_iter i;
+// for(i = upb_symtabtxn_begin(t); !upb_symtabtxn_done(i);
+// i = upb_symtabtxn_next(t, i)) {
+// upb_def *def = upb_symtabtxn_iter_def(i);
+// }
+typedef void* upb_symtabtxn_iter;
+
+upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t);
+upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i);
+bool upb_symtabtxn_done(upb_symtabtxn_iter i);
+upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter);
+
+
/* upb_symtab *****************************************************************/
// A SymbolTable is where upb_defs live. It is empty when first constructed.
-// Clients add definitions to the symtab by supplying descriptors (as defined
-// in descriptor.proto) via the upb_stream interface.
+// Clients add definitions to the symtab (or replace existing definitions) by
+// calling upb_symtab_commit() or upb_symtab_add().
+
+// upb_deflist: A little dynamic array for storing a growing list of upb_defs.
+typedef struct {
+ upb_def **defs;
+ uint32_t len;
+ uint32_t size;
+} upb_deflist;
+
+void upb_deflist_init(upb_deflist *l);
+void upb_deflist_uninit(upb_deflist *l);
+void upb_deflist_push(upb_deflist *l, upb_def *d);
+
struct _upb_symtab {
upb_atomic_t refcount;
upb_rwlock_t lock; // Protects all members except the refcount.
upb_strtable symtab; // The symbol table.
- upb_msgdef *fds_msgdef; // Msgdef for google.protobuf.FileDescriptorSet.
+ upb_deflist olddefs;
};
-typedef struct _upb_symtab upb_symtab;
-// Initializes a upb_symtab. Symtabs are not freed explicitly, but unref'd
-// when the caller is done with them.
upb_symtab *upb_symtab_new(void);
-void _upb_symtab_free(upb_symtab *s); // Must not be called directly!
-
INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); }
-INLINE void upb_symtab_unref(upb_symtab *s) {
- if(s && upb_atomic_unref(&s->refcount)) _upb_symtab_free(s);
-}
+void upb_symtab_unref(upb_symtab *s);
// Resolves the given symbol using the rules described in descriptor.proto,
// namely:
@@ -310,35 +396,36 @@ INLINE void upb_symtab_unref(upb_symtab *s) {
//
// If a def is found, the caller owns one ref on the returned def. Otherwise
// returns NULL.
+// TODO: make return const
upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym);
// Find an entry in the symbol table with this exact name. If a def is found,
// the caller owns one ref on the returned def. Otherwise returns NULL.
+// TODO: make return const
upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym);
// Gets an array of pointers to all currently active defs in this symtab. The
// caller owns the returned array (which is of length *count) as well as a ref
// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are
// returned, otherwise only defs of the required type are returned.
-upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type);
+// TODO: make return const
+upb_def **upb_symtab_getdefs(upb_symtab *s, int *n, upb_deftype_t type);
-// upb_defbuilder: For adding defs to the symtab.
-// You allocate the defbuilder, which can handle a single descriptor.
-// It will be freed automatically when the parse completes.
-struct _upb_defbuilder;
-typedef struct _upb_defbuilder upb_defbuilder;
-struct _upb_handlers;
-struct _upb_handlers;
+// Adds a single upb_def into the symtab. A ref on the def is passed to the
+// symtab. If any references cannot be resolved, false is returned and the
+// symtab is unchanged. The error (if any) is saved to status if non-NULL.
+bool upb_symtab_add(upb_symtab *s, upb_def *d, upb_status *status);
-// Allocates a new defbuilder that will add defs to the given symtab.
-upb_defbuilder *upb_defbuilder_new(upb_symtab *s);
+// Adds the set of defs contained in the transaction to the symtab, clearing
+// the txn. The entire operation either succeeds or fails. If the operation
+// fails, the symtab is unchanged, false is returned, and status indicates
+// the error.
+bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *t, upb_status *status);
-// Registers handlers that will operate on a defbuilder to add the defs
-// to the defbuilder's symtab. Will free itself when the parse finishes.
-//
-// TODO: should this allow redefinition? Either is possible, but which is
-// more useful? Maybe it should be an option.
-struct _upb_mhandlers *upb_defbuilder_reghandlers(struct _upb_handlers *h);
+// Frees defs that are no longer active in the symtab and are no longer
+// reachable. Such defs are not freed when they are replaced in the symtab
+// if they are still reachable from defs that are still referenced.
+void upb_symtab_gc(upb_symtab *s);
/* upb_def casts **************************************************************/
@@ -352,8 +439,7 @@ struct _upb_mhandlers *upb_defbuilder_reghandlers(struct _upb_handlers *h);
}
UPB_DYNAMIC_CAST_DEF(msgdef, MSG);
UPB_DYNAMIC_CAST_DEF(enumdef, ENUM);
-UPB_DYNAMIC_CAST_DEF(svcdef, SVC);
-UPB_DYNAMIC_CAST_DEF(extdef, EXT);
+UPB_DYNAMIC_CAST_DEF(svcdef, SERVICE);
UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DYNAMIC_CAST_DEF
@@ -367,8 +453,7 @@ UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
}
UPB_DOWNCAST_DEF(msgdef, MSG);
UPB_DOWNCAST_DEF(enumdef, ENUM);
-UPB_DOWNCAST_DEF(svcdef, SVC);
-UPB_DOWNCAST_DEF(extdef, EXT);
+UPB_DOWNCAST_DEF(svcdef, SERVICE);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF
diff --git a/src/upb_descriptor.c b/src/upb_descriptor.c
new file mode 100644
index 0000000..127d19c
--- /dev/null
+++ b/src/upb_descriptor.c
@@ -0,0 +1,548 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2008-2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb_descriptor.h"
+
+#include <stdlib.h>
+#include <errno.h>
+#include "upb_string.h"
+#include "upb_def.h"
+
+/* Concatenates a scope name and a member name with a '.' separator:
+ *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
+ *   join("", "Baz") -> "Baz"
+ * A NULL or empty base yields a ref on "name" itself.
+ * Caller owns a ref on the returned string. */
+static upb_string *upb_join(upb_string *base, upb_string *name) {
+  if (base && upb_string_len(base) != 0) {
+    return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT,
+                               UPB_STRARG(base), UPB_STRARG(name));
+  }
+  // Nothing to prepend: hand back the bare name.
+  return upb_string_getref(name);
+}
+
+/* upb_descreader ************************************************************/
+
+// A upb_descreader builds a list of defs by handling a parse of a protobuf in
+// the format defined in descriptor.proto. The output of a upb_descreader is
+// a upb_symtabtxn.
+
+// Returns the most recently pushed def.  The list must be non-empty: "len" is
+// unsigned, so with len == 0 the index below would wrap around and read far
+// outside the array.
+static upb_def *upb_deflist_last(upb_deflist *l) {
+  assert(l->len > 0);
+  return l->defs[l->len-1];
+}
+
+// Prefixes the fqname of every def at index >= "start" with the scope "str"
+// (joined by upb_join()).
+static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) {
+  uint32_t idx = start;
+  while (idx < l->len) {
+    upb_def *d = l->defs[idx++];
+    upb_string *unqualified = d->fqname;
+    d->fqname = upb_join(str, unqualified);
+    // The def now holds the joined string; drop its ref on the old name.
+    upb_string_unref(unqualified);
+  }
+}
+
+// Forward declares for top-level file descriptors.
+static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h);
+static upb_mhandlers * upb_enumdef_register_EnumDescriptorProto(upb_handlers *h);
+
+// Initializes "r" so that the defs it reads are later handed to "txn".  The
+// reader starts with an empty def list, an empty container stack and no
+// pending name or default string.
+void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn) {
+  r->txn = txn;
+  r->name = NULL;
+  r->default_string = NULL;
+  r->stack_len = 0;
+  upb_status_init(&r->status);
+  upb_deflist_init(&r->defs);
+}
+
+// Releases everything the reader still holds: the pending name and default
+// string, its status, its def list, and the name of every container frame
+// that is still on the stack.
+void upb_descreader_uninit(upb_descreader *r) {
+  upb_string_unref(r->name);
+  upb_status_uninit(&r->status);
+  upb_deflist_uninit(&r->defs);
+  upb_string_unref(r->default_string);
+  // Pop the remaining frames from the top down.
+  for (; r->stack_len > 0; r->stack_len--) {
+    upb_string_unref(r->stack[r->stack_len - 1].name);
+  }
+}
+
+// Returns the msgdef stored just before the first def of the innermost
+// container frame, or NULL when at most one frame is open.
+static upb_msgdef *upb_descreader_top(upb_descreader *r) {
+  if (r->stack_len > 1) {
+    int index = r->stack[r->stack_len-1].start - 1;
+    assert(index >= 0);
+    return upb_downcast_msgdef(r->defs.defs[index]);
+  }
+  return NULL;
+}
+
+// Returns the def most recently pushed onto the reader's def list.
+static upb_def *upb_descreader_last(upb_descreader *r) {
+  upb_deflist *pending = &r->defs;
+  return upb_deflist_last(pending);
+}
+
+// Start/end handlers for FileDescriptorProto and DescriptorProto (the two
+// entities that have names and can contain sub-definitions).
+//
+// Pushes a new, unnamed frame that remembers where this container's defs
+// begin in the def list.
+// NOTE(review): there is no check against the capacity of r->stack here --
+// confirm that nesting depth is bounded elsewhere.
+void upb_descreader_startcontainer(upb_descreader *r) {
+  upb_descreader_frame *frame = r->stack + r->stack_len;
+  r->stack_len++;
+  frame->name = NULL;
+  frame->start = r->defs.len;
+}
+
+// Pops the innermost frame, qualifies every def added since the frame was
+// pushed with the frame's name, and releases that name.
+void upb_descreader_endcontainer(upb_descreader *r) {
+  r->stack_len--;
+  upb_descreader_frame *done = &r->stack[r->stack_len];
+  upb_deflist_qualify(&r->defs, done->name, done->start);
+  upb_string_unref(done->name);
+}
+
+// Replaces the name of the innermost open container frame with "str".  The
+// frame takes its own ref on the string.
+void upb_descreader_setscopename(upb_descreader *r, upb_string *str) {
+  upb_descreader_frame *f = &r->stack[r->stack_len-1];
+  // Take the new ref before dropping the old one, so the string cannot be
+  // released in between if "str" is the name already stored in the frame.
+  upb_string *newname = upb_string_getref(str);
+  upb_string_unref(f->name);
+  f->name = newname;
+}
+
+// Handlers for google.protobuf.FileDescriptorProto.
+
+// A file is a container: open a new frame for it.
+static upb_flow_t upb_descreader_FileDescriptorProto_startmsg(void *_r) {
+  upb_descreader_startcontainer((upb_descreader*)_r);
+  return UPB_CONTINUE;
+}
+
+// Closes the file's container frame.  "status" is not consulted.
+static void upb_descreader_FileDescriptorProto_endmsg(void *_r,
+                                                      upb_status *status) {
+  upb_descreader *reader = _r;
+  (void)status;
+  upb_descreader_endcontainer(reader);
+}
+
+// Value handler for the "package" field: the package becomes the scope name
+// of the file's container frame.
+static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r,
+                                                             upb_value fval,
+                                                             upb_value val) {
+  upb_descreader *reader = _r;
+  upb_string *package = upb_value_getstr(val);
+  (void)fval;
+  upb_descreader_setscopename(reader, package);
+  return UPB_CONTINUE;
+}
+
+// Creates the message handlers for FileDescriptorProto: start/end handlers,
+// a value handler for "package", and sub-message handlers for the
+// message_type and enum_type fields.
+static upb_mhandlers *upb_descreader_register_FileDescriptorProto(
+    upb_handlers *h) {
+  upb_mhandlers *mh = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(mh, &upb_descreader_FileDescriptorProto_startmsg);
+  upb_mhandlers_setendmsg(mh, &upb_descreader_FileDescriptorProto_endmsg);
+
+#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM
+#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE
+  upb_fhandlers *pkg =
+      upb_mhandlers_newfhandlers(mh, FNUM(PACKAGE), FTYPE(PACKAGE), false);
+  upb_fhandlers_setvalue(pkg, &upb_descreader_FileDescriptorProto_package);
+
+  upb_mhandlers *msgs = upb_msgdef_register_DescriptorProto(h);
+  upb_mhandlers_newfhandlers_subm(mh, FNUM(MESSAGE_TYPE), FTYPE(MESSAGE_TYPE),
+                                  true, msgs);
+
+  upb_mhandlers *enums = upb_enumdef_register_EnumDescriptorProto(h);
+  upb_mhandlers_newfhandlers_subm(mh, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true,
+                                  enums);
+  // TODO: services, extensions
+  return mh;
+}
+#undef FNUM
+#undef FTYPE
+
+// Handlers for google.protobuf.FileDescriptorSet.
+
+// End-of-set handler.  If the parse succeeded, hands every def collected by
+// the reader to its txn and empties the reader's list; on failure the list
+// is left untouched.
+static void upb_descreader_FileDescriptorSet_onendmsg(void *_r,
+                                                      upb_status *status) {
+  upb_descreader *r = _r;
+  if (!upb_ok(status)) return;
+
+  // Move all defs (which are now guaranteed to be fully-qualified) to the txn.
+  unsigned int i = 0;
+  while (i < r->defs.len) {
+    // TODO: check return for duplicate def.
+    upb_symtabtxn_add(r->txn, r->defs.defs[i]);
+    i++;
+  }
+  r->defs.len = 0;
+}
+
+// Creates the message handlers for FileDescriptorSet: an end-of-message
+// handler plus sub-message handlers for the "file" field.
+static upb_mhandlers *upb_descreader_register_FileDescriptorSet(upb_handlers *h) {
+  upb_mhandlers *mh = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setendmsg(mh, upb_descreader_FileDescriptorSet_onendmsg);
+
+#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM
+#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE
+  upb_mhandlers *files = upb_descreader_register_FileDescriptorProto(h);
+  upb_mhandlers_newfhandlers_subm(mh, FNUM(FILE), FTYPE(FILE), true, files);
+  return mh;
+}
+#undef FNUM
+#undef FTYPE
+
+// Registers the descriptor-reading handlers on "h" and returns the handlers
+// for the top-level FileDescriptorSet message.  Also clears h->should_jit.
+upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h) {
+  h->should_jit = false;
+  upb_mhandlers *root = upb_descreader_register_FileDescriptorSet(h);
+  return root;
+}
+
+// google.protobuf.EnumValueDescriptorProto.
+
+// Starts a new enum value: neither its name nor its number has been seen yet.
+static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_r) {
+  upb_descreader *reader = _r;
+  reader->saw_name = false;
+  reader->saw_number = false;
+  return UPB_CONTINUE;
+}
+
+// Value handler for the "name" field: remembers the enum value's name (the
+// reader takes its own ref) until the end-of-message handler consumes it.
+static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r,
+                                                            upb_value fval,
+                                                            upb_value val) {
+  (void)fval;
+  upb_descreader *r = _r;
+  // Take the new ref before dropping the old one, so the string cannot be
+  // released in between if the incoming value is the name already stored.
+  upb_string *newname = upb_string_getref(upb_value_getstr(val));
+  upb_string_unref(r->name);
+  r->name = newname;
+  r->saw_name = true;
+  return UPB_CONTINUE;
+}
+
+// Value handler for the "number" field: remembers the enum value's number
+// until the end-of-message handler consumes it.
+static upb_flow_t upb_enumdef_EnumValueDescriptorProto_number(void *_r,
+                                                              upb_value fval,
+                                                              upb_value val) {
+  upb_descreader *reader = _r;
+  (void)fval;
+  reader->saw_number = true;
+  reader->number = upb_value_getint32(val);
+  return UPB_CONTINUE;
+}
+
+static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
+ upb_status *status) {
+ upb_descreader *r = _r;
+ if(!r->saw_number || !r->saw_name) {
+ upb_seterr(status, UPB_ERROR, "Enum value missing name or number.");
+ return;
+ }
+ upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
+ if (upb_inttable_count(&e->iton) == 0) {
+ // The default value of an enum (in the absence of an explicit default) is
+ // its first listed value.
+ upb_enumdef_setdefault(e, r->number);
+ }
+ upb_enumdef_addval(e, r->name, r->number);
+ upb_string_unref(r->name);
+ r->name = NULL;
+}
+
+// Creates the message handlers for EnumValueDescriptorProto: start/end
+// handlers plus value handlers for the "name" and "number" fields.
+static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto(
+    upb_handlers *h) {
+  upb_mhandlers *mh = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(mh, &upb_enumdef_EnumValueDescriptorProto_startmsg);
+  upb_mhandlers_setendmsg(mh, &upb_enumdef_EnumValueDescriptorProto_endmsg);
+
+#define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM
+#define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE
+  upb_fhandlers *name_h =
+      upb_mhandlers_newfhandlers(mh, FNUM(NAME), FTYPE(NAME), false);
+  upb_fhandlers_setvalue(name_h, &upb_enumdef_EnumValueDescriptorProto_name);
+
+  upb_fhandlers *number_h =
+      upb_mhandlers_newfhandlers(mh, FNUM(NUMBER), FTYPE(NUMBER), false);
+  upb_fhandlers_setvalue(number_h, &upb_enumdef_EnumValueDescriptorProto_number);
+  return mh;
+}
+#undef FNUM
+#undef FTYPE
+
+// google.protobuf.EnumDescriptorProto.
+
+// Starts a new enum by pushing a fresh enumdef onto the reader's def list.
+static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) {
+  upb_descreader *reader = _r;
+  upb_enumdef *fresh = upb_enumdef_new();
+  upb_deflist_push(&reader->defs, UPB_UPCAST(fresh));
+  return UPB_CONTINUE;
+}
+
+static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) {
+ upb_descreader *r = _r;
+ upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
+ if (upb_descreader_last((upb_descreader*)_r)->fqname == NULL) {
+ upb_seterr(status, UPB_ERROR, "Enum had no name.");
+ return;
+ }
+ if (upb_inttable_count(&e->iton) == 0) {
+ upb_seterr(status, UPB_ERROR, "Enum had no values.");
+ return;
+ }
+}
+
+// Value handler for the "name" field: stores the (not yet qualified) name as
+// the fqname of the enumdef that was pushed last.  The def takes its own ref.
+static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r,
+                                                       upb_value fval,
+                                                       upb_value val) {
+  (void)fval;
+  upb_descreader *r = _r;
+  upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
+  // Take the new ref before dropping the old one, so the string cannot be
+  // released in between if the incoming value is the name already stored.
+  upb_string *newname = upb_string_getref(upb_value_getstr(val));
+  upb_string_unref(e->base.fqname);
+  e->base.fqname = newname;
+  return UPB_CONTINUE;
+}
+
+// Creates the message handlers for EnumDescriptorProto: start/end handlers,
+// a value handler for "name", and sub-message handlers for the "value" field.
+static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) {
+  upb_mhandlers *mh = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(mh, &upb_enumdef_EnumDescriptorProto_startmsg);
+  upb_mhandlers_setendmsg(mh, &upb_enumdef_EnumDescriptorProto_endmsg);
+
+#define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM
+#define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE
+  upb_fhandlers *name_h =
+      upb_mhandlers_newfhandlers(mh, FNUM(NAME), FTYPE(NAME), false);
+  upb_fhandlers_setvalue(name_h, &upb_enumdef_EnumDescriptorProto_name);
+
+  upb_mhandlers *values = upb_enumdef_register_EnumValueDescriptorProto(h);
+  upb_mhandlers_newfhandlers_subm(mh, FNUM(VALUE), FTYPE(VALUE), true, values);
+  return mh;
+}
+#undef FNUM
+#undef FTYPE
+
+// Start-of-message handler for FieldDescriptorProto: allocates the fielddef
+// that the per-field handlers populate through the reader's "f" member.
+static upb_flow_t upb_fielddef_startmsg(void *_r) {
+  upb_descreader *reader = _r;
+  upb_fielddef *new_field = upb_fielddef_new();
+  reader->f = new_field;
+  return UPB_CONTINUE;
+}
+
+// Converts the default value in string "dstr" into "d", interpreting it
+// according to the field type "type".  Passes a ref on dstr: for string,
+// bytes and enum types the ref is kept inside "d" (or the empty string is
+// used when dstr is NULL); in every other case the ref is released here.
+// For numeric and bool types, a missing default (strz == NULL below) yields
+// zero/false.  Returns true on success; returns false if the text is not a
+// complete, in-range literal for the type, or if a message/group field was
+// given a default at all.
+static bool upb_fielddef_parsedefault(upb_string *dstr, upb_value *d, int type) {
+  bool success = true;
+  if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
+    // We'll keep the ref we had on it.  We include enums in this case because
+    // we need the enumdef to resolve the name, but we may not have it yet.
+    // We'll resolve it later.
+    if (dstr) {
+      upb_value_setstr(d, dstr);
+    } else {
+      upb_value_setstr(d, upb_emptystring());
+    }
+  } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
+    // We don't expect to get a default value.
+    upb_string_unref(dstr);
+    if (dstr != NULL) success = false;
+  } else {
+    // The strto* functions need the string to be NULL-terminated.
+    char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr);
+    char *end;
+    upb_string_unref(dstr);
+    // The strto* functions only set errno on failure and never clear it, so
+    // reset it here; otherwise a stale ERANGE left by unrelated earlier code
+    // would make the checks below reject a perfectly valid default.
+    errno = 0;
+    switch (type) {
+      case UPB_TYPE(INT32):
+      case UPB_TYPE(SINT32):
+      case UPB_TYPE(SFIXED32):
+        if (strz) {
+          long val = strtol(strz, &end, 0);
+          if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
+            success = false;
+          else
+            upb_value_setint32(d, val);
+        } else {
+          upb_value_setint32(d, 0);
+        }
+        break;
+      case UPB_TYPE(INT64):
+      case UPB_TYPE(SINT64):
+      case UPB_TYPE(SFIXED64):
+        if (strz) {
+          upb_value_setint64(d, strtoll(strz, &end, 0));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setint64(d, 0);
+        }
+        break;
+      case UPB_TYPE(UINT32):
+      case UPB_TYPE(FIXED32):
+        if (strz) {
+          unsigned long val = strtoul(strz, &end, 0);
+          if (val > UINT32_MAX || errno == ERANGE || *end)
+            success = false;
+          else
+            upb_value_setuint32(d, val);
+        } else {
+          upb_value_setuint32(d, 0);
+        }
+        break;
+      case UPB_TYPE(UINT64):
+      case UPB_TYPE(FIXED64):
+        if (strz) {
+          upb_value_setuint64(d, strtoull(strz, &end, 0));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setuint64(d, 0);
+        }
+        break;
+      case UPB_TYPE(DOUBLE):
+        if (strz) {
+          upb_value_setdouble(d, strtod(strz, &end));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setdouble(d, 0.0);
+        }
+        break;
+      case UPB_TYPE(FLOAT):
+        if (strz) {
+          upb_value_setfloat(d, strtof(strz, &end));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setfloat(d, 0.0);
+        }
+        break;
+      case UPB_TYPE(BOOL):
+        // Only the exact spellings "true" and "false" are accepted.
+        if (!strz || strcmp(strz, "false") == 0)
+          upb_value_setbool(d, false);
+        else if (strcmp(strz, "true") == 0)
+          upb_value_setbool(d, true);
+        else
+          success = false;
+        break;
+    }
+    free(strz);
+  }
+  return success;
+}
+
+// End-of-message handler for FieldDescriptorProto.  Adds the finished
+// fielddef to the msgdef on top of the reader's stack, then converts the
+// default-value string stashed in the reader (if any) into a typed default.
+// Sets an error on "status" if that conversion fails.
+static void upb_fielddef_endmsg(void *_r, upb_status *status) {
+  upb_descreader *reader = _r;
+  upb_fielddef *f = reader->f;
+  // TODO: verify that all required fields were present.
+  assert(f->number != -1 && f->name != NULL);
+  assert((f->def != NULL) == upb_hasdef(f));
+
+  // Field was successfully read, add it as a field of the msgdef.
+  upb_msgdef *owner = upb_descreader_top(reader);
+  upb_msgdef_addfield(owner, f);
+
+  // Detach the pending default string from the reader; the ref is handed to
+  // upb_fielddef_parsedefault().
+  upb_string *default_str = reader->default_string;
+  reader->default_string = NULL;
+
+  upb_value default_val;
+  if (upb_fielddef_parsedefault(default_str, &default_val, f->type)) {
+    upb_fielddef_setdefault(f, default_val);
+  } else {
+    // We don't worry too much about giving a great error message since the
+    // compiler should have ensured this was correct.
+    upb_seterr(status, UPB_ERROR, "Error converting default value.");
+  }
+}
+
+// Handler for FieldDescriptorProto.type: stores the type on the fielddef
+// currently being built.
+static upb_flow_t upb_fielddef_ontype(void *_r, upb_value fval, upb_value val) {
+  upb_descreader *reader = _r;
+  (void)fval;
+  upb_fielddef_settype(reader->f, upb_value_getint32(val));
+  return UPB_CONTINUE;
+}
+
+// Handler for FieldDescriptorProto.label: stores the label on the fielddef
+// currently being built.
+static upb_flow_t upb_fielddef_onlabel(void *_r, upb_value fval, upb_value val) {
+  upb_descreader *reader = _r;
+  (void)fval;
+  upb_fielddef_setlabel(reader->f, upb_value_getint32(val));
+  return UPB_CONTINUE;
+}
+
+// Handler for FieldDescriptorProto.number: stores the field number on the
+// fielddef currently being built.
+static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val) {
+  upb_descreader *reader = _r;
+  (void)fval;
+  upb_fielddef_setnumber(reader->f, upb_value_getint32(val));
+  return UPB_CONTINUE;
+}
+
+// Handler for FieldDescriptorProto.name: stores the field name on the
+// fielddef currently being built.
+static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) {
+  upb_descreader *reader = _r;
+  (void)fval;
+  upb_fielddef_setname(reader->f, upb_value_getstr(val));
+  return UPB_CONTINUE;
+}
+
+// Handler for FieldDescriptorProto.type_name: stores the type name on the
+// fielddef currently being built.
+static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval,
+                                          upb_value val) {
+  upb_descreader *reader = _r;
+  (void)fval;
+  upb_fielddef_settypename(reader->f, upb_value_getstr(val));
+  return UPB_CONTINUE;
+}
+
+// Handler for FieldDescriptorProto.default_value.  The text cannot be
+// converted yet because the field's type may not have been seen, so a ref on
+// the string is stashed in the reader (replacing any earlier one) and the
+// conversion happens in the end-of-message handler.
+static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval,
+                                            upb_value val) {
+  upb_descreader *reader = _r;
+  (void)fval;
+  upb_string_unref(reader->default_string);
+  reader->default_string = upb_string_getref(upb_value_getstr(val));
+  return UPB_CONTINUE;
+}
+
+// Registers handlers for google.protobuf.FieldDescriptorProto on "h" and
+// returns the new message handlers.  A value handler is installed for each
+// of the type, label, number, name, type_name and default_value fields.
+static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto(
+    upb_handlers *h) {
+  upb_mhandlers *m = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg);
+  upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg);
+
+// Creates the field handlers for FieldDescriptorProto field "name" and sets
+// "handler" as its value callback.  Expands to a single expression; the
+// caller supplies the terminating semicolon.
+#define FIELD(name, handler) \
+  upb_fhandlers_setvalue( \
+      upb_mhandlers_newfhandlers(m, \
+          GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \
+          GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \
+          false), \
+      handler)
+  FIELD(TYPE, &upb_fielddef_ontype);
+  FIELD(LABEL, &upb_fielddef_onlabel);
+  FIELD(NUMBER, &upb_fielddef_onnumber);
+  FIELD(NAME, &upb_fielddef_onname);
+  FIELD(TYPE_NAME, &upb_fielddef_ontypename);
+  FIELD(DEFAULT_VALUE, &upb_fielddef_ondefaultval);
+  return m;
+}
+// This function defines FIELD (not FNUM/FTYPE), so that is what must be
+// undefined to keep the macro from leaking into the rest of the file.
+#undef FIELD
+
+
+// google.protobuf.DescriptorProto.
+// Start-of-message handler for DescriptorProto: pushes a new msgdef onto the
+// reader's def list and opens a new container scope for its contents.
+static upb_flow_t upb_msgdef_startmsg(void *_r) {
+  upb_descreader *reader = _r;
+  upb_msgdef *new_msg = upb_msgdef_new();
+  upb_deflist_push(&reader->defs, UPB_UPCAST(new_msg));
+  upb_descreader_startcontainer(reader);
+  return UPB_CONTINUE;
+}
+
+// End-of-message handler for DescriptorProto.  If the message received a
+// name, lays it out and closes its container scope; otherwise reports an
+// error through "status" and does neither.
+static void upb_msgdef_endmsg(void *_r, upb_status *status) {
+  upb_descreader *reader = _r;
+  upb_msgdef *msgdef = upb_descreader_top(reader);
+  if (msgdef->base.fqname) {
+    upb_msgdef_layout(msgdef);
+    upb_descreader_endcontainer(reader);
+  } else {
+    upb_seterr(status, UPB_ERROR, "Encountered message with no name.");
+  }
+}
+
+// Handler for DescriptorProto.name: replaces the fqname of the msgdef on top
+// of the reader's stack with a new ref on the string in "val", and records
+// the same string as the name of the current scope.
+static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) {
+  upb_descreader *reader = _r;
+  (void)fval;
+  assert(val.type == UPB_TYPE(STRING));
+  upb_msgdef *msgdef = upb_descreader_top(reader);
+  upb_string_unref(msgdef->base.fqname);
+  msgdef->base.fqname = upb_string_getref(upb_value_getstr(val));
+  upb_descreader_setscopename(reader, upb_value_getstr(val));
+  return UPB_CONTINUE;
+}
+
+// Registers handlers for google.protobuf.DescriptorProto on "h" and returns
+// the new message handlers.  Handles the "name" field plus the "field",
+// "enum_type" and "nested_type" submessage fields.
+static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) {
+  upb_mhandlers *mh = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(mh, &upb_msgdef_startmsg);
+  upb_mhandlers_setendmsg(mh, &upb_msgdef_endmsg);
+
+#define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM
+#define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE
+  upb_fhandlers *name_fh =
+      upb_mhandlers_newfhandlers(mh, FNUM(NAME), FTYPE(NAME), false);
+  upb_fhandlers_setvalue(name_fh, &upb_msgdef_onname);
+
+  upb_mhandlers *field_mh = upb_fielddef_register_FieldDescriptorProto(h);
+  upb_mhandlers_newfhandlers_subm(mh, FNUM(FIELD), FTYPE(FIELD), true,
+                                  field_mh);
+
+  upb_mhandlers *enum_mh = upb_enumdef_register_EnumDescriptorProto(h);
+  upb_mhandlers_newfhandlers_subm(mh, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true,
+                                  enum_mh);
+
+  // DescriptorProto is self-recursive, so nested types are parsed by these
+  // very same handlers.
+  upb_mhandlers_newfhandlers_subm(
+      mh, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, mh);
+
+  // TODO: extensions.
+  return mh;
+}
+#undef FNUM
+#undef FTYPE
+
diff --git a/src/upb_descriptor.h b/src/upb_descriptor.h
new file mode 100644
index 0000000..f74de3b
--- /dev/null
+++ b/src/upb_descriptor.h
@@ -0,0 +1,67 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Routines for building defs by parsing descriptors in descriptor.proto format.
+ * This only needs to use the public API of upb_symtab. Later we may also
+ * add routines for dumping a symtab to a descriptor.
+ */
+
+#ifndef UPB_DESCRIPTOR_H
+#define UPB_DESCRIPTOR_H
+
+#include "upb_handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* upb_descreader ************************************************************/
+
+// upb_descreader reads a descriptor and puts defs in a upb_symtabtxn.
+
+// We keep a stack of all the message scopes we are currently in, as well as
+// the top-level file scope. This is necessary to correctly qualify the
+// definitions that are contained inside. "name" tracks the name of the
+// message or package (a bare name -- not qualified by any enclosing scopes).
+typedef struct {
+ upb_string *name;
+ // Index of the first def that is under this scope. For msgdefs, the
+ // msgdef itself is at start-1.
+ int start;
+} upb_descreader_frame;
+
+// State carried through the descriptor handlers; a pointer to this struct is
+// the closure the handlers receive.
+typedef struct {
+ // Defs created so far; the start-of-message handlers push new defs here.
+ upb_deflist defs;
+ // Transaction passed to upb_descreader_init().
+ upb_symtabtxn *txn;
+ // Scope frames (see upb_descreader_frame) and the number of them in use.
+ upb_descreader_frame stack[UPB_MAX_TYPE_DEPTH];
+ int stack_len;
+ upb_status status;
+
+ // NOTE(review): presumably scratch state for the enum value currently being
+ // parsed (its number/name and whether each was seen) -- confirm against
+ // the EnumValueDescriptorProto handlers.
+ uint32_t number;
+ upb_string *name;
+ bool saw_number;
+ bool saw_name;
+
+ // Default-value text of the field being parsed, held until the end of the
+ // field message when its type is known; NULL when there is none pending.
+ upb_string *default_string;
+
+ // The fielddef currently being built by the FieldDescriptorProto handlers.
+ upb_fielddef *f;
+} upb_descreader;
+
+// Creates a new descriptor builder that will add defs to the given txn.
+void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn);
+void upb_descreader_uninit(upb_descreader *r);
+
+// Registers handlers that will load descriptor data into a symtabtxn.
+// Pass the descreader as the closure. The messages will have
+// upb_msgdef_layout() called on them before adding to the txn.
+upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/src/upb_glue.c b/src/upb_glue.c
index 1422463..f288855 100644
--- a/src/upb_glue.c
+++ b/src/upb_glue.c
@@ -5,32 +5,29 @@
* Author: Josh Haberman <jhaberman@gmail.com>
*/
+#include "upb_decoder.h"
+#include "upb_descriptor.h"
#include "upb_glue.h"
#include "upb_msg.h"
-#include "upb_decoder.h"
#include "upb_strstream.h"
#include "upb_textprinter.h"
-void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md,
+void upb_strtomsg(upb_string *str, void *msg, upb_msgdef *md,
upb_status *status) {
upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str);
- upb_handlers *h = upb_handlers_new();
- upb_msg_reghandlers(h, md);
-
upb_decoder d;
- upb_decoder_init(&d, h);
+ upb_decoder_initformsgdef(&d, md);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), msg);
- upb_handlers_unref(h);
-
upb_decoder_decode(&d, status);
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
}
+#if 0
void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
bool single_line) {
upb_stringsink strsink;
@@ -53,23 +50,49 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
upb_textprinter_free(p);
upb_handlers_unref(h);
}
+#endif
-void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) {
+// TODO: read->load.
+// Decodes the serialized descriptor in "str" into a fresh symtabtxn using the
+// descreader handlers, gives every field of every message def in the txn the
+// standard accessor, lays the messages out, and commits the txn to "symtab"
+// if "status" is still ok.  All temporaries are torn down before returning.
+void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status) {
upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str);
upb_handlers *h = upb_handlers_new();
- upb_defbuilder_reghandlers(h);
+ upb_descreader_reghandlers(h);
upb_decoder d;
- upb_decoder_init(&d, h);
+ upb_decoder_initforhandlers(&d, h);
upb_handlers_unref(h);
- upb_defbuilder *b = upb_defbuilder_new(symtab);
- upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), b);
+ upb_descreader r;
+ upb_symtabtxn txn;
+ upb_symtabtxn_init(&txn);
+ upb_descreader_init(&r, &txn);
+ upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), &r);
upb_decoder_decode(&d, status);
+ // Set default accessors and layouts on all messages.
+ // for msgdef in symtabtxn:
+ upb_symtabtxn_iter i;
+ for(i = upb_symtabtxn_begin(&txn); !upb_symtabtxn_done(i);
+ i = upb_symtabtxn_next(&txn, i)) {
+ upb_def *def = upb_symtabtxn_iter_def(i);
+ upb_msgdef *md = upb_dyncast_msgdef(def);
+ // Skip defs that are not messages.  This must not return: doing so
+ // would skip the commit and leak txn, r, strsrc and d.
+ if (!md) continue;
+ // For field in msgdef:
+ upb_msg_iter j;
+ for(j = upb_msg_begin(md); !upb_msg_done(j); j = upb_msg_next(md, j)) {
+ upb_fielddef *f = upb_msg_iter_field(j);
+ upb_fielddef_setaccessor(f, upb_stdmsg_accessor(f));
+ }
+ upb_msgdef_layout(md);
+ }
+
+ if (upb_ok(status)) upb_symtab_commit(symtab, &txn, status);
+
+ upb_symtabtxn_uninit(&txn);
+ upb_descreader_uninit(&r);
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
}
diff --git a/src/upb_glue.h b/src/upb_glue.h
index d1a26d1..27611cd 100644
--- a/src/upb_glue.h
+++ b/src/upb_glue.h
@@ -42,14 +42,14 @@ struct _upb_symtab;
// Decodes the given string, which must be in protobuf binary format, to the
// given upb_msg with msgdef "md", storing the status of the operation in "s".
-void upb_strtomsg(struct _upb_string *str, struct _upb_msg *msg,
+void upb_strtomsg(struct _upb_string *str, void *msg,
struct _upb_msgdef *md, struct _upb_status *s);
-void upb_msgtotext(struct _upb_string *str, struct _upb_msg *msg,
+void upb_msgtotext(struct _upb_string *str, void *msg,
struct _upb_msgdef *md, bool single_line);
-void upb_parsedesc(struct _upb_symtab *symtab, struct _upb_string *str,
- struct _upb_status *status);
+void upb_read_descriptor(struct _upb_symtab *symtab, struct _upb_string *str,
+ struct _upb_status *status);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/src/upb_handlers.c b/src/upb_handlers.c
index e630975..c29281a 100644
--- a/src/upb_handlers.c
+++ b/src/upb_handlers.c
@@ -123,9 +123,9 @@ static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, upb_msgdef *m,
fieldreg_cb, closure, mtab);
}
fh = upb_mhandlers_newfhandlers_subm(
- mh, f->number, f->type, upb_isarray(f), sub_mh);
+ mh, f->number, f->type, upb_isseq(f), sub_mh);
} else {
- fh = upb_mhandlers_newfhandlers(mh, f->number, f->type, upb_isarray(f));
+ fh = upb_mhandlers_newfhandlers(mh, f->number, f->type, upb_isseq(f));
}
if (fieldreg_cb) fieldreg_cb(closure, fh, f);
}
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 91f1454..b88df32 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -9,201 +9,23 @@
#include "upb_msg.h"
-static uint32_t upb_round_up_pow2(uint32_t v) {
- // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
- v--;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- v++;
- return v;
-}
-
-static void upb_elem_free(upb_value v, upb_fielddef *f) {
- switch(f->type) {
- case UPB_TYPE(MESSAGE):
- case UPB_TYPE(GROUP):
- _upb_msg_free(upb_value_getmsg(v), upb_downcast_msgdef(f->def));
- break;
- case UPB_TYPE(STRING):
- case UPB_TYPE(BYTES):
- _upb_string_free(upb_value_getstr(v));
- break;
- default:
- abort();
- }
-}
-
-static void upb_elem_unref(upb_value v, upb_fielddef *f) {
- assert(upb_elem_ismm(f));
- upb_atomic_t *refcount = upb_value_getrefcount(v);
- if (refcount && upb_atomic_unref(refcount))
- upb_elem_free(v, f);
-}
-
-static void upb_field_free(upb_value v, upb_fielddef *f) {
- if (upb_isarray(f)) {
- _upb_array_free(upb_value_getarr(v), f);
- } else {
- upb_elem_free(v, f);
- }
-}
-
-static void upb_field_unref(upb_value v, upb_fielddef *f) {
- assert(upb_field_ismm(f));
- upb_atomic_t *refcount = upb_value_getrefcount(v);
- if (refcount && upb_atomic_unref(refcount))
- upb_field_free(v, f);
-}
-
-
-/* upb_array ******************************************************************/
-
-upb_array *upb_array_new(void) {
- upb_array *arr = malloc(sizeof(*arr));
- upb_atomic_init(&arr->refcount, 1);
- arr->size = 0;
- arr->len = 0;
- arr->ptr = NULL;
- return arr;
-}
-
-void __attribute__((noinline)) upb_array_dorecycle(upb_array **_arr) {
- upb_array *arr = *_arr;
- if(arr && upb_atomic_only(&arr->refcount)) {
- arr->len = 0;
- } else {
- if (arr) {
- bool was_lastref = upb_atomic_unref(&arr->refcount);
- (void)was_lastref;
- assert(!was_lastref); // If it was, we would have just recycled.
- }
- *_arr = upb_array_new();
- }
-}
-
-void upb_array_recycle(upb_array **_arr) {
- upb_array *arr = *_arr;
- if(arr && upb_atomic_only(&arr->refcount)) {
- arr->len = 0;
- } else {
- upb_array_dorecycle(_arr);
- }
-}
-
-void _upb_array_free(upb_array *arr, upb_fielddef *f) {
- if (upb_elem_ismm(f)) {
- // Need to release refs on sub-objects.
- upb_valuetype_t type = upb_elem_valuetype(f);
- for (int32_t i = 0; i < arr->size; i++) {
- upb_valueptr p = _upb_array_getptr(arr, f, i);
- upb_elem_unref(upb_value_read(p, type), f);
- }
- }
- free(arr->ptr);
- free(arr);
-}
-
-void __attribute__((noinline)) upb_array_doresize(
- upb_array *arr, size_t type_size, upb_arraylen_t len) {
- upb_arraylen_t old_size = arr->size;
- size_t new_size = upb_round_up_pow2(len);
- arr->ptr = realloc(arr->ptr, new_size * type_size);
- arr->size = new_size;
- memset(arr->ptr + (old_size * type_size), 0,
- (new_size - old_size) * type_size);
-}
-
-void upb_array_resizefortypesize(upb_array *arr, size_t type_size,
- int32_t len) {
- assert(len >= 0);
- if (arr->size < len) upb_array_doresize(arr, type_size, len);
- arr->len = len;
-}
-
-void upb_array_resize(upb_array *arr, upb_fielddef *f, upb_arraylen_t len) {
- upb_array_resizefortypesize(arr, upb_types[f->type].size, len);
-}
-
-
-/* upb_msg ********************************************************************/
-
-upb_msg *upb_msg_new(upb_msgdef *md) {
- upb_msg *msg = malloc(md->size);
- // Clear all set bits and cached pointers.
- memset(msg, 0, md->size);
- upb_atomic_init(&msg->refcount, 1);
- return msg;
-}
-
-void _upb_msg_free(upb_msg *msg, upb_msgdef *md) {
- // Need to release refs on all sub-objects.
- upb_msg_iter i;
- for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- upb_valueptr p = _upb_msg_getptr(msg, f);
- upb_valuetype_t type = upb_field_valuetype(f);
- if (upb_field_ismm(f)) upb_field_unref(upb_value_read(p, type), f);
- }
- free(msg);
-}
-
-void upb_msg_recycle(upb_msg **_msg, upb_msgdef *msgdef) {
- upb_msg *msg = *_msg;
- if(msg && upb_atomic_only(&msg->refcount)) {
- upb_msg_clear(msg, msgdef);
- } else {
- upb_msg_unref(msg, msgdef);
- if (msg) {
- bool was_lastref = upb_atomic_unref(&msg->refcount);
- (void)was_lastref;
- assert(!was_lastref);
- }
- *_msg = upb_msg_new(msgdef);
- }
-}
-
-INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) {
- msg->data[f->set_bit_offset] |= f->set_bit_mask;
-}
-
-void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) {
- assert(val.type == upb_types[upb_field_valuetype(f)].inmemory_type);
- upb_valueptr ptr = _upb_msg_getptr(msg, f);
- if (upb_field_ismm(f)) {
- // Unref any previous value we may have had there.
- upb_value oldval = upb_value_read(ptr, upb_field_valuetype(f));
- upb_field_unref(oldval, f);
-
- // Ref the new value.
- upb_atomic_t *refcount = upb_value_getrefcount(val);
- if (refcount) upb_atomic_ref(refcount);
- }
- upb_msg_sethas(msg, f);
- return upb_value_write(ptr, val, upb_field_valuetype(f));
-}
-
-upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) {
- if (!upb_msg_has(msg, f)) {
- upb_value val = f->default_value;
- if (upb_issubmsg(f)) {
- // TODO: handle arrays also, which must be treated similarly.
- upb_msgdef *md = upb_downcast_msgdef(f->def);
- upb_msg *m = upb_msg_new(md);
- // Copy all set bits and values, except the refcount.
- memcpy(m , upb_value_getmsg(val), md->size);
- upb_atomic_init(&m->refcount, 0); // The msg will take a ref.
- upb_value_setmsg(&val, m);
- }
- upb_msg_set(msg, f, val);
- return val;
- } else {
- return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
+// Clears "msg" by zeroing its first md->hasbit_bytes bytes (presumably the
+// has-bit region -- confirm against the msgdef layout).  Field storage is
+// left as it is.
+void upb_msg_clear(void *msg, upb_msgdef *md) {
+  // TODO: set primitive fields to defaults?
+  memset(msg, 0, md->hasbit_bytes);
+}
+
+// Reserves room for one more element of "type_size" bytes at the end of "a"
+// and returns a pointer to that element's slot, incrementing a->len.
+// When the array is full, capacity grows to 8 elements initially and doubles
+// thereafter; only the newly added capacity is zero-filled, so a slot that
+// is being reused is returned with whatever it previously contained.
+void *upb_stdarray_append(upb_stdarray *a, size_t type_size) {
+ assert(a->len <= a->size);
+ if (a->len == a->size) {
+ size_t old_size = a->size;
+ a->size = old_size == 0 ? 8 : (old_size * 2);
+ // NOTE(review): the realloc result is not checked before it is used.
+ a->ptr = realloc(a->ptr, a->size * type_size);
+ memset(&a->ptr[old_size * type_size], 0, (a->size - old_size) * type_size);
}
+ return &a->ptr[a->len++ * type_size];
}
+#if 0
static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
upb_dispatcher *d);
@@ -253,110 +75,64 @@ void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h,
upb_dispatcher_uninit(&d);
}
+#endif
-static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) {
- upb_valueptr p = _upb_msg_getptr(msg, f);
- if (upb_isarray(f)) {
- // Create/recycle/resize the array if necessary, and find a pointer to
- // a newly-appended element.
- if (!upb_msg_has(msg, f)) {
- upb_array_recycle(p.arr);
- upb_msg_sethas(msg, f);
- }
- assert(*p.arr != NULL);
- upb_arraylen_t oldlen = upb_array_len(*p.arr);
- upb_array_resize(*p.arr, f, oldlen + 1);
- p = _upb_array_getptr(*p.arr, f, oldlen);
- }
- return p;
-}
-
-upb_msg *upb_msg_appendmsg(upb_msg *msg, upb_fielddef *f, upb_msgdef *msgdef) {
- upb_valueptr p = upb_msg_getappendptr(msg, f);
- if (upb_isarray(f) || !upb_msg_has(msg, f)) {
- upb_msg_recycle(p.msg, msgdef);
- upb_msg_sethas(msg, f);
- }
- return *p.msg;
-}
-
-
-/* upb_msg handlers ***********************************************************/
+/* Standard writers. **********************************************************/
-#if UPB_MAX_FIELDS > 2048
-#error "We're using an 8-bit integer to store a has_offset."
-#endif
-typedef struct {
- uint8_t has_offset;
- uint8_t has_mask;
- uint16_t val_offset;
- uint16_t msg_size;
- uint8_t set_flags_bytes;
- uint8_t padding;
-} upb_msgsink_fval;
-
-static upb_msgsink_fval upb_msgsink_unpackfval(upb_value fval) {
- assert(sizeof(upb_msgsink_fval) == 8);
- upb_msgsink_fval ret;
- uint64_t fval_u64 = upb_value_getuint64(fval);
- memcpy(&ret, &fval_u64, 8);
- return ret;
+// Sets the has-bit of the field identified by "fval" (a fielddef value) in
+// message "_m".  A field whose hasbit index is negative has no has-bit, and
+// the message is left untouched.
+void upb_stdmsg_sethas(void *_m, upb_value fval) {
+ char *m = _m;
+ upb_fielddef *f = upb_value_getfielddef(fval);
+ if (f->hasbit >= 0) m[f->hasbit / 8] |= (1 << (f->hasbit % 8));
}
-static uint64_t upb_msgsink_packfval(uint8_t has_offset, uint8_t has_mask,
- uint16_t val_offset, uint16_t msg_size,
- uint8_t set_flags_bytes) {
- upb_msgsink_fval fval = {
- has_offset, has_mask, val_offset, msg_size, set_flags_bytes, 0};
- uint64_t ret = 0;
- memcpy(&ret, &fval, sizeof(fval));
- return ret;
+// Returns whether the field identified by "fval" (a fielddef value) is set
+// in message "_m".  A field whose hasbit index is negative has no has-bit
+// and is always reported as present.
+bool upb_stdmsg_has(void *_m, upb_value fval) {
+ char *m = _m;
+ upb_fielddef *f = upb_value_getfielddef(fval);
+ return f->hasbit < 0 || (m[f->hasbit / 8] & (1 << (f->hasbit % 8)));
}
-#define SCALAR_VALUE_CB_PAIR(type, ctype) \
- upb_flow_t upb_msgsink_ ## type ## value(void *_m, upb_value _fval, \
- upb_value val) { \
- upb_msg *m = _m; \
- upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval); \
- m->data[fval.has_offset] |= fval.has_mask; \
- *(ctype*)&m->data[fval.val_offset] = upb_value_get ## type(val); \
+#define UPB_ACCESSORS(type, ctype) \
+ upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval, \
+ upb_value val) { \
+ upb_fielddef *f = upb_value_getfielddef(fval); \
+ uint8_t *m = _m; \
+ upb_stdmsg_sethas(_m, fval); \
+ *(ctype*)&m[f->offset] = upb_value_get ## type(val); \
return UPB_CONTINUE; \
} \
\
- upb_flow_t upb_msgsink_ ## type ## value_r(void *_a, upb_value _fval, \
- upb_value val) { \
+ upb_flow_t upb_stdmsg_set ## type ## _r(void *a, upb_value _fval, \
+ upb_value val) { \
(void)_fval; \
- upb_array *arr = _a; \
- upb_array_resizefortypesize(arr, sizeof(ctype), arr->len+1); \
- upb_valueptr p = _upb_array_getptrforsize(arr, sizeof(ctype), \
- arr->len-1); \
- *(ctype*)p._void = upb_value_get ## type(val); \
+ ctype *p = upb_stdarray_append((upb_stdarray*)a, sizeof(ctype)); \
+ *p = upb_value_get ## type(val); \
return UPB_CONTINUE; \
} \
-
-SCALAR_VALUE_CB_PAIR(double, double)
-SCALAR_VALUE_CB_PAIR(float, float)
-SCALAR_VALUE_CB_PAIR(int32, int32_t)
-SCALAR_VALUE_CB_PAIR(int64, int64_t)
-SCALAR_VALUE_CB_PAIR(uint32, uint32_t)
-SCALAR_VALUE_CB_PAIR(uint64, uint64_t)
-SCALAR_VALUE_CB_PAIR(bool, bool)
-
-upb_sflow_t upb_msgsink_startseq(void *_m, upb_value _fval) {
- upb_msg *m = _m;
- upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval);
- upb_array **arr = (upb_array**)&m->data[fval.val_offset];
- if (!(m->data[fval.has_offset] & fval.has_mask)) {
- upb_array_recycle(arr);
- m->data[fval.has_offset] |= fval.has_mask;
+ \
+ upb_value upb_stdmsg_get ## type(void *_m, upb_value fval) { \
+ uint8_t *m = _m; \
+ upb_fielddef *f = upb_value_getfielddef(fval); \
+ upb_value ret; \
+ upb_value_set ## type(&ret, *(ctype*)&m[f->offset]); \
+ return ret; \
+ } \
+ upb_value upb_stdmsg_seqget ## type(void *i) { \
+ upb_value val; \
+ upb_value_set ## type(&val, *(ctype*)i); \
+ return val; \
}
- return UPB_CONTINUE_WITH(*arr);
-}
-upb_flow_t upb_msgsink_strvalue(void *_m, upb_value _fval, upb_value val) {
- upb_msg *m = _m;
- upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval);
- m->data[fval.has_offset] |= fval.has_mask;
+UPB_ACCESSORS(double, double)
+UPB_ACCESSORS(float, float)
+UPB_ACCESSORS(int32, int32_t)
+UPB_ACCESSORS(int64, int64_t)
+UPB_ACCESSORS(uint32, uint32_t)
+UPB_ACCESSORS(uint64, uint64_t)
+UPB_ACCESSORS(bool, bool)
+UPB_ACCESSORS(ptr, void*)
+#undef UPB_ACCESSORS
+
+static void _upb_stdmsg_setstr(void *_dst, upb_value _src) {
// We do:
// - upb_string_recycle(), upb_string_substr() instead of
// - upb_string_unref(), upb_string_getref()
@@ -369,115 +145,204 @@ upb_flow_t upb_msgsink_strvalue(void *_m, upb_value _fval, upb_value val) {
// allocate string objects whereas a upb_string_getref could have avoided
// those allocations completely; if this is an issue, we could make it an
// option of the upb_msgsink which behavior is desired.
- upb_string *src = upb_value_getstr(val);
- upb_string **dst = (void*)&m->data[fval.val_offset];
+ upb_string **dst = _dst;
+ upb_string *src = upb_value_getstr(_src);
upb_string_recycle(dst);
upb_string_substr(*dst, src, 0, upb_string_len(src));
+}
+
+upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) {
+ char *m = _m;
+ upb_fielddef *f = upb_value_getfielddef(fval);
+ upb_stdmsg_sethas(_m, fval);
+ _upb_stdmsg_setstr(&m[f->offset], val);
return UPB_CONTINUE;
}
-upb_flow_t upb_msgsink_strvalue_r(void *_a, upb_value _fval,
- upb_value val) {
- upb_array *arr = _a;
- (void)_fval;
- upb_array_resizefortypesize(arr, sizeof(void*), arr->len+1);
- upb_valueptr p = _upb_array_getptrforsize(arr, sizeof(void*),
- upb_array_len(arr)-1);
- upb_string *src = upb_value_getstr(val);
- upb_string_recycle(p.str);
- upb_string_substr(*p.str, src, 0, upb_string_len(src));
+upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) {
+ (void)fval;
+ _upb_stdmsg_setstr(upb_stdarray_append((upb_stdarray*)a, sizeof(void*)), val);
return UPB_CONTINUE;
}
+// Reads the pointer stored for the field identified by "fval" in message "m"
+// and returns it re-tagged as a string value.
+upb_value upb_stdmsg_getstr(void *m, upb_value fval) {
+  upb_value result = upb_stdmsg_getptr(m, fval);
+  void *raw = upb_value_getptr(result);
+  upb_value_setstr(&result, raw);
+  return result;
+}
+
+// Reads the pointer at sequence iterator position "i" and returns it
+// re-tagged as a string value.
+upb_value upb_stdmsg_seqgetstr(void *i) {
+  upb_value result = upb_stdmsg_seqgetptr(i);
+  void *raw = upb_value_getptr(result);
+  upb_value_setstr(&result, raw);
+  return result;
+}
+
+// Allocates a message of md->size bytes, zero-fills it, clears it with
+// upb_msg_clear() and returns it.
+void *upb_stdmsg_new(upb_msgdef *md) {
+  void *msg = malloc(md->size);
+  memset(msg, 0, md->size);
+  upb_msg_clear(msg, md);
+  return msg;
+}
-upb_sflow_t upb_msgsink_startsubmsg(void *_m, upb_value _fval) {
- upb_msg *msg = _m;
- upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval);
-
- upb_msgdef md;
- md.size = fval.msg_size;
- md.set_flags_bytes = fval.set_flags_bytes;
- upb_fielddef f;
- f.set_bit_mask = fval.has_mask;
- f.set_bit_offset = fval.has_offset;
- f.label = UPB_LABEL(OPTIONAL); // Just not repeated.
- f.type = UPB_TYPE(MESSAGE);
- f.byte_offset = fval.val_offset;
-
- upb_msg **subm = _upb_msg_getptr(msg, &f).msg;
- if (!upb_msg_has(msg, &f)) {
- upb_msg_recycle(subm, &md);
- upb_msg_sethas(msg, &f);
+// Frees the standard array "s" holding the elements of repeated field "f".
+// For submessage and string fields every slot up to the array's capacity
+// (a->size, not just a->len) is released first, then the element buffer and
+// the array object itself are freed.
+void upb_stdseq_free(void *s, upb_fielddef *f) {
+  upb_stdarray *arr = s;
+  void **elems = (void**)arr->ptr;
+  if (upb_issubmsg(f)) {
+    for (int i = 0; i < arr->size; i++) {
+      upb_stdmsg_free(elems[i], upb_downcast_msgdef(f->def));
+    }
+  } else if (upb_isstring(f)) {
+    for (int i = 0; i < arr->size; i++) {
+      upb_string_unref(elems[i]);
+    }
+  }
+  free(arr->ptr);
+  free(arr);
+}
+
+// Frees message "m" of type "md" together with what its fields point at:
+// sequences, submessages and strings.  A NULL "m" is a no-op, and fields
+// whose stored pointer is NULL are skipped.
+void upb_stdmsg_free(void *m, upb_msgdef *md) {
+  if (m == NULL) return;
+  for (upb_msg_iter it = upb_msg_begin(md); !upb_msg_done(it);
+       it = upb_msg_next(md, it)) {
+    upb_fielddef *f = upb_msg_iter_field(it);
+    // Only these kinds of field store a pointer that needs releasing.
+    bool holds_ptr = upb_isseq(f) || upb_issubmsg(f) || upb_isstring(f);
+    if (!holds_ptr) continue;
+    void *child = upb_value_getptr(upb_stdmsg_getptr(m, f->fval));
+    if (child == NULL) continue;
+    if (upb_isseq(f)) {
+      upb_stdseq_free(child, f);
+    } else if (upb_issubmsg(f)) {
+      upb_stdmsg_free(child, upb_downcast_msgdef(f->def));
+    } else {
+      upb_string_unref(child);
+    }
+  }
+  free(m);
+}
+
+// Start-of-sequence handler for the repeated field identified by "fval" in
+// message "_m".  If the field is not yet marked present, its array is
+// created on first use (or reused if already allocated), reset to length 0,
+// and the field is marked present.  Returns the array as the closure for
+// the element handlers.
+upb_sflow_t upb_stdmsg_startseq(void *_m, upb_value fval) {
+  char *base = _m;
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  upb_stdarray **slot = (void*)&base[f->offset];
+  if (upb_stdmsg_has(_m, fval)) return UPB_CONTINUE_WITH(*slot);
+
+  if (*slot == NULL) {
+    upb_stdarray *fresh = malloc(sizeof(*fresh));
+    fresh->size = 0;
+    fresh->ptr = NULL;
+    *slot = fresh;
+  }
+  (*slot)->len = 0;
+  upb_stdmsg_sethas(base, fval);
+  return UPB_CONTINUE_WITH(*slot);
+}
+
+// Makes "*m" a cleared message of type "md": allocates a new one if "*m" is
+// NULL, otherwise clears the existing message in place.
+void upb_stdmsg_recycle(void **m, upb_msgdef *md) {
+  if (*m == NULL) {
+    *m = upb_stdmsg_new(md);
+    return;
+  }
+  upb_msg_clear(*m, md);
+}
+
+upb_sflow_t upb_stdmsg_startsubmsg(void *_m, upb_value fval) {
+ char *m = _m;
+ upb_fielddef *f = upb_value_getfielddef(fval);
+ void **subm = (void*)&m[f->offset];
+ if (!upb_stdmsg_has(m, fval)) {
+ upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def));
+ upb_stdmsg_sethas(m, fval);
}
return UPB_CONTINUE_WITH(*subm);
}
-upb_sflow_t upb_msgsink_startsubmsg_r(void *_a, upb_value _fval) {
- upb_array *a = _a;
+upb_sflow_t upb_stdmsg_startsubmsg_r(void *a, upb_value fval) {
assert(a != NULL);
- upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval);
-
- upb_msgdef md;
- md.size = fval.msg_size;
- md.set_flags_bytes = fval.set_flags_bytes;
- upb_fielddef f;
- f.set_bit_mask = fval.has_mask;
- f.set_bit_offset = fval.has_offset;
- f.label = UPB_LABEL(REPEATED);
- f.type = UPB_TYPE(MESSAGE);
- f.byte_offset = fval.val_offset;
-
- upb_arraylen_t oldlen = upb_array_len(a);
- upb_array_resize(a, &f, oldlen + 1);
- upb_valueptr p = _upb_array_getptr(a, &f, oldlen);
- upb_msg_recycle(p.msg, &md);
- return UPB_CONTINUE_WITH(*p.msg);
+ upb_fielddef *f = upb_value_getfielddef(fval);
+ void **subm = upb_stdarray_append((upb_stdarray*)a, sizeof(void*));
+ upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def));
+ return UPB_CONTINUE_WITH(*subm);
}
-INLINE void upb_msg_onfreg(void *c, upb_fhandlers *fh, upb_fielddef *f) {
- (void)c;
- uint16_t msg_size = 0;
- uint8_t set_flags_bytes = 0;
- if (upb_issubmsg(f)) {
- upb_msgdef *md = upb_downcast_msgdef(f->def);
- msg_size = md->size;
- set_flags_bytes = md->set_flags_bytes;
+void *upb_stdmsg_seqbegin(void *_a) {
+ upb_stdarray *a = _a;
+ return a->len > 0 ? a->ptr : NULL;
+}
+
+#define NEXTFUNC(size) \
+ void *upb_stdmsg_ ## size ## byte_seqnext(void *_a, void *iter) { \
+ upb_stdarray *a = _a; \
+ void *next = (char*)iter + size; \
+ return (char*)next < (char*)a->ptr + (a->len * size) ? next : NULL; \
}
- upb_value_setuint64(&fh->fval,
- upb_msgsink_packfval(f->set_bit_offset, f->set_bit_mask,
- f->byte_offset, msg_size, set_flags_bytes));
- if (fh->repeated) upb_fhandlers_setstartseq(fh, upb_msgsink_startseq);
-#define CASE(upb_type, type) \
-case UPB_TYPE(upb_type): \
- upb_fhandlers_setvalue(fh, upb_isarray(f) ? \
- upb_msgsink_ ## type ## value_r : upb_msgsink_ ## type ## value); \
- break;
- switch (f->type) {
- CASE(DOUBLE, double)
- CASE(FLOAT, float)
- CASE(INT32, int32)
- CASE(INT64, int64)
- CASE(UINT32, uint32)
- CASE(UINT64, uint64)
- CASE(SINT32, int32)
- CASE(SINT64, int64)
- CASE(FIXED32, uint32)
- CASE(FIXED64, uint64)
- CASE(SFIXED32, int32)
- CASE(SFIXED64, int64)
- CASE(BOOL, bool)
- CASE(ENUM, int32)
- CASE(STRING, str)
- CASE(BYTES, str)
-#undef CASE
- case UPB_TYPE(MESSAGE):
- case UPB_TYPE(GROUP):
- upb_fhandlers_setstartsubmsg(fh,
- upb_isarray(f) ? upb_msgsink_startsubmsg_r : upb_msgsink_startsubmsg);
- break;
+
+NEXTFUNC(8)
+NEXTFUNC(4)
+NEXTFUNC(1)
+
+// Each macro expands to a block that defines a function-local static
+// upb_accessor_vtbl for the given value type and returns its address.
+// The initializers are positional; NOTE(review): the member order is
+// presumably appendseq, appendsubmsg, set, has, get, seqbegin, seqnext,
+// seqget -- confirm against the upb_accessor_vtbl declaration.
+// STDMSG is for non-repeated fields (no sequence callbacks); STDMSG_R is for
+// repeated fields, where "size" selects the N-byte seqnext function.
+#define STDMSG(type) { static upb_accessor_vtbl vtbl = {NULL, &upb_stdmsg_startsubmsg, \
+ &upb_stdmsg_set ## type, &upb_stdmsg_has, &upb_stdmsg_get ## type, \
+ NULL, NULL, NULL}; return &vtbl; }
+#define STDMSG_R(type, size) { static upb_accessor_vtbl vtbl = { \
+ &upb_stdmsg_startseq, &upb_stdmsg_startsubmsg_r, &upb_stdmsg_set ## type ## _r, \
+ &upb_stdmsg_has, &upb_stdmsg_getptr, &upb_stdmsg_seqbegin, \
+ &upb_stdmsg_ ## size ## byte_seqnext, &upb_stdmsg_seqget ## type}; \
+ return &vtbl; }
+
+// Returns the standard accessor vtable for field "f", chosen by whether the
+// field is a sequence and by its type.  Every STDMSG/STDMSG_R expansion
+// below returns from the function, so the case labels never fall through to
+// one another.  Returns NULL if f->type matches none of the cases.
+upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f) {
+ if (upb_isseq(f)) {
+ switch (f->type) {
+ case UPB_TYPE(DOUBLE): STDMSG_R(double, 8)
+ case UPB_TYPE(FLOAT): STDMSG_R(float, 4)
+ case UPB_TYPE(UINT64):
+ case UPB_TYPE(FIXED64): STDMSG_R(uint64, 8)
+ case UPB_TYPE(INT64):
+ case UPB_TYPE(SFIXED64):
+ case UPB_TYPE(SINT64): STDMSG_R(int64, 8)
+ case UPB_TYPE(INT32):
+ case UPB_TYPE(SINT32):
+ case UPB_TYPE(ENUM):
+ case UPB_TYPE(SFIXED32): STDMSG_R(int32, 4)
+ case UPB_TYPE(UINT32):
+ case UPB_TYPE(FIXED32): STDMSG_R(uint32, 4)
+ case UPB_TYPE(BOOL): STDMSG_R(bool, 1)
+ // Strings and submessages are stored as pointers; the element stride is
+ // hard-coded to 8 bytes here.
+ case UPB_TYPE(STRING):
+ case UPB_TYPE(BYTES):
+ case UPB_TYPE(GROUP):
+ case UPB_TYPE(MESSAGE): STDMSG_R(str, 8) // TODO: 32-bit
+ }
+ } else {
+ switch (f->type) {
+ case UPB_TYPE(DOUBLE): STDMSG(double)
+ case UPB_TYPE(FLOAT): STDMSG(float)
+ case UPB_TYPE(UINT64):
+ case UPB_TYPE(FIXED64): STDMSG(uint64)
+ case UPB_TYPE(INT64):
+ case UPB_TYPE(SFIXED64):
+ case UPB_TYPE(SINT64): STDMSG(int64)
+ case UPB_TYPE(INT32):
+ case UPB_TYPE(SINT32):
+ case UPB_TYPE(ENUM):
+ case UPB_TYPE(SFIXED32): STDMSG(int32)
+ case UPB_TYPE(UINT32):
+ case UPB_TYPE(FIXED32): STDMSG(uint32)
+ case UPB_TYPE(BOOL): STDMSG(bool)
+ case UPB_TYPE(STRING):
+ case UPB_TYPE(BYTES):
+ case UPB_TYPE(GROUP):
+ case UPB_TYPE(MESSAGE): STDMSG(str)
+ }
+ }
+ return NULL;
+}
+
+static void upb_accessors_onfreg(void *c, upb_fhandlers *fh, upb_fielddef *f) {
+ (void)c;
+ if (f->accessor) {
+ upb_fhandlers_setstartseq(fh, f->accessor->appendseq);
+ upb_fhandlers_setvalue(fh, f->accessor->set);
+ upb_fhandlers_setstartsubmsg(fh, f->accessor->appendsubmsg);
+ upb_fhandlers_setfval(fh, f->fval);
}
}
-upb_mhandlers *upb_msg_reghandlers(upb_handlers *h, upb_msgdef *m) {
- return upb_handlers_regmsgdef(h, m, NULL, &upb_msg_onfreg, NULL);
+upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, upb_msgdef *m) {
+ return upb_handlers_regmsgdef(h, m, NULL, &upb_accessors_onfreg, NULL);
}
diff --git a/src/upb_msg.h b/src/upb_msg.h
index 4e1b4d5..b93037b 100644
--- a/src/upb_msg.h
+++ b/src/upb_msg.h
@@ -4,285 +4,122 @@
* Copyright (c) 2010-2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
- * Data structure for storing a message of protobuf data. Unlike Google's
- * protobuf, upb_msg and upb_array are reference counted instead of having
- * exclusive ownership of their fields. This is a better match for dynamic
- * languages where statements like a.b = other_b are normal.
+ * Routines for reading and writing message data to an in-memory structure,
+ * similar to a C struct.
*
- * upb's parsers and serializers could also be used to populate and serialize
- * other kinds of message objects (even one generated by Google's protobuf).
+ * upb does not define one single message object that everyone must use.
+ * Rather it defines an abstract interface for reading and writing members
+ * of a message object, and all of the parsers and serializers use this
+ * abstract interface. This allows upb's parsers and serializers to be used
+ * regardless of what memory management scheme or synchronization model the
+ * application is using.
*
- * TODO: consider properly supporting const instances.
+ * A standard set of accessors is provided for doing simple reads and writes at
+ * a known offset into the message. These accessors should be used when
+ * possible, because they are specially optimized -- for example, the JIT can
+ * recognize them and emit specialized code instead of having to call the
+ * function at all. The application can substitute its own accessors when the
+ * standard accessors are not suitable.
*/
#ifndef UPB_MSG_H
#define UPB_MSG_H
#include <stdlib.h>
+#include "upb_def.h"
#include "upb_handlers.h"
#ifdef __cplusplus
extern "C" {
#endif
-// A pointer to a .proto value. The owner must have an out-of-band way of
-// knowing the type, so it knows which union member to use.
-typedef union {
- double *_double;
- float *_float;
- int32_t *int32;
- int64_t *int64;
- uint8_t *uint8;
- uint32_t *uint32;
- uint64_t *uint64;
- bool *_bool;
- upb_string **str;
- upb_msg **msg;
- upb_array **arr;
- void *_void;
-} upb_valueptr;
-
-INLINE upb_valueptr upb_value_addrof(upb_value *val) {
- upb_valueptr ptr = {&val->val._double};
- return ptr;
-}
-// Reads or writes a upb_value from an address represented by a upb_value_ptr.
-// We need to know the value type to perform this operation, because we need to
-// know how much memory to copy (and for big-endian machines, we need to know
-// where in the upb_value the data goes).
-//
-// For little endian-machines where we didn't mind overreading, we could make
-// upb_value_read simply use memcpy().
-INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) {
- upb_value val;
-
-#ifdef NDEBUG
-#define CASE(t, member_name) \
- case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break;
-#else
-#define CASE(t, member_name) \
- case UPB_TYPE(t): val.val.member_name = *ptr.member_name; val.type = upb_types[ft].inmemory_type; break;
-#endif
+/* upb_accessor ***************************************************************/
- switch(ft) {
- CASE(DOUBLE, _double)
- CASE(FLOAT, _float)
- CASE(INT32, int32)
- CASE(INT64, int64)
- CASE(UINT32, uint32)
- CASE(UINT64, uint64)
- CASE(SINT32, int32)
- CASE(SINT64, int64)
- CASE(FIXED32, uint32)
- CASE(FIXED64, uint64)
- CASE(SFIXED32, int32)
- CASE(SFIXED64, int64)
- CASE(BOOL, _bool)
- CASE(ENUM, int32)
- CASE(STRING, str)
- CASE(BYTES, str)
- CASE(MESSAGE, msg)
- CASE(GROUP, msg)
- case UPB_VALUETYPE_ARRAY:
- val.val.arr = *ptr.arr;
-#ifndef NDEBUG
- val.type = UPB_VALUETYPE_ARRAY;
-#endif
- break;
- default: assert(false);
- }
- return val;
+// A upb_accessor is a table of function pointers for doing reads and writes
+// for one specific upb_fielddef. Each field has a separate accessor, which
+// lives in the fielddef.
-#undef CASE
-}
+typedef bool upb_has_reader(void *m, upb_value fval);
+typedef upb_value upb_value_reader(void *m, upb_value fval);
-INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
- upb_fieldtype_t ft) {
-#ifndef NDEBUG
- if (ft == UPB_VALUETYPE_ARRAY) {
- assert(val.type == UPB_VALUETYPE_ARRAY);
- } else if (val.type != UPB_VALUETYPE_RAW) {
- assert(val.type == upb_types[ft].inmemory_type);
- }
-#endif
-#define CASE(t, member_name) \
- case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break;
-
- switch(ft) {
- CASE(DOUBLE, _double)
- CASE(FLOAT, _float)
- CASE(INT32, int32)
- CASE(INT64, int64)
- CASE(UINT32, uint32)
- CASE(UINT64, uint64)
- CASE(SINT32, int32)
- CASE(SINT64, int64)
- CASE(FIXED32, uint32)
- CASE(FIXED64, uint64)
- CASE(SFIXED32, int32)
- CASE(SFIXED64, int64)
- CASE(BOOL, _bool)
- CASE(ENUM, int32)
- CASE(STRING, str)
- CASE(BYTES, str)
- CASE(MESSAGE, msg)
- CASE(GROUP, msg)
- case UPB_VALUETYPE_ARRAY:
- *ptr.arr = val.val.arr;
- break;
- default: assert(false);
- }
-
-#undef CASE
-}
+typedef void *upb_seqbegin_handler(void *s);  // Returns an iterator for sequence s.
+typedef void *upb_seqnext_handler(void *s, void *iter);  // Returns the iterator that follows iter in s.
+typedef upb_value upb_seqget_handler(void *iter);  // Returns the element at iter.
+INLINE bool upb_seq_done(void *iter) { return iter == NULL; }  // A NULL iterator marks the end of the sequence.
+typedef struct _upb_accessor_vtbl {
+ // Writers. These take an fval as a parameter because the callbacks are used
+ // as upb_handlers, but the fval is always the fielddef for that field.
+ upb_startfield_handler *appendseq; // Repeated fields only.
+ upb_startfield_handler *appendsubmsg; // Submsg fields (repeated or not).
+ upb_value_handler *set; // Scalar fields (repeated or not).
-/* upb_array ******************************************************************/
+ // Readers. Each is invoked through the matching upb_msg_*/upb_seq_* wrapper.
+ upb_has_reader *has;  // Called by upb_msg_has().
+ upb_value_reader *get;  // Called by upb_msg_get().
+ upb_seqbegin_handler *seqbegin;  // Called by upb_seq_begin().
+ upb_seqnext_handler *seqnext;  // Called by upb_seq_next().
+ upb_seqget_handler *seqget;  // Called by upb_seq_get().
+} upb_accessor_vtbl;
-typedef uint32_t upb_arraylen_t;
-struct _upb_array {
- upb_atomic_t refcount;
- // "len" and "size" are measured in elements, not bytes.
- int32_t len;
- int32_t size;
- char *ptr;
-};
-
-void _upb_array_free(upb_array *a, upb_fielddef *f);
-INLINE upb_valueptr _upb_array_getptrforsize(upb_array *a, size_t type_size,
- int32_t elem) {
- assert(elem >= 0);
- upb_valueptr p;
- p._void = &a->ptr[elem * type_size];
- return p;
-}
+// Registers handlers for writing into a message of the given type.
+upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, upb_msgdef *m);
-INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f,
- uint32_t elem) {
- return _upb_array_getptrforsize(a, upb_types[f->type].size, elem);
-}
+// Returns an stdmsg accessor for the given fielddef.
+upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f);
-upb_array *upb_array_new(void);
-INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) {
- if (a && upb_atomic_unref(&a->refcount)) _upb_array_free(a, f);
-}
+/* upb_msg/upb_seq ************************************************************/
-void upb_array_recycle(upb_array **arr);
-INLINE uint32_t upb_array_len(upb_array *a) {
- return a->len;
-}
+// upb_msg and upb_seq allow for generic access to a message through its
+// accessor vtable. Note that these do *not* allow you to create, destroy, or
+// take references on the objects -- these operations are specifically outside
+// the scope of what the accessors define.
-INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f,
- upb_arraylen_t i) {
- assert(i < upb_array_len(arr));
- return upb_value_read(_upb_array_getptr(arr, f, i), f->type);
-}
+// Clears all hasbits.
+// TODO: Add a separate function for setting primitive values back to their
+// defaults (but not strings, submessages, or arrays).
+void upb_msg_clear(void *msg, upb_msgdef *md);
+// Could add a method that recursively clears submessages, strings, and
+// arrays if desired. This could be a win if you wanted to merge without
+// needing hasbits, because during parsing you would never clear submessages
+// or arrays. Also this could be desired to provide proto2 operations on
+// generated messages.
-/* upb_msg ********************************************************************/
-
-// upb_msg is not self-describing; the upb_msg does not contain a pointer to the
-// upb_msgdef. While this makes the API a bit more cumbersome to use, this
-// choice was made for a few important reasons:
-//
-// 1. it would make every message 8 bytes larger on 64-bit platforms. This is
-// a high overhead for small messages.
-// 2. you would want the msg to own a ref on its msgdef, but this would require
-// an atomic operation for every message create or destroy!
-struct _upb_msg {
- upb_atomic_t refcount;
- uint8_t data[4]; // We allocate the appropriate amount per message.
-};
-
-void _upb_msg_free(upb_msg *msg, upb_msgdef *md);
-
-INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) {
- upb_valueptr p;
- p._void = &msg->data[f->byte_offset];
- return p;
+INLINE bool upb_msg_has(void *m, upb_fielddef *f) {  // Whether field f is set in message m, as reported by the field's accessor.
+ return f->accessor && f->accessor->has(m, f->fval);  // A field with no accessor is always reported as not set.
 }
-// Creates a new msg of the given type.
-upb_msg *upb_msg_new(upb_msgdef *md);
-
-// Unrefs the given message.
-INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) {
- if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md);
-}
-
-INLINE upb_msg *upb_msg_getref(upb_msg *msg) {
- assert(msg);
- upb_atomic_ref(&msg->refcount);
- return msg;
+// Returns f's value in m via the accessor's get(). May only be called for fields that are known to be set.
+INLINE upb_value upb_msg_get(void *m, upb_fielddef *f) {
+ assert(upb_msg_has(m, f));  // Also fails when f has no accessor, since upb_msg_has() is false then.
+ return f->accessor->get(m, f->fval);
 }
-// Modifies *msg to point to a newly initialized msg instance. If the msg had
-// no other referents, reuses the same msg, otherwise allocates a new one.
-// The caller *must* own a ref on the msg prior to calling this method!
-void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef);
-
-// Tests whether the given field is explicitly set, or whether it will return a
-// default.
-INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) {
- return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0;
+INLINE void *upb_seq_begin(void *s, upb_fielddef *f) {  // Starts iterating sequence s through f's accessor and returns the iterator.
+ assert(f->accessor);  // The field must have an accessor.
+ return f->accessor->seqbegin(s);
 }
-
-// We have several options for handling default values:
-// 1. inside upb_msg_clear(), overwrite all values to be their defaults,
-// overwriting submessage pointers to point to the default instance again.
-// 2. inside upb_msg_get(), test upb_msg_has() and return md->default_value
-// if it is not set. upb_msg_clear() only clears the set bits.
-// We lazily clear objects if/when we reuse them.
-// 3. inside upb_msg_clear(), overwrite all values to be their default,
-// and recurse into submessages to set all their values to defaults also.
-// 4. as a hybrid of (1) and (3), clear all set bits in upb_msg_clear()
-// but also overwrite all primitive values to be their defaults. Only
-// accessors for non-primitive values (submessage, strings, and arrays)
-// need to check the has-bits in their accessors -- primitive values can
-// always be returned straight from the msg.
-//
-// (1) is undesirable, because it prevents us from caching sub-objects.
-// (2) makes clear() cheaper, but makes get() branchier.
-// (3) makes get() less branchy, but makes clear() traverse the message graph.
-// (4) is probably the best bang for the buck.
-//
-// For the moment upb does (2), but we should implement (4). Google's protobuf
-// does (3), which is likely part of the reason that even our table-based
-// decoder beats it in some benchmarks.
-
-// For submessages and strings, the returned value is not owned.
-upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f);
-
-// A specialized version of the previous that is cheaper because it doesn't
-// support submessages or arrays.
-INLINE upb_value upb_msg_getscalar(upb_msg *msg, upb_fielddef *f) {
- if (upb_msg_has(msg, f)) {
- return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
- } else {
- return f->default_value;
- }
+INLINE void *upb_seq_next(void *s, void *iter, upb_fielddef *f) {  // Advances iter over s through f's accessor and returns the new iterator.
+ assert(f->accessor);  // The field must have an accessor.
+ assert(!upb_seq_done(iter));  // Must not be called on a finished (NULL) iterator.
+ return f->accessor->seqnext(s, iter);
 }
-
-// Sets the given field to the given value. If the field is a string, array,
-// or submessage, releases the ref on any object we may have been referencing
-// and takes a ref on the new object (if any).
-void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val);
-
-// Unsets all field values back to their defaults.
-INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) {
- memset(msg->data, 0, md->set_flags_bytes);
+INLINE upb_value upb_seq_get(void *iter, upb_fielddef *f) {  // Reads the element at iter through f's accessor.
+ assert(f->accessor);  // The field must have an accessor.
+ assert(!upb_seq_done(iter));  // A finished (NULL) iterator has no element to read.
+ return f->accessor->seqget(iter);
 }
-// Registers handlers for populating a msg for the given upb_msgdef.
-// The upb_msg itself must be passed as the param to the src.
-upb_mhandlers *upb_msg_reghandlers(upb_handlers *h, upb_msgdef *md);
-
/* upb_msgvisitor *************************************************************/
-// Calls a set of upb_handlers with the contents of a upb_msg.
+// A upb_msgvisitor reads data from an in-memory structure using its accessors,
+// pushing the results to a given set of upb_handlers.
+// TODO: not yet implemented.
+
typedef struct {
upb_fhandlers *fh;
upb_fielddef *f;
@@ -314,6 +151,118 @@ void upb_msgvisitor_uninit(upb_msgvisitor *v);
void upb_msgvisitor_reset(upb_msgvisitor *v, upb_msg *m);
void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status);
+
+/* Standard writers. **********************************************************/
+
+// Allocates a new stdmsg.
+void *upb_stdmsg_new(upb_msgdef *md);
+
+// Recursively frees any strings or submessages that the message refers to.
+void upb_stdmsg_free(void *m, upb_msgdef *md);
+
+// "hasbit" must be <= UPB_MAX_FIELDS. If it is <0, this field has no hasbit.
+upb_value upb_stdmsg_packfval(int16_t hasbit, uint16_t value_offset);
+upb_value upb_stdmsg_packfval_subm(int16_t hasbit, uint16_t value_offset,
+ uint16_t subm_size, uint8_t subm_setbytes);
+
+// Value writers for every in-memory type: write the data to a known offset
+// from the closure "c" and set the hasbit (if any).
+// TODO: can we get away with having only one for int64, uint64, double, etc.?
+// The main thing in the way at the moment is that the upb_value is strongly
+// typed in debug mode.
+upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint32(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setdouble(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val);
+
+// Value writers for repeated fields: the closure points to a standard array
+// struct; each writer appends the value to the end of the array, resizing
+// with realloc() if necessary.
+typedef struct {
+ char *ptr;  // NOTE(review): presumably the element buffer that the writers grow with realloc() -- confirm.
+ int32_t len; // Number of elements present.
+ int32_t size; // Number of elements allocated.
+} upb_stdarray;
+
+upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setint32_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint64_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint32_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setdouble_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setfloat_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setbool_r(void *c, upb_value fval, upb_value val);
+
+// Writers for C strings (NUL-terminated): we can find a char* at a known
+// offset from the closure "c". Calls realloc() on the pointer to allocate
+// the memory (TODO: investigate whether checking malloc_usable_size() would
+// be cheaper than realloc()). Also sets the hasbit, if any.
+//
+// Since the string is NUL-terminated and does not store an explicit length,
+// these are not suitable for binary data that can contain NUL bytes.
+upb_flow_t upb_stdmsg_setcstr(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setcstr_r(void *c, upb_value fval, upb_value val);
+
+// Writers for length-delimited strings: we explicitly store the length, so
+// the data can contain NUL bytes. Stores the data using upb_stdarray
+// which is located at a known offset from the closure "c" (note that it
+// is included inline rather than pointed to). Also sets the hasbit, if any.
+upb_flow_t upb_stdmsg_setstr(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setstr_r(void *c, upb_value fval, upb_value val);
+
+// Writers for startseq and startmsg which allocate (or reuse, if possible)
+// a sub data structure (upb_stdarray or a submessage, respectively),
+// setting the hasbit. If the hasbit is already set, the existing data
+// structure is used verbatim. If the hasbit is not already set, the pointer
+// is checked for NULL. If it is NULL, a new substructure is allocated,
+// cleared, and used. If it is not NULL, the existing substructure is
+// cleared and reused.
+//
+// If there is no hasbit, we always behave as if the hasbit was not set,
+// so any existing data for this array or submessage is cleared. In most
+// cases this will be fine since each array or non-repeated submessage should
+// occur at most once in the stream. But if the client is using "concatenation
+// as merging", it will want to make sure hasbits are allocated so merges can
+// happen appropriately.
+//
+// If there was a demand for the behavior that absence of a hasbit acts as if
+// the bit was always set, we could provide that also. But Clear() would need
+// to act recursively, which is less efficient since it requires an extra pass
+// over the tree.
+upb_sflow_t upb_stdmsg_startseq(void *c, upb_value fval);
+upb_sflow_t upb_stdmsg_startsubmsg(void *c, upb_value fval);
+upb_sflow_t upb_stdmsg_startsubmsg_r(void *c, upb_value fval);
+
+
+/* Standard readers. **********************************************************/
+
+bool upb_stdmsg_has(void *c, upb_value fval);
+void *upb_stdmsg_seqbegin(void *c);
+
+upb_value upb_stdmsg_getint64(void *c, upb_value fval);
+upb_value upb_stdmsg_getint32(void *c, upb_value fval);
+upb_value upb_stdmsg_getuint64(void *c, upb_value fval);
+upb_value upb_stdmsg_getuint32(void *c, upb_value fval);
+upb_value upb_stdmsg_getdouble(void *c, upb_value fval);
+upb_value upb_stdmsg_getfloat(void *c, upb_value fval);
+upb_value upb_stdmsg_getbool(void *c, upb_value fval);
+upb_value upb_stdmsg_getptr(void *c, upb_value fval);
+
+void *upb_stdmsg_8byte_seqnext(void *c, void *iter);
+void *upb_stdmsg_4byte_seqnext(void *c, void *iter);
+void *upb_stdmsg_1byte_seqnext(void *c, void *iter);
+
+upb_value upb_stdmsg_seqgetint64(void *c);
+upb_value upb_stdmsg_seqgetint32(void *c);
+upb_value upb_stdmsg_seqgetuint64(void *c);
+upb_value upb_stdmsg_seqgetuint32(void *c);
+upb_value upb_stdmsg_seqgetdouble(void *c);
+upb_value upb_stdmsg_seqgetfloat(void *c);
+upb_value upb_stdmsg_seqgetbool(void *c);
+upb_value upb_stdmsg_seqgetptr(void *c);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/src/upb_string.h b/src/upb_string.h
index 1463bbf..1f92850 100644
--- a/src/upb_string.h
+++ b/src/upb_string.h
@@ -107,6 +107,8 @@ void _upb_string_free(upb_string *str);
// can be NULL, in which case this is a no-op. WARNING: NOT THREAD_SAFE
// UNLESS THE STRING IS SYNCHRONIZED.
INLINE void upb_string_unref(upb_string *str) {
+ if (str) {
+ }
if (str && upb_atomic_read(&str->refcount) > 0 &&
upb_atomic_unref(&str->refcount)) {
_upb_string_free(str);
@@ -129,7 +131,9 @@ INLINE upb_string *upb_string_getref(upb_string *str) {
int refcount = upb_atomic_read(&str->refcount);
if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str);
// We don't ref the special <0 refcount for static strings.
- if (refcount > 0) upb_atomic_ref(&str->refcount);
+ if (refcount > 0) {
+ upb_atomic_ref(&str->refcount);
+ }
return str;
}
diff --git a/src/upb_table.h b/src/upb_table.h
index 9b53a37..631709c 100644
--- a/src/upb_table.h
+++ b/src/upb_table.h
@@ -103,6 +103,12 @@ INLINE uint32_t upb_strtable_count(upb_strtable *t) {
void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val);
void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *ent); // TODO: update
void upb_inttable_compact(upb_inttable *t);
+INLINE void upb_strtable_clear(upb_strtable *t) {  // Frees t and re-initializes it in place with the same entry size.
+ // TODO: improve.
+ uint16_t entry_size = t->t.entry_size;  // Saved before the free so the re-init can reuse it.
+ upb_strtable_free(t);
+ upb_strtable_init(t, 8, entry_size);  // NOTE(review): 8 is presumably the initial table size -- confirm.
+}
INLINE uint32_t _upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) {
uint32_t bucket = k & t->t.mask; // Identity hash for ints.
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback