From 559e23c796f973a65d05c76e211835b126ee8ac8 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 17 Jun 2011 10:34:29 -0700 Subject: Major refactoring: abandon upb_msg, add upb_accessors. Next on the chopping block is upb_string. --- Makefile | 5 +- benchmarks/parsestream.upb_table.c | 4 +- benchmarks/parsetostruct.upb_table.c | 13 +- src/upb.h | 7 +- src/upb_decoder.c | 10 +- src/upb_decoder.h | 12 +- src/upb_def.c | 1458 +++++++++++----------------------- src/upb_def.h | 371 +++++---- src/upb_descriptor.c | 548 +++++++++++++ src/upb_descriptor.h | 67 ++ src/upb_glue.c | 49 +- src/upb_glue.h | 8 +- src/upb_handlers.c | 4 +- src/upb_msg.c | 611 ++++++-------- src/upb_msg.h | 429 +++++----- src/upb_string.h | 6 +- src/upb_table.h | 6 + tests/test_decoder.c | 9 +- tests/test_vs_proto2.cc | 42 +- tests/tests.c | 4 +- 20 files changed, 1847 insertions(+), 1816 deletions(-) create mode 100644 src/upb_descriptor.c create mode 100644 src/upb_descriptor.h diff --git a/Makefile b/Makefile index 437cf5a..4a2593b 100644 --- a/Makefile +++ b/Makefile @@ -73,12 +73,14 @@ $(ALLSRC): perf-cppflags CORE= \ src/upb.c \ src/upb_handlers.c \ + src/upb_descriptor.c \ src/upb_table.c \ src/upb_string.c \ src/upb_def.c \ src/upb_msg.c \ src/upb_varint.c \ + # Common encoders/decoders -- you're almost certain to want these. STREAM= \ src/upb_decoder.c \ @@ -101,7 +103,8 @@ TESTS_SRC= \ tests/test_string.c \ tests/tests.c \ tests/tests_varint.c \ - tests/test_vs_proto2.cc + + #tests/test_vs_proto2.cc #tests/test_stream.c \ diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c index a4aebd8..b1763da 100644 --- a/benchmarks/parsestream.upb_table.c +++ b/benchmarks/parsestream.upb_table.c @@ -36,7 +36,7 @@ static bool initialize() upb_printerr(&status); return false; } - upb_parsedesc(s, fds_str, &status); + upb_read_descriptor(s, fds_str, &status); upb_string_unref(fds_str); if(!upb_ok(&status)) { @@ -64,7 +64,7 @@ static bool initialize() // Cause all messages to be read, but do nothing when they are. upb_handlerset hset = {NULL, NULL, value, startsubmsg, NULL, NULL, NULL}; upb_handlers_reghandlerset(handlers, def, &hset); - upb_decoder_init(&decoder, handlers); + upb_decoder_initforhandlers(&decoder, handlers); upb_handlers_unref(handlers); upb_stringsrc_init(&stringsrc); return true; diff --git a/benchmarks/parsetostruct.upb_table.c b/benchmarks/parsetostruct.upb_table.c index f05395f..f0ddb99 100644 --- a/benchmarks/parsetostruct.upb_table.c +++ b/benchmarks/parsetostruct.upb_table.c @@ -9,7 +9,7 @@ static upb_string *input_str; static upb_msgdef *def; -static upb_msg *msg; +static void *msg; static upb_stringsrc strsrc; static upb_decoder d; @@ -25,7 +25,7 @@ static bool initialize() upb_printerr(&status); return false; } - upb_parsedesc(s, fds_str, &status); + upb_read_descriptor(s, fds_str, &status); upb_string_unref(fds_str); if(!upb_ok(&status)) { @@ -49,13 +49,10 @@ static bool initialize() return false; } upb_status_uninit(&status); - msg = upb_msg_new(def); + msg = upb_stdmsg_new(def); upb_stringsrc_init(&strsrc); - upb_handlers *handlers = upb_handlers_new(); - upb_msg_reghandlers(handlers, def); - upb_decoder_init(&d, handlers); - upb_handlers_unref(handlers); + upb_decoder_initformsgdef(&d, def); if (!BYREF) { // Pretend the input string is stack-allocated, which will force its data @@ -74,7 +71,7 @@ static void cleanup() input_str->refcount.v = 1; } upb_string_unref(input_str); - upb_msg_unref(msg, def); + upb_stdmsg_free(msg, def); upb_def_unref(UPB_UPCAST(def)); upb_stringsrc_uninit(&strsrc); upb_decoder_uninit(&d); diff --git a/src/upb.h b/src/upb.h index d3e7b34..59429f4 100644 --- a/src/upb.h +++ b/src/upb.h @@ -146,6 +146,7 @@ typedef uint8_t upb_valuetype_t; #define UPB_VALUETYPE_BYTESRC 32 #define UPB_VALUETYPE_RAW 33 #define UPB_VALUETYPE_FIELDDEF 34 +#define UPB_VALUETYPE_PTR 35 // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. @@ -196,11 +197,9 @@ UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64)); UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); -UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); -UPB_VALUE_ACCESSORS(msg, msg, upb_msg*, UPB_TYPE(MESSAGE)); -UPB_VALUE_ACCESSORS(arr, arr, upb_array*, UPB_VALUETYPE_ARRAY); -UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC); +UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); // Marked for destruction. UPB_VALUE_ACCESSORS(fielddef, fielddef, upb_fielddef*, UPB_VALUETYPE_FIELDDEF); +UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR); extern upb_value UPB_NO_VALUE; diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 34cd811..a44b561 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -11,6 +11,7 @@ #include "upb_bytestream.h" #include "upb_decoder.h" #include "upb_varint.h" +#include "upb_msg.h" // Used for frames that have no specific end offset: groups, repeated primitive // fields inside groups, and the top-level message. @@ -346,7 +347,7 @@ static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top, d->ptr = d->buf + bottom->end_offset; } -void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) { +void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) { upb_dispatcher_init( &d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d); #ifdef UPB_USE_JIT_X64 @@ -388,6 +389,13 @@ void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) { } } +void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) { + upb_handlers *h = upb_handlers_new(); + upb_accessors_reghandlers(h, m); + upb_decoder_initforhandlers(d, h); + upb_handlers_unref(h); +} + void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) { upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED; d->bytesrc = bytesrc; diff --git a/src/upb_decoder.h b/src/upb_decoder.h index a98b235..e9bc0b4 100644 --- a/src/upb_decoder.h +++ b/src/upb_decoder.h @@ -91,8 +91,16 @@ typedef struct { struct _upb_decoder; typedef struct _upb_decoder upb_decoder; -// Allocates and frees a upb_decoder, respectively. -void upb_decoder_init(upb_decoder *d, upb_handlers *handlers); +// Initializes/uninitializes a decoder for calling into the given handlers +// or to write into the given msgdef, given its accessors). Takes a ref +// on the handlers or msgdef. +void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *h); + +// Equivalent to: +// upb_accessors_reghandlers(m, h); +// upb_decoder_initforhandlers(d, h); +// except possibly more efficient, by using cached state in the msgdef. +void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m); void upb_decoder_uninit(upb_decoder *d); // Resets the internal state of an already-allocated decoder. This puts it in a diff --git a/src/upb_def.c b/src/upb_def.c index 791b885..45e7f73 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -7,30 +7,10 @@ #include #include -#include #include "upb_def.h" -#include "upb_msg.h" #define alignof(t) offsetof(struct { char c; t x; }, x) -static int upb_div_round_up(int numerator, int denominator) { - /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ - return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; -} - -/* Joins strings together, for example: - * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" - * join("", "Baz") -> "Baz" - * Caller owns a ref on the returned string. */ -static upb_string *upb_join(upb_string *base, upb_string *name) { - if (!base || upb_string_len(base) == 0) { - return upb_string_getref(name); - } else { - return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT, - UPB_STRARG(base), UPB_STRARG(name)); - } -} - /* Search for a character in a string, in reverse. */ static int my_memrchr(char *data, char c, size_t len) { @@ -39,181 +19,18 @@ static int my_memrchr(char *data, char c, size_t len) return off; } -/* upb_def ********************************************************************/ - -// Defs are reference counted, but can have cycles when types are -// self-recursive or mutually recursive, so we need to be capable of collecting -// the cycles. In our situation defs are immutable (so cycles cannot be -// created or destroyed post-initialization). We need to be thread-safe but -// want to avoid locks if at all possible and rely only on atomic operations. -// -// Our scheme is as follows. First we give each def a flag indicating whether -// it is part of a cycle or not. Because defs are immutable, this flag will -// never change. For acyclic defs, we can use a naive algorithm and avoid the -// overhead of dealing with cycles. Most defs will be acyclic, and most cycles -// will be very short. -// -// For defs that participate in cycles we keep two reference counts. One -// tracks references that come from outside the cycle (we call these external -// references), and is incremented and decremented like a regular refcount. -// The other is a cycle refcount, and works as follows. Every cycle is -// considered distinct, even if two cycles share members. For example, this -// graph has two distinct cycles: -// -// A-->B-->C -// ^ | | -// +---+---+ -// -// The cycles in this graph are AB and ABC. When A's external refcount -// transitions from 0->1, we say that A takes "cycle references" on both -// cycles. Taking a cycle reference means incrementing the cycle refcount of -// all defs in the cycle. Since A and B are common to both cycles, A and B's -// cycle refcounts will be incremented by two, and C's will be incremented by -// one. Likewise, when A's external refcount transitions from 1->0, we -// decrement A and B's cycle refcounts by two and C's by one. We collect a -// cyclic type when its cycle refcount drops to zero. A precondition for this -// is that the external refcount has dropped to zero also. -// -// This algorithm is relatively cheap, since it only requires extra work when -// the external refcount on a cyclic type transitions from 0->1 or 1->0. - -static void upb_msgdef_free(upb_msgdef *m); -static void upb_enumdef_free(upb_enumdef *e); -static void upb_unresolveddef_free(struct _upb_unresolveddef *u); - -static void upb_def_free(upb_def *def) -{ - switch(def->type) { - case UPB_DEF_MSG: - upb_msgdef_free(upb_downcast_msgdef(def)); - break; - case UPB_DEF_ENUM: - upb_enumdef_free(upb_downcast_enumdef(def)); - break; - case UPB_DEF_SVC: - assert(false); /* Unimplemented. */ - break; - case UPB_DEF_UNRESOLVED: - upb_unresolveddef_free(upb_downcast_unresolveddef(def)); - break; - default: - assert(false); - } -} - -// Depth-first search for all cycles that include cycle_base. Returns the -// number of paths from def that lead to cycle_base, which is equivalent to the -// number of cycles def is in that include cycle_base. -// -// open_defs tracks the set of nodes that are currently being visited in the -// search so we can stop the search if we detect a cycles that do not involve -// cycle_base. We can't color the nodes as we go by writing to a member of the -// def, because another thread could be performing the search concurrently. -static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, - upb_msgdef **open_defs, int num_open_defs, - bool ref) { - bool found = false; - for(int i = 0; i < num_open_defs; i++) { - if(open_defs[i] == m) { - // We encountered a cycle that did not involve cycle_base. - found = true; - break; - } - } - - if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) { - return 0; - } else if(m == cycle_base) { - return 1; - } else { - int path_count = 0; - if(cycle_base == NULL) { - cycle_base = m; - } else { - open_defs[num_open_defs++] = m; - } - upb_msg_iter iter = upb_msg_begin(m); - for(; !upb_msg_done(iter); iter = upb_msg_next(m, iter)) { - upb_fielddef *f = upb_msg_iter_field(iter); - upb_def *def = f->def; - if(upb_issubmsg(f) && def->is_cyclic) { - upb_msgdef *sub_m = upb_downcast_msgdef(def); - path_count += upb_cycle_ref_or_unref(sub_m, cycle_base, open_defs, - num_open_defs, ref); - } - } - if(ref) { - upb_atomic_add(&m->cycle_refcount, path_count); - } else { - if(upb_atomic_add(&m->cycle_refcount, -path_count)) - upb_def_free(UPB_UPCAST(m)); - } - return path_count; - } -} - -void _upb_def_reftozero(upb_def *def) { - if(def->is_cyclic) { - upb_msgdef *m = upb_downcast_msgdef(def); - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - upb_cycle_ref_or_unref(m, NULL, open_defs, 0, false); - } else { - upb_def_free(def); - } -} - -void _upb_def_cyclic_ref(upb_def *def) { - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); -} - -static void upb_def_init(upb_def *def, upb_deftype type) { - def->type = type; - def->is_cyclic = 0; // We detect this later, after resolving refs. - def->search_depth = 0; - def->fqname = NULL; - upb_atomic_init(&def->refcount, 1); -} - -static void upb_def_uninit(upb_def *def) { - upb_string_unref(def->fqname); -} - - -/* upb_defbuilder ************************************************************/ - -// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in -// the format defined in descriptor.proto. The output of a upb_defbuilder is -// a list of upb_def* that possibly contain unresolved references. -// -// We use a separate object (upb_defbuilder) instead of having the defs handle -// the parse themselves because we need to store state that is only necessary -// during the building process itself. -// -// All of the handlers registration in this file must be done using the -// low-level upb_register_typed_* interface, since we might not have a msgdef -// yet (in the case of bootstrapping). This makes it more laborious than it -// will be for real users. - -// upb_deflist: A little dynamic array for storing a growing list of upb_defs. -typedef struct { - upb_def **defs; - uint32_t len; - uint32_t size; -} upb_deflist; - -static void upb_deflist_init(upb_deflist *l) { +void upb_deflist_init(upb_deflist *l) { l->size = 8; l->defs = malloc(l->size * sizeof(void*)); l->len = 0; } -static void upb_deflist_uninit(upb_deflist *l) { +void upb_deflist_uninit(upb_deflist *l) { for(uint32_t i = 0; i < l->len; i++) upb_def_unref(l->defs[i]); free(l->defs); } -static void upb_deflist_push(upb_deflist *l, upb_def *d) { +void upb_deflist_push(upb_deflist *l, upb_def *d) { if(l->len == l->size) { l->size *= 2; l->defs = realloc(l->defs, l->size * sizeof(void*)); @@ -221,179 +38,74 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { l->defs[l->len++] = d; } -static upb_def *upb_deflist_last(upb_deflist *l) { - return l->defs[l->len-1]; -} - -// Qualify the defname for all defs starting with offset "start" with "str". -static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { - for(uint32_t i = start; i < l->len; i++) { - upb_def *def = l->defs[i]; - upb_string *name = def->fqname; - def->fqname = upb_join(str, name); - upb_string_unref(name); - } -} - -// We keep a stack of all the messages scopes we are currently in, as well as -// the top-level file scope. This is necessary to correctly qualify the -// definitions that are contained inside. "name" tracks the name of the -// message or package (a bare name -- not qualified by any enclosing scopes). -typedef struct { - upb_string *name; - // Index of the first def that is under this scope. For msgdefs, the - // msgdef itself is at start-1. - int start; -} upb_defbuilder_frame; - -struct _upb_defbuilder { - upb_deflist defs; - upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; - int stack_len; - upb_status status; - upb_symtab *symtab; - - uint32_t number; - upb_string *name; - bool saw_number; - bool saw_name; - - upb_string *default_string; - - upb_fielddef *f; -}; - -// Forward declares for top-level file descriptors. -static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h); -static upb_mhandlers * upb_enumdef_register_EnumDescriptorProto(upb_handlers *h); - -upb_defbuilder *upb_defbuilder_new(upb_symtab *s) { - upb_defbuilder *b = malloc(sizeof(*b)); - upb_deflist_init(&b->defs); - upb_status_init(&b->status); - b->symtab = s; - b->stack_len = 0; - b->name = NULL; - b->default_string = NULL; - return b; -} - -static void upb_defbuilder_free(upb_defbuilder *b) { - upb_string_unref(b->name); - upb_status_uninit(&b->status); - upb_deflist_uninit(&b->defs); - upb_string_unref(b->default_string); - while (b->stack_len > 0) { - upb_defbuilder_frame *f = &b->stack[--b->stack_len]; - upb_string_unref(f->name); - } - free(b); -} - -static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) { - if (b->stack_len <= 1) return NULL; - int index = b->stack[b->stack_len-1].start - 1; - assert(index >= 0); - return upb_downcast_msgdef(b->defs.defs[index]); -} -static upb_def *upb_defbuilder_last(upb_defbuilder *b) { - return upb_deflist_last(&b->defs); -} - -// Start/end handlers for FileDescriptorProto and DescriptorProto (the two -// entities that have names and can contain sub-definitions. -void upb_defbuilder_startcontainer(upb_defbuilder *b) { - upb_defbuilder_frame *f = &b->stack[b->stack_len++]; - f->start = b->defs.len; - f->name = NULL; -} +/* upb_def ********************************************************************/ -void upb_defbuilder_endcontainer(upb_defbuilder *b) { - upb_defbuilder_frame *f = &b->stack[--b->stack_len]; - upb_deflist_qualify(&b->defs, f->name, f->start); - upb_string_unref(f->name); -} +static void upb_msgdef_free(upb_msgdef *m); +static void upb_enumdef_free(upb_enumdef *e); +static void upb_unresolveddef_free(struct _upb_unresolveddef *u); -void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { - upb_defbuilder_frame *f = &b->stack[b->stack_len-1]; - upb_string_unref(f->name); - f->name = upb_string_getref(str); -} +#ifndef NDEBUG +static bool upb_def_ismutable(upb_def *def) { return def->symtab == NULL; } +#endif -// Handlers for google.protobuf.FileDescriptorProto. -static upb_flow_t upb_defbuilder_FileDescriptorProto_startmsg(void *_b) { - upb_defbuilder *b = _b; - upb_defbuilder_startcontainer(b); - return UPB_CONTINUE; +static void upb_def_free(upb_def *def) { + switch (def->type) { + case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break; + case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break; + case UPB_DEF_UNRESOLVED: + upb_unresolveddef_free(upb_downcast_unresolveddef(def)); break; + default: + assert(false); + } } -static void upb_defbuilder_FileDescriptorProto_endmsg(void *_b, - upb_status *status) { - (void)status; - upb_defbuilder *b = _b; - upb_defbuilder_endcontainer(b); +upb_def *upb_def_dup(upb_def *def) { + switch (def->type) { + case UPB_DEF_MSG: return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef(def))); + case UPB_DEF_ENUM: return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef(def))); + default: assert(false); return NULL; + } } -static upb_flow_t upb_defbuilder_FileDescriptorProto_package(void *_b, - upb_value fval, - upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - upb_defbuilder_setscopename(b, upb_value_getstr(val)); - return UPB_CONTINUE; +// Prior to being in a symtab, the def's refcount controls the lifetime of the +// def itself. If the refcount falls to zero, the def is deleted. Once the +// def belongs to a symtab, the def is owned by the symtab and its refcount +// determines whether the def owns a ref on the symtab or not. +void upb_def_ref(upb_def *def) { + if (upb_atomic_ref(&def->refcount) && def->symtab) + upb_symtab_ref(def->symtab); } -static upb_mhandlers *upb_defbuilder_register_FileDescriptorProto( - upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_defbuilder_FileDescriptorProto_startmsg); - upb_mhandlers_setendmsg(m, &upb_defbuilder_FileDescriptorProto_endmsg); - -#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM -#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE - upb_fhandlers *f = - upb_mhandlers_newfhandlers(m, FNUM(PACKAGE), FTYPE(PACKAGE), false); - upb_fhandlers_setvalue(f, &upb_defbuilder_FileDescriptorProto_package); - - upb_mhandlers_newfhandlers_subm(m, FNUM(MESSAGE_TYPE), FTYPE(MESSAGE_TYPE), true, - upb_msgdef_register_DescriptorProto(h)); - upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, - upb_enumdef_register_EnumDescriptorProto(h)); - // TODO: services, extensions - return m; +static void upb_def_movetosymtab(upb_def *d, upb_symtab *s) { + assert(upb_atomic_read(&d->refcount) > 0); + d->symtab = s; + if (!upb_atomic_unref(&d->refcount)) upb_symtab_ref(s); + upb_msgdef *m = upb_dyncast_msgdef(d); + if (m) upb_inttable_compact(&m->itof); } -#undef FNUM -#undef FTYPE -// Handlers for google.protobuf.FileDescriptorSet. -static bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, - bool allow_redef, upb_status *status); - -static void upb_defbuilder_FileDescriptorSet_onendmsg(void *_b, - upb_status *status) { - upb_defbuilder *b = _b; - if (upb_ok(status)) - upb_symtab_add_defs(b->symtab, b->defs.defs, b->defs.len, false, status); - upb_defbuilder_free(b); +void upb_def_unref(upb_def *def) { + if (!def) return; + if (upb_atomic_unref(&def->refcount)) { + if (def->symtab) { + upb_symtab_unref(def->symtab); + // Def might be deleted now. + } else { + upb_def_free(def); + } + } } -static upb_mhandlers *upb_defbuilder_register_FileDescriptorSet(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setendmsg(m, upb_defbuilder_FileDescriptorSet_onendmsg); - -#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM -#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE - upb_mhandlers_newfhandlers_subm(m, FNUM(FILE), FTYPE(FILE), true, - upb_defbuilder_register_FileDescriptorProto(h)); - return m; +static void upb_def_init(upb_def *def, upb_deftype_t type) { + def->type = type; + def->fqname = NULL; + def->symtab = NULL; + upb_atomic_init(&def->refcount, 1); } -#undef FNUM -#undef FTYPE -upb_mhandlers *upb_defbuilder_reghandlers(upb_handlers *h) { - h->should_jit = false; - return upb_defbuilder_register_FileDescriptorSet(h); +static void upb_def_uninit(upb_def *def) { + upb_string_unref(def->fqname); } @@ -428,6 +140,14 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { /* upb_enumdef ****************************************************************/ +upb_enumdef *upb_enumdef_new() { + upb_enumdef *e = malloc(sizeof(*e)); + upb_def_init(&e->base, UPB_DEF_ENUM); + upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); + return e; +} + static void upb_enumdef_free(upb_enumdef *e) { upb_enum_iter i; for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { @@ -440,129 +160,29 @@ static void upb_enumdef_free(upb_enumdef *e) { free(e); } -// google.protobuf.EnumValueDescriptorProto. -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { - upb_defbuilder *b = _b; - b->saw_number = false; - b->saw_name = false; - return UPB_CONTINUE; -} - -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_b, - upb_value fval, - upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - upb_string_unref(b->name); - b->name = upb_string_getref(upb_value_getstr(val)); - b->saw_name = true; - return UPB_CONTINUE; -} - -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_number(void *_b, - upb_value fval, - upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - b->number = upb_value_getint32(val); - b->saw_number = true; - return UPB_CONTINUE; -} - -static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b, - upb_status *status) { - upb_defbuilder *b = _b; - if(!b->saw_number || !b->saw_name) { - upb_seterr(status, UPB_ERROR, "Enum value missing name or number."); - return; - } - upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); - if (upb_inttable_count(&e->iton) == 0) { - // The default value of an enum (in the absence of an explicit default) is - // its first listed value. - e->default_value = b->number; - } - upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; - upb_iton_ent iton_ent = {0, b->name}; - upb_strtable_insert(&e->ntoi, &ntoi_ent.e); - upb_inttable_insert(&e->iton, b->number, &iton_ent); - // We don't unref "name" because we pass our ref to the iton entry of the - // table. strtables can ref their keys, but the inttable doesn't know that - // the value is a string. - b->name = NULL; -} - -static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto( - upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumValueDescriptorProto_startmsg); - upb_mhandlers_setendmsg(m, &upb_enumdef_EnumValueDescriptorProto_endmsg); - -#define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM -#define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f; - f = upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); - upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_name); - - f = upb_mhandlers_newfhandlers(m, FNUM(NUMBER), FTYPE(NUMBER), false); - upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_number); - return m; -} -#undef FNUM -#undef FTYPE - -// google.protobuf.EnumDescriptorProto. -static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { - upb_defbuilder *b = _b; - upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); - upb_deflist_push(&b->defs, UPB_UPCAST(e)); - return UPB_CONTINUE; -} - -static void upb_enumdef_EnumDescriptorProto_endmsg(void *_b, upb_status *status) { - upb_defbuilder *b = _b; - upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); - if (upb_defbuilder_last((upb_defbuilder*)_b)->fqname == NULL) { - upb_seterr(status, UPB_ERROR, "Enum had no name."); - return; - } - if (upb_inttable_count(&e->iton) == 0) { - upb_seterr(status, UPB_ERROR, "Enum had no values."); - return; +upb_enumdef *upb_enumdef_dup(upb_enumdef *e) { + upb_enumdef *new_e = upb_enumdef_new(); + upb_enum_iter i; + for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { + assert(upb_enumdef_addval(new_e, upb_enum_iter_name(i), + upb_enum_iter_number(i))); } + return new_e; } -static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_b, - upb_value fval, - upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); - upb_string_unref(e->base.fqname); - e->base.fqname = upb_string_getref(upb_value_getstr(val)); - return UPB_CONTINUE; +bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num) { + if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL)) return false; + upb_ntoi_ent ntoi_ent = {{name, 0}, num}; + upb_iton_ent iton_ent = {0, name}; + upb_strtable_insert(&e->ntoi, &ntoi_ent.e); + upb_inttable_insert(&e->iton, num, &iton_ent); // Uses strtable's ref on name + return true; } -static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumDescriptorProto_startmsg); - upb_mhandlers_setendmsg(m, &upb_enumdef_EnumDescriptorProto_endmsg); - -#define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM -#define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f = - upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); - upb_fhandlers_setvalue(f, &upb_enumdef_EnumDescriptorProto_name); - - upb_mhandlers_newfhandlers_subm(m, FNUM(VALUE), FTYPE(VALUE), true, - upb_enumdef_register_EnumValueDescriptorProto(h)); - return m; +void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) { + assert(upb_def_ismutable(UPB_UPCAST(e))); + e->defaultval = val; } -#undef FNUM -#undef FTYPE upb_enum_iter upb_enum_begin(upb_enumdef *e) { // We could iterate over either table here; the choice is arbitrary. @@ -573,59 +193,89 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { return upb_inttable_next(&e->iton, iter); } -upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) { +upb_string *upb_enumdef_iton(upb_enumdef *def, int32_t num) { upb_iton_ent *e = (upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); return e ? e->string : NULL; } -bool upb_enumdef_ntoi(upb_enumdef *def, upb_string *name, upb_enumval_t *num) { +bool upb_enumdef_ntoi(upb_enumdef *def, upb_string *name, int32_t *num) { upb_ntoi_ent *e = (upb_ntoi_ent*)upb_strtable_lookup(&def->ntoi, name); if (!e) return false; - *num = e->value; + if (num) *num = e->value; return true; } /* upb_fielddef ***************************************************************/ +upb_fielddef *upb_fielddef_new() { + upb_fielddef *f = malloc(sizeof(*f)); + f->msgdef = NULL; + f->def = NULL; + upb_atomic_init(&f->refcount, 1); + f->finalized = false; + f->type = 0; + f->label = UPB_LABEL(OPTIONAL); + f->hasbit = 0; + f->offset = 0; + f->number = 0; // not a valid field number. + f->name = NULL; + f->accessor = NULL; + upb_value_setfielddef(&f->fval, f); + return f; +} + static void upb_fielddef_free(upb_fielddef *f) { if (upb_isstring(f)) { - upb_string_unref(upb_value_getstr(f->default_value)); - } else if (upb_issubmsg(f)) { - upb_msg *m = upb_value_getmsg(f->default_value); - assert(m); - // We cheat a bit here. We need to unref msg, but we don't have a reliable - // way of accessing the msgdef (which is required by upb_msg_unref()), - // because f->def may have already been collected as part of a cycle if - // this is an unowned ref. But we know that default messages never contain - // references to other messages, and their only string references are to - // the singleton empty string, so we can safely unref+free msg directly. - if (upb_atomic_unref(&m->refcount)) free(m); + upb_string_unref(upb_value_getstr(f->defaultval)); } upb_string_unref(f->name); - if(f->owned) { - upb_def_unref(f->def); - } free(f); } +void upb_fielddef_ref(upb_fielddef *f) { + // TODO. + (void)f; +} + +void upb_fielddef_unref(upb_fielddef *f) { + // TODO. + (void)f; + if (!f) return; + if (upb_atomic_unref(&f->refcount)) { + if (f->msgdef) { + upb_msgdef_unref(f->msgdef); + // fielddef might be deleted now. + } else { + upb_fielddef_free(f); + } + } +} + +upb_fielddef *upb_fielddef_dup(upb_fielddef *f) { + upb_fielddef *newf = upb_fielddef_new(); + newf->msgdef = f->msgdef; + newf->type = f->type; + newf->label = f->label; + newf->number = f->number; + newf->name = f->name; + upb_fielddef_settypename(newf, f->def->fqname); + return f; +} + static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) { - if(f->owned) upb_def_unref(f->def); + assert(upb_dyncast_unresolveddef(f->def)); + upb_def_unref(f->def); f->def = def; - // We will later make the ref unowned if it is a part of a cycle. - f->owned = true; - upb_def_ref(def); - if (upb_issubmsg(f)) { - upb_msgdef *md = upb_downcast_msgdef(def); - upb_value_setmsg(&f->default_value, upb_msg_getref(md->default_message)); - } else if (f->type == UPB_TYPE(ENUM)) { - upb_string *str = upb_value_getstr(f->default_value); + if (f->type == UPB_TYPE(ENUM)) { + // Resolve the enum's default from a string to an integer. + upb_string *str = upb_value_getstr(f->defaultval); assert(str); // Should point to either a real default or the empty string. upb_enumdef *e = upb_downcast_enumdef(f->def); - upb_enumval_t val = 0; + int32_t val = 0; if (str == upb_emptystring()) { - upb_value_setint32(&f->default_value, e->default_value); + upb_value_setint32(&f->defaultval, e->defaultval); } else { bool success = upb_enumdef_ntoi(e, str, &val); upb_string_unref(str); @@ -634,368 +284,201 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) { "member of the enum", UPB_STRARG(str)); return false; } - upb_value_setint32(&f->default_value, val); + upb_value_setint32(&f->defaultval, val); } } return true; } -static upb_flow_t upb_fielddef_startmsg(void *_b) { - upb_defbuilder *b = _b; - upb_fielddef *f = malloc(sizeof(*f)); - f->number = -1; - f->name = NULL; - f->def = NULL; - f->owned = false; - f->msgdef = upb_defbuilder_top(b); - b->f = f; - return UPB_CONTINUE; -} - -// Converts the default value in string "dstr" into "d". Passes a ref on dstr. -// Returns true on success. -static bool upb_fielddef_setdefault(upb_string *dstr, upb_value *d, int type) { - bool success = true; - if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) { - // We'll keep the ref we had on it. We include enums in this case because - // we need the enumdef to resolve the name, but we may not have it yet. - // We'll resolve it later. - if (dstr) { - upb_value_setstr(d, dstr); - } else { - upb_value_setstr(d, upb_emptystring()); - } - } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) { - // We don't expect to get a default value. - upb_string_unref(dstr); - if (dstr != NULL) success = false; - } else { - // The strto* functions need the string to be NULL-terminated. - char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr); - char *end; - upb_string_unref(dstr); - switch (type) { - case UPB_TYPE(INT32): - case UPB_TYPE(SINT32): - case UPB_TYPE(SFIXED32): - if (strz) { - long val = strtol(strz, &end, 0); - if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) - success = false; - else - upb_value_setint32(d, val); - } else { - upb_value_setint32(d, 0); - } - break; - case UPB_TYPE(INT64): - case UPB_TYPE(SINT64): - case UPB_TYPE(SFIXED64): - if (strz) { - upb_value_setint64(d, strtoll(strz, &end, 0)); - if (errno == ERANGE || *end) success = false; - } else { - upb_value_setint64(d, 0); - } - break; - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - if (strz) { - unsigned long val = strtoul(strz, &end, 0); - if (val > UINT32_MAX || errno == ERANGE || *end) - success = false; - else - upb_value_setuint32(d, val); - } else { - upb_value_setuint32(d, 0); - } - break; - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - if (strz) { - upb_value_setuint64(d, strtoull(strz, &end, 0)); - if (errno == ERANGE || *end) success = false; - } else { - upb_value_setuint64(d, 0); - } - break; - case UPB_TYPE(DOUBLE): - if (strz) { - upb_value_setdouble(d, strtod(strz, &end)); - if (errno == ERANGE || *end) success = false; - } else { - upb_value_setdouble(d, 0.0); - } - break; - case UPB_TYPE(FLOAT): - if (strz) { - upb_value_setfloat(d, strtof(strz, &end)); - if (errno == ERANGE || *end) success = false; - } else { - upb_value_setfloat(d, 0.0); - } - break; - case UPB_TYPE(BOOL): - if (!strz || strcmp(strz, "false") == 0) - upb_value_setbool(d, false); - else if (strcmp(strz, "true") == 0) - upb_value_setbool(d, true); - else - success = false; - break; - } - free(strz); - } - return success; +void upb_fielddef_setnumber(upb_fielddef *f, int32_t number) { + assert(f->msgdef == NULL); + f->number = number; } -static void upb_fielddef_endmsg(void *_b, upb_status *status) { - upb_defbuilder *b = _b; - upb_fielddef *f = b->f; - // TODO: verify that all required fields were present. - assert(f->number != -1 && f->name != NULL); - assert((f->def != NULL) == upb_hasdef(f)); +void upb_fielddef_setname(upb_fielddef *f, upb_string *name) { + assert(f->msgdef == NULL); + f->name = upb_string_getref(name); +} - // Field was successfully read, add it as a field of the msgdef. - upb_msgdef *m = upb_defbuilder_top(b); - upb_itof_ent itof_ent = {0, f->type, upb_types[f->type].native_wire_type, f}; - upb_ntof_ent ntof_ent = {{f->name, 0}, f}; - upb_inttable_insert(&m->itof, f->number, &itof_ent); - upb_strtable_insert(&m->ntof, &ntof_ent.e); +void upb_fielddef_settype(upb_fielddef *f, uint8_t type) { + assert(!f->finalized); + f->type = type; +} - upb_string *dstr = b->default_string; - b->default_string = NULL; - if (!upb_fielddef_setdefault(dstr, &f->default_value, f->type)) { - // We don't worry too much about giving a great error message since the - // compiler should have ensured this was correct. - upb_seterr(status, UPB_ERROR, "Error converting default value."); - return; - } +void upb_fielddef_setlabel(upb_fielddef *f, uint8_t label) { + assert(!f->finalized); + f->label = label; +} +void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) { + assert(!f->finalized); + // TODO: string ownership? + f->defaultval = value; } -static upb_flow_t upb_fielddef_ontype(void *_b, upb_value fval, upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - b->f->type = upb_value_getint32(val); - return UPB_CONTINUE; -} - -static upb_flow_t upb_fielddef_onlabel(void *_b, upb_value fval, upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - b->f->label = upb_value_getint32(val); - return UPB_CONTINUE; -} - -static upb_flow_t upb_fielddef_onnumber(void *_b, upb_value fval, upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - b->f->number = upb_value_getint32(val); - return UPB_CONTINUE; -} - -static upb_flow_t upb_fielddef_onname(void *_b, upb_value fval, upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - upb_string_unref(b->f->name); - b->f->name = upb_string_getref(upb_value_getstr(val)); - return UPB_CONTINUE; -} - -static upb_flow_t upb_fielddef_ontypename(void *_b, upb_value fval, - upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - upb_def_unref(b->f->def); - b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); - b->f->owned = true; - return UPB_CONTINUE; -} - -static upb_flow_t upb_fielddef_ondefaultval(void *_b, upb_value fval, - upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - // Have to convert from string to the correct type, but we might not know the - // type yet. - upb_string_unref(b->default_string); - b->default_string = upb_string_getref(upb_value_getstr(val)); - return UPB_CONTINUE; -} - -static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto( - upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg); - upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg); - -#define FIELD(name, handler) \ - upb_fhandlers_setvalue( \ - upb_mhandlers_newfhandlers(m, \ - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \ - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \ - false), \ - handler); - FIELD(TYPE, &upb_fielddef_ontype); - FIELD(LABEL, &upb_fielddef_onlabel); - FIELD(NUMBER, &upb_fielddef_onnumber); - FIELD(NAME, &upb_fielddef_onname); - FIELD(TYPE_NAME, &upb_fielddef_ontypename); - FIELD(DEFAULT_VALUE, &upb_fielddef_ondefaultval); - return m; +void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) { + assert(!f->finalized); + // TODO: string ownership? + f->fval = fval; } -#undef FNUM -#undef FTYPE +void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl) { + assert(!f->finalized); + f->accessor = vtbl; +} -/* upb_msgdef *****************************************************************/ +void upb_fielddef_settypename(upb_fielddef *f, upb_string *name) { + upb_def_unref(f->def); + f->def = UPB_UPCAST(upb_unresolveddef_new(name)); +} -static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) { - // Sort by data size (ascending) to reduce padding. +// Returns an ordering of fields based on: +// 1. value size (small to large). +// 2. field number. +static int upb_fielddef_cmpval(const void *_f1, const void *_f2) { + upb_fielddef *f1 = *(void**)_f1; + upb_fielddef *f2 = *(void**)_f2; size_t size1 = upb_types[f1->type].size; size_t size2 = upb_types[f2->type].size; if (size1 != size2) return size1 - size2; - // Otherwise return in number order (just so we get a reproduceable order. + // Otherwise return in number order. return f1->number - f2->number; } -static int upb_compare_fields(const void *f1, const void *f2) { - return upb_compare_typed_fields(*(void**)f1, *(void**)f2); +// Returns an ordering of all fields based on: +// 1. required/optional (required fields first). +// 2. field number +static int upb_fielddef_cmphasbit(const void *_f1, const void *_f2) { + upb_fielddef *f1 = *(void**)_f1; + upb_fielddef *f2 = *(void**)_f2; + size_t req1 = f1->label == UPB_LABEL(REQUIRED); + size_t req2 = f2->label == UPB_LABEL(REQUIRED); + if (req1 != req2) return req1 - req2; + // Otherwise return in number order. + return f1->number - f2->number; } -// google.protobuf.DescriptorProto. -static upb_flow_t upb_msgdef_startmsg(void *_b) { - upb_defbuilder *b = _b; + +/* upb_msgdef *****************************************************************/ + +upb_msgdef *upb_msgdef_new() { upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); - upb_atomic_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); - m->default_message = NULL; - upb_deflist_push(&b->defs, UPB_UPCAST(m)); - upb_defbuilder_startcontainer(b); - return UPB_CONTINUE; -} - -static void upb_msgdef_endmsg(void *_b, upb_status *status) { - upb_defbuilder *b = _b; - upb_msgdef *m = upb_defbuilder_top(b); - if(!m->base.fqname) { - upb_seterr(status, UPB_ERROR, "Encountered message with no name."); - return; + m->size = 0; + m->hasbit_bytes = 0; + m->extension_start = 0; + m->extension_end = 0; + return m; +} + +static void upb_msgdef_free(upb_msgdef *m) { + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) + upb_fielddef_free(upb_msg_iter_field(i)); + upb_strtable_free(&m->ntof); + upb_inttable_free(&m->itof); + upb_def_uninit(&m->base); + free(m); +} + +upb_msgdef *upb_msgdef_dup(upb_msgdef *m) { + upb_msgdef *newm = upb_msgdef_new(); + newm->size = m->size; + newm->hasbit_bytes = m->hasbit_bytes; + newm->extension_start = m->extension_start; + newm->extension_end = m->extension_end; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) + upb_msgdef_addfield(newm, upb_fielddef_dup(upb_msg_iter_field(i))); + return newm; +} + +void upb_msgdef_setsize(upb_msgdef *m, uint16_t size) { + assert(upb_def_ismutable(UPB_UPCAST(m))); + m->size = size; +} + +void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes) { + assert(upb_def_ismutable(UPB_UPCAST(m))); + m->hasbit_bytes = bytes; +} + +void upb_msgdef_setextension_start(upb_msgdef *m, uint32_t start) { + assert(upb_def_ismutable(UPB_UPCAST(m))); + m->extension_start = start; +} + +void upb_msgdef_setextension_end(upb_msgdef *m, uint32_t end) { + assert(upb_def_ismutable(UPB_UPCAST(m))); + m->extension_end = end; +} + +bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) { + assert(upb_atomic_read(&f->refcount) > 0); + if (!upb_atomic_unref(&f->refcount)) upb_msgdef_ref(m); + if (upb_msgdef_itof(m, f->number) || upb_msgdef_ntof(m, f->name)) { + upb_fielddef_unref(f); + return false; } + assert(f->msgdef == NULL); + f->msgdef = m; + upb_itof_ent itof_ent = {0, f}; + upb_ntof_ent ntof_ent = {{f->name, 0}, f}; + upb_inttable_insert(&m->itof, f->number, &itof_ent); + upb_strtable_insert(&m->ntof, &ntof_ent.e); + return true; +} + +static int upb_div_round_up(int numerator, int denominator) { + /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ + return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; +} - upb_inttable_compact(&m->itof); - // Create an ordering over the fields. - int n = upb_msgdef_numfields(m); - upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n); - int field = 0; +void upb_msgdef_layout(upb_msgdef *m) { + // Create an ordering over the fields, but only include fields with accessors. + upb_fielddef **sorted_fields = + malloc(sizeof(upb_fielddef*) * upb_msgdef_numfields(m)); + int n = 0; upb_msg_iter i; for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - sorted_fields[field++] = upb_msg_iter_field(i); + upb_fielddef *f = upb_msg_iter_field(i); + if (f->accessor) sorted_fields[n++] = f; } - qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields); - // Assign offsets in the msg. - m->set_flags_bytes = upb_div_round_up(n, 8); - m->size = sizeof(upb_atomic_t) + m->set_flags_bytes; + m->hasbit_bytes = upb_div_round_up(n, 8); + m->size = m->hasbit_bytes; // + header_size? + // Assign hasbits. + qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmphasbit); + for (int i = 0; i < n; i++) { + upb_fielddef *f = sorted_fields[i]; + f->hasbit = i; + } + + // Assign value offsets. + qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmpval); size_t max_align = 0; for (int i = 0; i < n; i++) { upb_fielddef *f = sorted_fields[i]; const upb_type_info *type_info = &upb_types[f->type]; - - // This identifies the set bit. When we implement is_initialized (a - // general check about whether all required bits are set) we will probably - // want to use a different ordering that puts all the required bits - // together. - f->field_index = i; - f->set_bit_mask = 1 << (i % 8); - f->set_bit_offset = i / 8; - - size_t size, align; - if (upb_isarray(f)) { + size_t size = type_info->size; + size_t align = type_info->align; + if (upb_isseq(f)) { size = sizeof(void*); align = alignof(void*); - } else { - size = type_info->size; - align = type_info->align; } + // General alignment rules are: each member must be at an address that is a // multiple of that type's alignment. Also, the size of the structure as a // whole must be a multiple of the greatest alignment of any member. - size_t offset = upb_align_up(m->size, align); - // Offsets are relative to the end of the refcount. - f->byte_offset = offset - sizeof(upb_atomic_t); - m->size = offset + size; + f->offset = upb_align_up(m->size, align); + m->size = f->offset + size; max_align = UPB_MAX(max_align, align); } - free(sorted_fields); - if (max_align > 0) m->size = upb_align_up(m->size, max_align); - // Create default message instance, an immutable message with all default - // values set (except submessages, which are simply marked as unset). We - // could alternatively leave all set bits unset, but this would make - // upb_msg_get() take its unexpected branch more often for no good reason. - m->default_message = upb_msg_new(m); - for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if (!upb_issubmsg(f) && !f->type == UPB_TYPE(ENUM)) { - upb_msg_set(m->default_message, f, f->default_value); - } - } - - upb_defbuilder_endcontainer(b); -} - -static upb_flow_t upb_msgdef_onname(void *_b, upb_value fval, upb_value val) { - (void)fval; - upb_defbuilder *b = _b; - assert(val.type == UPB_TYPE(STRING)); - upb_msgdef *m = upb_defbuilder_top(b); - upb_string_unref(m->base.fqname); - m->base.fqname = upb_string_getref(upb_value_getstr(val)); - upb_defbuilder_setscopename(b, upb_value_getstr(val)); - return UPB_CONTINUE; -} - -static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_msgdef_startmsg); - upb_mhandlers_setendmsg(m, &upb_msgdef_endmsg); - -#define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM -#define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f = - upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); - upb_fhandlers_setvalue(f, &upb_msgdef_onname); - - upb_mhandlers_newfhandlers_subm(m, FNUM(FIELD), FTYPE(FIELD), true, - upb_fielddef_register_FieldDescriptorProto(h)); - upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, - upb_enumdef_register_EnumDescriptorProto(h)); - - // DescriptorProto is self-recursive, so we must link the definition. - upb_mhandlers_newfhandlers_subm( - m, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, m); - - // TODO: extensions. - return m; -} -#undef FNUM -#undef FTYPE - -static void upb_msgdef_free(upb_msgdef *m) -{ - upb_msg_unref(m->default_message, m); - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) - upb_fielddef_free(upb_msg_iter_field(i)); - upb_strtable_free(&m->ntof); - upb_inttable_free(&m->itof); - upb_def_uninit(&m->base); - free(m); + free(sorted_fields); } upb_msg_iter upb_msg_begin(upb_msgdef *m) { @@ -1006,22 +489,49 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { return upb_inttable_next(&m->itof, iter); } -/* upb_symtab adding defs *****************************************************/ -// This is a self-contained group of functions that, given a list of upb_defs -// whose references are not yet resolved, resolves references and adds them -// atomically to a upb_symtab. +/* upb_symtabtxn **************************************************************/ typedef struct { upb_strtable_entry e; upb_def *def; } upb_symtab_ent; +void upb_symtabtxn_init(upb_symtabtxn *t) { + upb_strtable_init(&t->deftab, 16, sizeof(upb_symtab_ent)); +} + +void upb_symtabtxn_uninit(upb_symtabtxn *txn) { + upb_strtable *t = &txn->deftab; + upb_symtab_ent *e; + for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) + upb_def_unref(e->def); + upb_strtable_free(t); +} + +bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def) { + // TODO: check if already present. + upb_symtab_ent e = {{def->fqname, 0}, def}; + upb_strtable_insert(&t->deftab, &e.e); + return true; +} + +#if 0 +err: + // We need to free all defs from "tmptab." + upb_rwlock_unlock(&s->lock); + for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; + e = upb_strtable_next(&tmptab, &e->e)) { + upb_def_unref(e->def); + } + upb_strtable_free(&tmptab); + return false; +#endif + // Given a symbol and the base symbol inside which it is defined, find the // symbol's definition in t. static upb_symtab_ent *upb_resolve(upb_strtable *t, - upb_string *base, upb_string *sym) -{ + upb_string *base, upb_string *sym) { if(upb_string_len(sym) == 0) return NULL; if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) { // Symbols starting with '.' are absolute, so we do a single lookup. @@ -1060,212 +570,63 @@ static upb_symtab_ent *upb_resolve(upb_strtable *t, } } -// Performs a pass over the type graph to find all cycles that include m. -static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) -{ - if(depth > UPB_MAX_TYPE_DEPTH) { - // We have found a non-cyclic path from the base of the type tree that - // exceeds the maximum allowed depth. There are many situations in upb - // where we recurse over the type tree (like for example, right now) and an - // absurdly deep tree could cause us to stack overflow on systems with very - // limited stacks. - upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was found at " - "depth %d in the type graph, which exceeds the maximum type " - "depth of %d.", UPB_UPCAST(m)->fqname, depth, - UPB_MAX_TYPE_DEPTH); - return false; - } else if(UPB_UPCAST(m)->search_depth == 1) { - // Cycle! - int cycle_len = depth - 1; - if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { - upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was involved " - "in a cycle of length %d, which exceeds the maximum type " - "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, - UPB_MAX_TYPE_CYCLE_LEN); - return false; - } - return true; - } else if(UPB_UPCAST(m)->search_depth > 0) { - // This was a cycle, but did not originate from the base of our search tree. - // We'll find it when we call find_cycles() on this node directly. - return false; - } else { - UPB_UPCAST(m)->search_depth = ++depth; - bool cycle_found = false; - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if(!upb_issubmsg(f)) continue; - upb_def *sub_def = f->def; - upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); - if(upb_symtab_findcycles(sub_m, depth, status)) { - cycle_found = true; - UPB_UPCAST(m)->is_cyclic = true; - if(f->owned) { - upb_atomic_unref(&sub_def->refcount); - f->owned = false; - } - } - } - UPB_UPCAST(m)->search_depth = 0; - return cycle_found; - } +upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t) { + return upb_strtable_begin(&t->deftab); } -// Given a table of pending defs "tmptab" and a table of existing defs "symtab", -// resolves all of the unresolved refs for the defs in tmptab. Also resolves -// default values for enumerations and submessages. -bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, - upb_status *status) -{ - upb_symtab_ent *e; - for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - // Type names are resolved relative to the message in which they appear. - upb_string *base = e->e.key; - - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if(!upb_hasdef(f)) continue; // No resolving necessary. - upb_string *name = upb_downcast_unresolveddef(f->def)->name; - - // Resolve from either the tmptab (pending adds) or symtab (existing - // defs). If both exist, prefer the pending add, because it will be - // overwriting the existing def. - upb_symtab_ent *found; - if(!(found = upb_resolve(tmptab, base, name)) && - !(found = upb_resolve(symtab, base, name))) { - upb_seterr(status, UPB_ERROR, - "could not resolve symbol '" UPB_STRFMT "'" - " in context '" UPB_STRFMT "'", - UPB_STRARG(name), UPB_STRARG(base)); - return false; - } - - // Check the type of the found def. - upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; - if(found->def->type != expected) { - upb_seterr(status, UPB_ERROR, "Unexpected type"); - return false; - } - if (!upb_fielddef_resolve(f, found->def, status)) return false; - } - } +upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i) { + return upb_strtable_next(&t->deftab, i); +} - // Deal with type cycles. - for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - // The findcycles() call will decrement the external refcount of the - upb_symtab_findcycles(m, 0, status); - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true); - } +bool upb_symtabtxn_done(upb_symtabtxn_iter i) { + return i == NULL; +} - return true; +upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter) { + upb_symtab_ent *e = iter; + return e->def; } -// Given a list of defs, a list of extensions (in the future), and a flag -// indicating whether the new defs can overwrite existing defs in the symtab, -// attempts to add the given defs to the symtab. The whole operation either -// succeeds or fails. Ownership of "defs" and "exts" is taken. -static bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, - bool allow_redef, upb_status *status) -{ - upb_rwlock_wrlock(&s->lock); - // Build a table of the defs we mean to add, for duplicate detection and name - // resolution. - upb_strtable tmptab; - upb_strtable_init(&tmptab, num_defs, sizeof(upb_symtab_ent)); - for (int i = 0; i < num_defs; i++) { - upb_def *def = defs[i]; - upb_symtab_ent e = {{def->fqname, 0}, def}; - - // Redefinition is never allowed within a single FileDescriptorSet. - // Additionally, we only allow overwriting of an existing definition if - // allow_redef is set. - if (upb_strtable_lookup(&tmptab, def->fqname) || - (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) { - upb_seterr(status, UPB_ERROR, "Redefinition of symbol " UPB_STRFMT, - UPB_STRARG(def->fqname)); - goto err; - } +/* upb_symtab public interface ************************************************/ - // Pass ownership from the deflist to the strtable. - upb_strtable_insert(&tmptab, &e.e); - defs[i] = NULL; +static void _upb_symtab_free(upb_strtable *t) { + upb_symtab_ent *e; + for (e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) { + assert(upb_atomic_read(&e->def->refcount) == 0); + upb_def_free(e->def); } + upb_strtable_free(t); +} - // TODO: process the list of extensions by modifying entries from - // tmptab in-place (copying them from the symtab first if necessary). - - if (!upb_resolverefs(&tmptab, &s->symtab, status)) goto err; - - // The defs in tmptab have been vetted, and can be added to the symtab - // without causing errors. Now add all tmptab defs to the symtab, - // overwriting (and releasing a ref on) any existing defs with the same - // names. Ownership for tmptab defs passes from the tmptab to the symtab. - upb_symtab_ent *tmptab_e; - for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e; - tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) { - upb_symtab_ent *symtab_e = - upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); - if(symtab_e) { - upb_def_unref(symtab_e->def); - symtab_e->def = tmptab_e->def; - } else { - upb_strtable_insert(&s->symtab, &tmptab_e->e); - } +static void upb_symtab_free(upb_symtab *s) { + _upb_symtab_free(&s->symtab); + for (uint32_t i = 0; i < s->olddefs.len; i++) { + upb_def *d = s->olddefs.defs[i]; + assert(upb_atomic_read(&d->refcount) == 0); + upb_def_free(d); } + upb_rwlock_destroy(&s->lock); + upb_deflist_uninit(&s->olddefs); + free(s); +} - upb_rwlock_unlock(&s->lock); - upb_strtable_free(&tmptab); - return true; - -err: - // We need to free all defs from "tmptab." - upb_rwlock_unlock(&s->lock); - for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; - e = upb_strtable_next(&tmptab, &e->e)) { - upb_def_unref(e->def); +void upb_symtab_unref(upb_symtab *s) { + if(s && upb_atomic_unref(&s->refcount)) { + upb_symtab_free(s); } - upb_strtable_free(&tmptab); - return false; } - -/* upb_symtab public interface ************************************************/ - -upb_symtab *upb_symtab_new() -{ +upb_symtab *upb_symtab_new() { upb_symtab *s = malloc(sizeof(*s)); upb_atomic_init(&s->refcount, 1); upb_rwlock_init(&s->lock); upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent)); - s->fds_msgdef = NULL; + upb_deflist_init(&s->olddefs); return s; } -static void upb_free_symtab(upb_strtable *t) -{ - upb_symtab_ent *e; - for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) - upb_def_unref(e->def); - upb_strtable_free(t); -} - -void _upb_symtab_free(upb_symtab *s) -{ - upb_free_symtab(&s->symtab); - upb_rwlock_destroy(&s->lock); - free(s); -} - -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) -{ +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) { upb_rwlock_rdlock(&s->lock); int total = upb_strtable_count(&s->symtab); // We may only use part of this, depending on how many symbols are of the @@ -1281,13 +642,11 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) } upb_rwlock_unlock(&s->lock); *count = i; - for(i = 0; i < *count; i++) - upb_def_ref(defs[i]); + for(i = 0; i < *count; i++) upb_def_ref(defs[i]); return defs; } -upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) -{ +upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) { upb_rwlock_rdlock(&s->lock); upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); upb_def *ret = NULL; @@ -1299,7 +658,6 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) return ret; } - upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) { upb_rwlock_rdlock(&s->lock); upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol); @@ -1311,3 +669,109 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) upb_rwlock_unlock(&s->lock); return ret; } + +bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n, + upb_symtabtxn *txn) { + // This linear search makes the DFS O(n^2) in the length of the paths. + // Could make this O(n) with a hash table, but n is small. + for (int i = 0; i < n; i++) { + if (def == open_defs[i]) return false; + } + + bool needcopy = false; + upb_msgdef *m = upb_dyncast_msgdef(def); + if (m) { + upb_msg_iter i; + open_defs[n++] = def; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + if (!upb_hasdef(f)) continue; + needcopy |= upb_symtab_dfs(f->def, open_defs, n, txn); + } + } + + bool replacing = (upb_strtable_lookup(&txn->deftab, m->base.fqname) != NULL); + if (needcopy && !replacing) { + upb_symtab_ent e = {{def->fqname, 0}, upb_def_dup(def)}; + upb_strtable_insert(&txn->deftab, &e.e); + replacing = true; + } + return replacing; +} + +bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) { + upb_rwlock_wrlock(&s->lock); + + // All existing defs that can reach defs that are being replaced must + // themselves be replaced with versions that will point to the new defs. + // Do a DFS -- any path that finds a new def must replace all ancestors. + upb_strtable *symtab = &s->symtab; + upb_symtab_ent *e; + for(e = upb_strtable_begin(symtab); e; e = upb_strtable_next(symtab, &e->e)) { + upb_def *open_defs[UPB_MAX_TYPE_DEPTH]; + upb_symtab_dfs(e->def, open_defs, 0, txn); + } + + // Resolve all refs. + upb_strtable *txntab = &txn->deftab; + for(e = upb_strtable_begin(txntab); e; e = upb_strtable_next(txntab, &e->e)) { + upb_msgdef *m = upb_dyncast_msgdef(e->def); + if(!m) continue; + // Type names are resolved relative to the message in which they appear. + upb_string *base = m->base.fqname; + + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + if(!upb_hasdef(f)) continue; // No resolving necessary. + upb_string *name = upb_downcast_unresolveddef(f->def)->name; + + // Resolve from either the txntab (pending adds) or symtab (existing + // defs). If both exist, prefer the pending add, because it will be + // overwriting the existing def. + upb_symtab_ent *found; + if(!(found = upb_resolve(txntab, base, name)) && + !(found = upb_resolve(symtab, base, name))) { + upb_seterr(status, UPB_ERROR, + "could not resolve symbol '" UPB_STRFMT "'" + " in context '" UPB_STRFMT "'", + UPB_STRARG(name), UPB_STRARG(base)); + return false; + } + + // Check the type of the found def. + upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; + if(found->def->type != expected) { + upb_seterr(status, UPB_ERROR, "Unexpected type"); + return false; + } + if (!upb_fielddef_resolve(f, found->def, status)) return false; + } + } + + // The defs in the transaction have been vetted, and can be moved to the + // symtab without causing errors. + upb_symtab_ent *tmptab_e; + for(tmptab_e = upb_strtable_begin(txntab); tmptab_e; + tmptab_e = upb_strtable_next(txntab, &tmptab_e->e)) { + upb_def_movetosymtab(tmptab_e->def, s); + upb_symtab_ent *symtab_e = + upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); + if(symtab_e) { + upb_deflist_push(&s->olddefs, symtab_e->def); + symtab_e->def = tmptab_e->def; + } else { + upb_strtable_insert(&s->symtab, &tmptab_e->e); + } + } + + upb_strtable_clear(txntab); + upb_rwlock_unlock(&s->lock); + upb_symtab_gc(s); + return true; +} + +void upb_symtab_gc(upb_symtab *s) { + (void)s; + // TODO. +} diff --git a/src/upb_def.h b/src/upb_def.h index 776231a..ca969cb 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -4,25 +4,16 @@ * Copyright (c) 2009-2011 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * Provides a mechanism for loading proto definitions from descriptors, and - * data structures to represent those definitions. These form the protobuf - * schema, and are used extensively throughout upb: + * Provides a mechanism for creating and linking proto definitions. + * These form the protobuf schema, and are used extensively throughout upb: * - upb_msgdef: describes a "message" construct. * - upb_fielddef: describes a message field. * - upb_enumdef: describes an enum. - * (TODO: definitions of extensions and services). + * (TODO: definitions of services). * - * Defs are obtained from a upb_symtab object. A upb_symtab is empty when - * constructed, and definitions can be added by supplying descriptors. - * - * Defs are immutable and reference-counted. Symbol tables reference any defs - * that are the "current" definitions. If an extension is loaded that adds a - * field to an existing message, a new msgdef is constructed that includes the - * new field and the old msgdef is unref'd. The old msgdef will still be ref'd - * by messages (if any) that were constructed with that msgdef. - * - * This file contains routines for creating and manipulating the definitions - * themselves. To create and manipulate actual messages, see upb_msg.h. + * These defs are mutable (and not thread-safe) when first created. + * Once they are added to a defbuilder (and later its symtab) they become + * immutable. */ #ifndef UPB_DEF_H_ @@ -35,51 +26,37 @@ extern "C" { #endif -/* upb_def: base class for defs **********************************************/ +struct _upb_symtab; +typedef struct _upb_symtab upb_symtab; // All the different kind of defs we support. These correspond 1:1 with // declarations in a .proto file. typedef enum { UPB_DEF_MSG = 0, UPB_DEF_ENUM, - UPB_DEF_SVC, - UPB_DEF_EXT, - // Internal-only, placeholder for a def that hasn't be resolved yet. - UPB_DEF_UNRESOLVED, + UPB_DEF_SERVICE, // Not yet implemented. - // For specifying that defs of any type are requsted from getdefs. - UPB_DEF_ANY = -1 -} upb_deftype; + UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*() + UPB_DEF_UNRESOLVED = 99, // Internal-only. +} upb_deftype_t; -// This typedef is more space-efficient than declaring an enum var directly. -typedef int8_t upb_deftype_t; + +/* upb_def: base class for defs **********************************************/ typedef struct { - upb_string *fqname; // Fully qualified. - upb_atomic_t refcount; + upb_string *fqname; // Fully qualified. + upb_symtab *symtab; // Def is mutable iff symtab == NULL. + upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0). upb_deftype_t type; - - // The is_cyclic flag could go in upb_msgdef instead of here, because only - // messages can be involved in cycles. However, putting them here is free - // from a space perspective because structure alignment will otherwise leave - // three bytes empty after type. It is also makes ref and unref more - // efficient, because we don't have to downcast to msgdef before checking the - // is_cyclic flag. - bool is_cyclic; - uint16_t search_depth; // Used during initialization dfs. } upb_def; -// These must not be called directly! -void _upb_def_cyclic_ref(upb_def *def); -void _upb_def_reftozero(upb_def *def); - -// Call to ref/deref a def. -INLINE void upb_def_ref(upb_def *def) { - if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def); -} -INLINE void upb_def_unref(upb_def *def) { - if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); -} +// Call to ref/unref a def. Can be used at any time, but is not thread-safe +// until the def is in a symtab. While a def is in a symtab, everything +// reachable from that def (the symtab and all defs in the symtab) are +// guaranteed to be alive. +void upb_def_ref(upb_def *def); +void upb_def_unref(upb_def *def); +upb_def *upb_def_dup(upb_def *def); #define UPB_UPCAST(ptr) (&(ptr)->base) @@ -88,30 +65,66 @@ INLINE void upb_def_unref(upb_def *def) { // A upb_fielddef describes a single field in a message. It isn't a full def // in the sense that it derives from upb_def. It cannot stand on its own; it -// is either a field of a upb_msgdef or contained inside a upb_extensiondef. -// It is also reference-counted. +// must be part of a upb_msgdef. It is also reference-counted. struct _upb_fielddef { - uint8_t type; - uint8_t label; - // True if we own a ref on "def" (above). This is true unless this edge is - // part of a cycle. - bool owned; - uint8_t set_bit_mask; + struct _upb_msgdef *msgdef; + upb_def *def; // if upb_hasdef(f) + upb_atomic_t refcount; + bool finalized; + // The following fields may be modified until the def is finalized. + uint8_t type; // Use UPB_TYPE() constants. + uint8_t label; // Use UPB_LABEL() constants. + int16_t hasbit; + uint16_t offset; int32_t number; - int16_t field_index; // Indicates set bit. + upb_string *name; + upb_value defaultval; // Only meaningful for non-repeated scalars and strings. + upb_value fval; + struct _upb_accessor_vtbl *accessor; +}; - uint16_t set_bit_offset; - uint32_t byte_offset; // Where in a upb_msg to find the data. +upb_fielddef *upb_fielddef_new(); +void upb_fielddef_ref(upb_fielddef *f); +void upb_fielddef_unref(upb_fielddef *f); +upb_fielddef *upb_fielddef_dup(upb_fielddef *f); + +// Read accessors. May be called any time. +INLINE uint8_t upb_fielddef_type(upb_fielddef *f) { return f->type; } +INLINE uint8_t upb_fielddef_label(upb_fielddef *f) { return f->label; } +INLINE int32_t upb_fielddef_number(upb_fielddef *f) { return f->number; } +INLINE upb_string *upb_fielddef_name(upb_fielddef *f) { return f->name; } +INLINE upb_value upb_fielddef_default(upb_fielddef *f) { return f->defaultval; } +INLINE upb_value upb_fielddef_fval(upb_fielddef *f) { return f->fval; } +INLINE bool upb_fielddef_finalized(upb_fielddef *f) { return f->finalized; } +INLINE struct _upb_msgdef *upb_fielddef_msgdef(upb_fielddef *f) { + return f->msgdef; +} +INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(upb_fielddef *f) { + return f->accessor; +} - upb_value default_value; - upb_string *name; - struct _upb_msgdef *msgdef; +// Only meaningful once the def is in a symtab (returns NULL otherwise, or for +// a fielddef where !upb_hassubdef(f)). +upb_def *upb_fielddef_subdef(upb_fielddef *f); - // For the case of an enum or a submessage, points to the def for that type. - upb_def *def; - upb_atomic_t refcount; -}; +// NULL until the fielddef has been added to a msgdef. + +// Write accessors. "Number" and "name" must be set before the fielddef is +// added to a msgdef. For the moment we do not allow these to be set once +// the fielddef is added to a msgdef -- this could be relaxed in the future. +void upb_fielddef_setnumber(upb_fielddef *f, int32_t number); +void upb_fielddef_setname(upb_fielddef *f, upb_string *name); + +// These writers may be called at any time prior to being put in a symtab. +void upb_fielddef_settype(upb_fielddef *f, uint8_t type); +void upb_fielddef_setlabel(upb_fielddef *f, uint8_t label); +void upb_fielddef_setdefault(upb_fielddef *f, upb_value value); +void upb_fielddef_setfval(upb_fielddef *f, upb_value fval); +void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl); +// The name of the message or enum this field is referring to. Must be found +// at name resolution time (when the symtabtxn is committed to the symtab). +void upb_fielddef_settypename(upb_fielddef *f, upb_string *name); // A variety of tests about the type of a field. INLINE bool upb_issubmsgtype(upb_fieldtype_t type) { @@ -125,58 +138,35 @@ INLINE bool upb_isprimitivetype(upb_fieldtype_t type) { } INLINE bool upb_issubmsg(upb_fielddef *f) { return upb_issubmsgtype(f->type); } INLINE bool upb_isstring(upb_fielddef *f) { return upb_isstringtype(f->type); } -INLINE bool upb_isarray(upb_fielddef *f) { - return f->label == UPB_LABEL(REPEATED); -} +INLINE bool upb_isseq(upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); } + // Does the type of this field imply that it should contain an associated def? INLINE bool upb_hasdef(upb_fielddef *f) { return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); } -INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) { - if (upb_isarray(f)) { - return UPB_VALUETYPE_ARRAY; - } else { - return f->type; - } -} - -INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) { - assert(upb_isarray(f)); - return f->type; -} - -INLINE bool upb_field_ismm(upb_fielddef *f) { - return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); -} - -INLINE bool upb_elem_ismm(upb_fielddef *f) { - return upb_isstring(f) || upb_issubmsg(f); -} /* upb_msgdef *****************************************************************/ // Structure that describes a single .proto message type. typedef struct _upb_msgdef { upb_def base; - upb_atomic_t cycle_refcount; - uint32_t size; - uint32_t set_flags_bytes; // Tables for looking up fields by number and name. upb_inttable itof; // int to field upb_strtable ntof; // name to field - // Immutable msg instance that has all default values set. - // TODO: need a way of making this immutable! - struct _upb_msg *default_message; + // The following fields may be modified until finalized. + uint16_t size; + uint8_t hasbit_bytes; + // The range of tag numbers used to store extensions. + uint32_t extension_start; + uint32_t extension_end; } upb_msgdef; // Hash table entries for looking up fields by name or number. typedef struct { bool junk; - uint8_t field_type; - uint8_t native_wire_type; upb_fielddef *f; } upb_itof_ent; typedef struct { @@ -184,23 +174,56 @@ typedef struct { upb_fielddef *f; } upb_ntof_ent; -INLINE void upb_msgdef_unref(upb_msgdef *md) { - upb_def_unref(UPB_UPCAST(md)); +upb_msgdef *upb_msgdef_new(); +INLINE void upb_msgdef_unref(upb_msgdef *md) { upb_def_unref(UPB_UPCAST(md)); } +INLINE void upb_msgdef_ref(upb_msgdef *md) { upb_def_ref(UPB_UPCAST(md)); } + +// Returns a new msgdef that is a copy of the given msgdef (and a copy of all +// the fields) but with any references to submessages broken and replaced with +// just the name of the submessage. This can be put back into another symtab +// and the names will be re-resolved in the new context. +upb_msgdef *upb_msgdef_dup(upb_msgdef *m); + +// Read accessors. May be called at any time. +INLINE uint16_t upb_msgdef_size(upb_msgdef *m) { return m->size; } +INLINE uint8_t upb_msgdef_hasbit_bytes(upb_msgdef *m) { + return m->hasbit_bytes; +} +INLINE uint32_t upb_msgdef_extension_start(upb_msgdef *m) { + return m->extension_start; } -INLINE void upb_msgdef_ref(upb_msgdef *md) { - upb_def_ref(UPB_UPCAST(md)); +INLINE uint32_t upb_msgdef_extension_end(upb_msgdef *m) { + return m->extension_end; } +// Write accessors. May only be called before the msgdef is in a symtab. +void upb_msgdef_setsize(upb_msgdef *m, uint16_t size); +void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes); +void upb_msgdef_setextension_start(upb_msgdef *m, uint32_t start); +void upb_msgdef_setextension_end(upb_msgdef *m, uint32_t end); + +// Adds a fielddef to a msgdef, and passes a ref on the field to the msgdef. +// May only be done before the msgdef is in a symtab. The fielddef's name and +// number must be set, and the message may not already contain any field with +// this name or number -- if it does, the fielddef is unref'd and false is +// returned. The fielddef may not already belong to another message. +bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f); + +// Sets the layout of all fields according to default rules: +// 1. Hasbits for required fields come first, then optional fields. +// 2. Values are laid out in a way that respects alignment rules. +// 3. The order is chosen to minimize memory usage. +// This should only be called once all fielddefs have been added. +// TODO: will likely want the ability to exclude strings/submessages/arrays. +// TODO: will likely want the ability to define a header size. +void upb_msgdef_layout(upb_msgdef *m); + // Looks up a field by name or number. While these are written to be as fast // as possible, it will still be faster to cache the results of this lookup if // possible. These return NULL if no such field is found. -INLINE upb_itof_ent *upb_msgdef_itofent(upb_msgdef *m, uint32_t num) { - return (upb_itof_ent*)upb_inttable_fastlookup( - &m->itof, num, sizeof(upb_itof_ent)); -} - -INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t num) { - upb_itof_ent *e = upb_msgdef_itofent(m, num); +INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t i) { + upb_itof_ent *e = (upb_itof_ent*) + upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent)); return e ? e->f : NULL; } @@ -214,6 +237,7 @@ INLINE int upb_msgdef_numfields(upb_msgdef *m) { } // Iteration over fields. The order is undefined. +// Iterators are invalidated when a field is added or removed. // upb_msg_iter i; // for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { // upb_fielddef *f = upb_msg_iter_field(i); @@ -225,6 +249,7 @@ upb_msg_iter upb_msg_begin(upb_msgdef *m); upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter); INLINE bool upb_msg_done(upb_msg_iter iter) { return upb_inttable_done(iter); } +// Iterator accessor. INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) { upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_iter_value(iter); return ent->f; @@ -233,13 +258,11 @@ INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) { /* upb_enumdef ****************************************************************/ -typedef int32_t upb_enumval_t; - typedef struct _upb_enumdef { upb_def base; upb_strtable ntoi; upb_inttable iton; - upb_enumval_t default_value; // The first value listed in the enum. + int32_t defaultval; } upb_enumdef; typedef struct { @@ -252,12 +275,28 @@ typedef struct { upb_string *string; } upb_iton_ent; +upb_enumdef *upb_enumdef_new(); +INLINE void upb_enumdef_ref(upb_enumdef *e) { upb_def_ref(UPB_UPCAST(e)); } +INLINE void upb_enumdef_unref(upb_enumdef *e) { upb_def_unref(UPB_UPCAST(e)); } +upb_enumdef *upb_enumdef_dup(upb_enumdef *e); + +INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; } + +// May only be set before the enumdef is in a symtab. +void upb_enumdef_setdefault(upb_enumdef *e, int32_t val); + +// Adds a value to the enumdef. Requires that no existing val has this +// name or number (returns false and does not add if there is). May only +// be called before the enumdef is in a symtab. +bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num); + // Lookups from name to integer and vice-versa. -bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num); +bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, int32_t *num); // Caller does not own a ref on the returned string. -upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); +upb_string *upb_enumdef_iton(upb_enumdef *e, int32_t num); // Iteration over name/value pairs. The order is undefined. +// Adding an enum val invalidates any iterators. // upb_enum_iter i; // for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { // // ... @@ -268,6 +307,7 @@ upb_enum_iter upb_enum_begin(upb_enumdef *e); upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter); INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); } +// Iterator accessors. INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) { upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter); return e->string; @@ -277,28 +317,74 @@ INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { } +/* upb_symtabtxn **************************************************************/ + +// A symbol table transaction is a map of defs that can be added to a symtab +// in one single atomic operation that either succeeds or fails. Mutable defs +// can be added to this map (and perhaps removed, in the future). +// +// A symtabtxn is not thread-safe. + +typedef struct { + upb_strtable deftab; +} upb_symtabtxn; + +void upb_symtabtxn_init(upb_symtabtxn *t); +void upb_symtabtxn_uninit(upb_symtabtxn *t); + +// Adds a def to the symtab. Caller passes a ref on the def to the symtabtxn. +// The def's name must be set and there must not be any existing defs in the +// symtabtxn with this name, otherwise false will be returned and no operation +// will be performed (and the ref on the def will be released). +bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def); + +// Gets the def (if any) that is associated with this name in the symtab. +// Caller does *not* inherit a ref on the def. +upb_def *upb_symtabtxn_get(upb_symtabtxn *t, upb_string *name); + +// Iterate over the defs that are part of the transaction. +// The order is undefined. +// The iterator is invalidated by upb_symtabtxn_add(). +// upb_symtabtxn_iter i; +// for(i = upb_symtabtxn_begin(t); !upb_symtabtxn_done(t); +// i = upb_symtabtxn_next(t, i)) { +// upb_def *def = upb_symtabtxn_iter_def(i); +// } +typedef void* upb_symtabtxn_iter; + +upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t); +upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i); +bool upb_symtabtxn_done(upb_symtabtxn_iter i); +upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter); + + /* upb_symtab *****************************************************************/ // A SymbolTable is where upb_defs live. It is empty when first constructed. -// Clients add definitions to the symtab by supplying descriptors (as defined -// in descriptor.proto) via the upb_stream interface. +// Clients add definitions to the symtab (or replace existing definitions) by +// using a upb_symtab_commit() or calling upb_symtab_add(). + +// upb_deflist: A little dynamic array for storing a growing list of upb_defs. +typedef struct { + upb_def **defs; + uint32_t len; + uint32_t size; +} upb_deflist; + +void upb_deflist_init(upb_deflist *l); +void upb_deflist_uninit(upb_deflist *l); +void upb_deflist_push(upb_deflist *l, upb_def *d); + struct _upb_symtab { upb_atomic_t refcount; upb_rwlock_t lock; // Protects all members except the refcount. upb_strtable symtab; // The symbol table. - upb_msgdef *fds_msgdef; // Msgdef for google.protobuf.FileDescriptorSet. + upb_deflist olddefs; }; -typedef struct _upb_symtab upb_symtab; -// Initializes a upb_symtab. Symtabs are not freed explicitly, but unref'd -// when the caller is done with them. upb_symtab *upb_symtab_new(void); -void _upb_symtab_free(upb_symtab *s); // Must not be called directly! - INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); } -INLINE void upb_symtab_unref(upb_symtab *s) { - if(s && upb_atomic_unref(&s->refcount)) _upb_symtab_free(s); -} +void upb_symtab_unref(upb_symtab *s); // Resolves the given symbol using the rules described in descriptor.proto, // namely: @@ -310,35 +396,36 @@ INLINE void upb_symtab_unref(upb_symtab *s) { // // If a def is found, the caller owns one ref on the returned def. Otherwise // returns NULL. +// TODO: make return const upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym); // Find an entry in the symbol table with this exact name. If a def is found, // the caller owns one ref on the returned def. Otherwise returns NULL. +// TODO: make return const upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); // Gets an array of pointers to all currently active defs in this symtab. The // caller owns the returned array (which is of length *count) as well as a ref // to each symbol inside. If type is UPB_DEF_ANY then defs of all types are // returned, otherwise only defs of the required type are returned. -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type); +// TODO: make return const +upb_def **upb_symtab_getdefs(upb_symtab *s, int *n, upb_deftype_t type); -// upb_defbuilder: For adding defs to the symtab. -// You allocate the defbuilder, which can handle a single descriptor. -// It will be freed automatically when the parse completes. -struct _upb_defbuilder; -typedef struct _upb_defbuilder upb_defbuilder; -struct _upb_handlers; -struct _upb_handlers; +// Adds a single upb_def into the symtab. A ref on the def is passed to the +// symtab. If any references cannot be resolved, false is returned and the +// symtab is unchanged. The error (if any) is saved to status if non-NULL. +bool upb_symtab_add(upb_symtab *s, upb_def *d, upb_status *status); -// Allocates a new defbuilder that will add defs to the given symtab. -upb_defbuilder *upb_defbuilder_new(upb_symtab *s); +// Adds the set of defs contained in the transaction to the symtab, clearing +// the txn. The entire operation either succeeds or fails. If the operation +// fails, the symtab is unchanged, false is returned, and status indicates +// the error. +bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *t, upb_status *status); -// Registers handlers that will operate on a defbuilder to add the defs -// to the defbuilder's symtab. Will free itself when the parse finishes. -// -// TODO: should this allow redefinition? Either is possible, but which is -// more useful? Maybe it should be an option. -struct _upb_mhandlers *upb_defbuilder_reghandlers(struct _upb_handlers *h); +// Frees defs that are no longer active in the symtab and are no longer +// reachable. Such defs are not freed when they are replaced in the symtab +// if they are still reachable from defs that are still referenced. +void upb_symtab_gc(upb_symtab *s); /* upb_def casts **************************************************************/ @@ -352,8 +439,7 @@ struct _upb_mhandlers *upb_defbuilder_reghandlers(struct _upb_handlers *h); } UPB_DYNAMIC_CAST_DEF(msgdef, MSG); UPB_DYNAMIC_CAST_DEF(enumdef, ENUM); -UPB_DYNAMIC_CAST_DEF(svcdef, SVC); -UPB_DYNAMIC_CAST_DEF(extdef, EXT); +UPB_DYNAMIC_CAST_DEF(svcdef, SERVICE); UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED); #undef UPB_DYNAMIC_CAST_DEF @@ -367,8 +453,7 @@ UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED); } UPB_DOWNCAST_DEF(msgdef, MSG); UPB_DOWNCAST_DEF(enumdef, ENUM); -UPB_DOWNCAST_DEF(svcdef, SVC); -UPB_DOWNCAST_DEF(extdef, EXT); +UPB_DOWNCAST_DEF(svcdef, SERVICE); UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED); #undef UPB_DOWNCAST_DEF diff --git a/src/upb_descriptor.c b/src/upb_descriptor.c new file mode 100644 index 0000000..127d19c --- /dev/null +++ b/src/upb_descriptor.c @@ -0,0 +1,548 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2008-2009 Google Inc. See LICENSE for details. + * Author: Josh Haberman + */ + +#include "upb_descriptor.h" + +#include +#include +#include "upb_string.h" +#include "upb_def.h" + +/* Joins strings together, for example: + * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" + * join("", "Baz") -> "Baz" + * Caller owns a ref on the returned string. */ +static upb_string *upb_join(upb_string *base, upb_string *name) { + if (!base || upb_string_len(base) == 0) { + return upb_string_getref(name); + } else { + return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT, + UPB_STRARG(base), UPB_STRARG(name)); + } +} + +/* upb_descreader ************************************************************/ + +// A upb_descreader builds a list of defs by handling a parse of a protobuf in +// the format defined in descriptor.proto. The output of a upb_descreader is +// a upb_symtabtxn. + +static upb_def *upb_deflist_last(upb_deflist *l) { + return l->defs[l->len-1]; +} + +// Qualify the defname for all defs starting with offset "start" with "str". +static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { + for(uint32_t i = start; i < l->len; i++) { + upb_def *def = l->defs[i]; + upb_string *name = def->fqname; + def->fqname = upb_join(str, name); + upb_string_unref(name); + } +} + +// Forward declares for top-level file descriptors. +static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h); +static upb_mhandlers * upb_enumdef_register_EnumDescriptorProto(upb_handlers *h); + +void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn) { + upb_deflist_init(&r->defs); + upb_status_init(&r->status); + r->txn = txn; + r->stack_len = 0; + r->name = NULL; + r->default_string = NULL; +} + +void upb_descreader_uninit(upb_descreader *r) { + upb_string_unref(r->name); + upb_status_uninit(&r->status); + upb_deflist_uninit(&r->defs); + upb_string_unref(r->default_string); + while (r->stack_len > 0) { + upb_descreader_frame *f = &r->stack[--r->stack_len]; + upb_string_unref(f->name); + } +} + +static upb_msgdef *upb_descreader_top(upb_descreader *r) { + if (r->stack_len <= 1) return NULL; + int index = r->stack[r->stack_len-1].start - 1; + assert(index >= 0); + return upb_downcast_msgdef(r->defs.defs[index]); +} + +static upb_def *upb_descreader_last(upb_descreader *r) { + return upb_deflist_last(&r->defs); +} + +// Start/end handlers for FileDescriptorProto and DescriptorProto (the two +// entities that have names and can contain sub-definitions. +void upb_descreader_startcontainer(upb_descreader *r) { + upb_descreader_frame *f = &r->stack[r->stack_len++]; + f->start = r->defs.len; + f->name = NULL; +} + +void upb_descreader_endcontainer(upb_descreader *r) { + upb_descreader_frame *f = &r->stack[--r->stack_len]; + upb_deflist_qualify(&r->defs, f->name, f->start); + upb_string_unref(f->name); +} + +void upb_descreader_setscopename(upb_descreader *r, upb_string *str) { + upb_descreader_frame *f = &r->stack[r->stack_len-1]; + upb_string_unref(f->name); + f->name = upb_string_getref(str); +} + +// Handlers for google.protobuf.FileDescriptorProto. +static upb_flow_t upb_descreader_FileDescriptorProto_startmsg(void *_r) { + upb_descreader *r = _r; + upb_descreader_startcontainer(r); + return UPB_CONTINUE; +} + +static void upb_descreader_FileDescriptorProto_endmsg(void *_r, + upb_status *status) { + (void)status; + upb_descreader *r = _r; + upb_descreader_endcontainer(r); +} + +static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r, + upb_value fval, + upb_value val) { + (void)fval; + upb_descreader *r = _r; + upb_descreader_setscopename(r, upb_value_getstr(val)); + return UPB_CONTINUE; +} + +static upb_mhandlers *upb_descreader_register_FileDescriptorProto( + upb_handlers *h) { + upb_mhandlers *m = upb_handlers_newmhandlers(h); + upb_mhandlers_setstartmsg(m, &upb_descreader_FileDescriptorProto_startmsg); + upb_mhandlers_setendmsg(m, &upb_descreader_FileDescriptorProto_endmsg); + +#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM +#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE + upb_fhandlers *f = + upb_mhandlers_newfhandlers(m, FNUM(PACKAGE), FTYPE(PACKAGE), false); + upb_fhandlers_setvalue(f, &upb_descreader_FileDescriptorProto_package); + + upb_mhandlers_newfhandlers_subm(m, FNUM(MESSAGE_TYPE), FTYPE(MESSAGE_TYPE), true, + upb_msgdef_register_DescriptorProto(h)); + upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, + upb_enumdef_register_EnumDescriptorProto(h)); + // TODO: services, extensions + return m; +} +#undef FNUM +#undef FTYPE + +// Handlers for google.protobuf.FileDescriptorSet. +static void upb_descreader_FileDescriptorSet_onendmsg(void *_r, + upb_status *status) { + // Move all defs (which are now guaranteed to be fully-qualified) to the txn. + upb_descreader *r = _r; + if (upb_ok(status)) { + for (unsigned int i = 0; i < r->defs.len; i++) { + // TODO: check return for duplicate def. + upb_symtabtxn_add(r->txn, r->defs.defs[i]); + } + r->defs.len = 0; + } +} + +static upb_mhandlers *upb_descreader_register_FileDescriptorSet(upb_handlers *h) { + upb_mhandlers *m = upb_handlers_newmhandlers(h); + upb_mhandlers_setendmsg(m, upb_descreader_FileDescriptorSet_onendmsg); + +#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM +#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE + upb_mhandlers_newfhandlers_subm(m, FNUM(FILE), FTYPE(FILE), true, + upb_descreader_register_FileDescriptorProto(h)); + return m; +} +#undef FNUM +#undef FTYPE + +upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h) { + h->should_jit = false; + return upb_descreader_register_FileDescriptorSet(h); +} + +// google.protobuf.EnumValueDescriptorProto. +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_r) { + upb_descreader *r = _r; + r->saw_number = false; + r->saw_name = false; + return UPB_CONTINUE; +} + +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r, + upb_value fval, + upb_value val) { + (void)fval; + upb_descreader *r = _r; + upb_string_unref(r->name); + r->name = upb_string_getref(upb_value_getstr(val)); + r->saw_name = true; + return UPB_CONTINUE; +} + +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_number(void *_r, + upb_value fval, + upb_value val) { + (void)fval; + upb_descreader *r = _r; + r->number = upb_value_getint32(val); + r->saw_number = true; + return UPB_CONTINUE; +} + +static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r, + upb_status *status) { + upb_descreader *r = _r; + if(!r->saw_number || !r->saw_name) { + upb_seterr(status, UPB_ERROR, "Enum value missing name or number."); + return; + } + upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); + if (upb_inttable_count(&e->iton) == 0) { + // The default value of an enum (in the absence of an explicit default) is + // its first listed value. + upb_enumdef_setdefault(e, r->number); + } + upb_enumdef_addval(e, r->name, r->number); + upb_string_unref(r->name); + r->name = NULL; +} + +static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto( + upb_handlers *h) { + upb_mhandlers *m = upb_handlers_newmhandlers(h); + upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumValueDescriptorProto_startmsg); + upb_mhandlers_setendmsg(m, &upb_enumdef_EnumValueDescriptorProto_endmsg); + +#define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM +#define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE + upb_fhandlers *f; + f = upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); + upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_name); + + f = upb_mhandlers_newfhandlers(m, FNUM(NUMBER), FTYPE(NUMBER), false); + upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_number); + return m; +} +#undef FNUM +#undef FTYPE + +// google.protobuf.EnumDescriptorProto. +static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) { + upb_descreader *r = _r; + upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new())); + return UPB_CONTINUE; +} + +static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) { + upb_descreader *r = _r; + upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); + if (upb_descreader_last((upb_descreader*)_r)->fqname == NULL) { + upb_seterr(status, UPB_ERROR, "Enum had no name."); + return; + } + if (upb_inttable_count(&e->iton) == 0) { + upb_seterr(status, UPB_ERROR, "Enum had no values."); + return; + } +} + +static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r, + upb_value fval, + upb_value val) { + (void)fval; + upb_descreader *r = _r; + upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); + upb_string_unref(e->base.fqname); + e->base.fqname = upb_string_getref(upb_value_getstr(val)); + return UPB_CONTINUE; +} + +static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) { + upb_mhandlers *m = upb_handlers_newmhandlers(h); + upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumDescriptorProto_startmsg); + upb_mhandlers_setendmsg(m, &upb_enumdef_EnumDescriptorProto_endmsg); + +#define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM +#define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE + upb_fhandlers *f = + upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); + upb_fhandlers_setvalue(f, &upb_enumdef_EnumDescriptorProto_name); + + upb_mhandlers_newfhandlers_subm(m, FNUM(VALUE), FTYPE(VALUE), true, + upb_enumdef_register_EnumValueDescriptorProto(h)); + return m; +} +#undef FNUM +#undef FTYPE + +static upb_flow_t upb_fielddef_startmsg(void *_r) { + upb_descreader *r = _r; + r->f = upb_fielddef_new(); + return UPB_CONTINUE; +} + +// Converts the default value in string "dstr" into "d". Passes a ref on dstr. +// Returns true on success. +static bool upb_fielddef_parsedefault(upb_string *dstr, upb_value *d, int type) { + bool success = true; + if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) { + // We'll keep the ref we had on it. We include enums in this case because + // we need the enumdef to resolve the name, but we may not have it yet. + // We'll resolve it later. + if (dstr) { + upb_value_setstr(d, dstr); + } else { + upb_value_setstr(d, upb_emptystring()); + } + } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) { + // We don't expect to get a default value. + upb_string_unref(dstr); + if (dstr != NULL) success = false; + } else { + // The strto* functions need the string to be NULL-terminated. + char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr); + char *end; + upb_string_unref(dstr); + switch (type) { + case UPB_TYPE(INT32): + case UPB_TYPE(SINT32): + case UPB_TYPE(SFIXED32): + if (strz) { + long val = strtol(strz, &end, 0); + if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) + success = false; + else + upb_value_setint32(d, val); + } else { + upb_value_setint32(d, 0); + } + break; + case UPB_TYPE(INT64): + case UPB_TYPE(SINT64): + case UPB_TYPE(SFIXED64): + if (strz) { + upb_value_setint64(d, strtoll(strz, &end, 0)); + if (errno == ERANGE || *end) success = false; + } else { + upb_value_setint64(d, 0); + } + break; + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + if (strz) { + unsigned long val = strtoul(strz, &end, 0); + if (val > UINT32_MAX || errno == ERANGE || *end) + success = false; + else + upb_value_setuint32(d, val); + } else { + upb_value_setuint32(d, 0); + } + break; + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + if (strz) { + upb_value_setuint64(d, strtoull(strz, &end, 0)); + if (errno == ERANGE || *end) success = false; + } else { + upb_value_setuint64(d, 0); + } + break; + case UPB_TYPE(DOUBLE): + if (strz) { + upb_value_setdouble(d, strtod(strz, &end)); + if (errno == ERANGE || *end) success = false; + } else { + upb_value_setdouble(d, 0.0); + } + break; + case UPB_TYPE(FLOAT): + if (strz) { + upb_value_setfloat(d, strtof(strz, &end)); + if (errno == ERANGE || *end) success = false; + } else { + upb_value_setfloat(d, 0.0); + } + break; + case UPB_TYPE(BOOL): + if (!strz || strcmp(strz, "false") == 0) + upb_value_setbool(d, false); + else if (strcmp(strz, "true") == 0) + upb_value_setbool(d, true); + else + success = false; + break; + } + free(strz); + } + return success; +} + +static void upb_fielddef_endmsg(void *_r, upb_status *status) { + upb_descreader *r = _r; + upb_fielddef *f = r->f; + // TODO: verify that all required fields were present. + assert(f->number != -1 && f->name != NULL); + assert((f->def != NULL) == upb_hasdef(f)); + + // Field was successfully read, add it as a field of the msgdef. + upb_msgdef *m = upb_descreader_top(r); + upb_msgdef_addfield(m, f); + upb_string *dstr = r->default_string; + r->default_string = NULL; + upb_value val; + if (!upb_fielddef_parsedefault(dstr, &val, f->type)) { + // We don't worry too much about giving a great error message since the + // compiler should have ensured this was correct. + upb_seterr(status, UPB_ERROR, "Error converting default value."); + return; + } + upb_fielddef_setdefault(f, val); +} + +static upb_flow_t upb_fielddef_ontype(void *_r, upb_value fval, upb_value val) { + (void)fval; + upb_descreader *r = _r; + upb_fielddef_settype(r->f, upb_value_getint32(val)); + return UPB_CONTINUE; +} + +static upb_flow_t upb_fielddef_onlabel(void *_r, upb_value fval, upb_value val) { + (void)fval; + upb_descreader *r = _r; + upb_fielddef_setlabel(r->f, upb_value_getint32(val)); + return UPB_CONTINUE; +} + +static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val) { + (void)fval; + upb_descreader *r = _r; + upb_fielddef_setnumber(r->f, upb_value_getint32(val)); + return UPB_CONTINUE; +} + +static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) { + (void)fval; + upb_descreader *r = _r; + upb_fielddef_setname(r->f, upb_value_getstr(val)); + return UPB_CONTINUE; +} + +static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval, + upb_value val) { + (void)fval; + upb_descreader *r = _r; + upb_fielddef_settypename(r->f, upb_value_getstr(val)); + return UPB_CONTINUE; +} + +static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval, + upb_value val) { + (void)fval; + upb_descreader *r = _r; + // Have to convert from string to the correct type, but we might not know the + // type yet. + upb_string_unref(r->default_string); + r->default_string = upb_string_getref(upb_value_getstr(val)); + return UPB_CONTINUE; +} + +static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto( + upb_handlers *h) { + upb_mhandlers *m = upb_handlers_newmhandlers(h); + upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg); + upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg); + +#define FIELD(name, handler) \ + upb_fhandlers_setvalue( \ + upb_mhandlers_newfhandlers(m, \ + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \ + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \ + false), \ + handler); + FIELD(TYPE, &upb_fielddef_ontype); + FIELD(LABEL, &upb_fielddef_onlabel); + FIELD(NUMBER, &upb_fielddef_onnumber); + FIELD(NAME, &upb_fielddef_onname); + FIELD(TYPE_NAME, &upb_fielddef_ontypename); + FIELD(DEFAULT_VALUE, &upb_fielddef_ondefaultval); + return m; +} +#undef FNUM +#undef FTYPE + + +// google.protobuf.DescriptorProto. +static upb_flow_t upb_msgdef_startmsg(void *_r) { + upb_descreader *r = _r; + upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new())); + upb_descreader_startcontainer(r); + return UPB_CONTINUE; +} + +static void upb_msgdef_endmsg(void *_r, upb_status *status) { + upb_descreader *r = _r; + upb_msgdef *m = upb_descreader_top(r); + if(!m->base.fqname) { + upb_seterr(status, UPB_ERROR, "Encountered message with no name."); + return; + } + + upb_msgdef_layout(m); + upb_descreader_endcontainer(r); +} + +static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) { + (void)fval; + upb_descreader *r = _r; + assert(val.type == UPB_TYPE(STRING)); + upb_msgdef *m = upb_descreader_top(r); + upb_string_unref(m->base.fqname); + m->base.fqname = upb_string_getref(upb_value_getstr(val)); + upb_descreader_setscopename(r, upb_value_getstr(val)); + return UPB_CONTINUE; +} + +static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) { + upb_mhandlers *m = upb_handlers_newmhandlers(h); + upb_mhandlers_setstartmsg(m, &upb_msgdef_startmsg); + upb_mhandlers_setendmsg(m, &upb_msgdef_endmsg); + +#define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM +#define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE + upb_fhandlers *f = + upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); + upb_fhandlers_setvalue(f, &upb_msgdef_onname); + + upb_mhandlers_newfhandlers_subm(m, FNUM(FIELD), FTYPE(FIELD), true, + upb_fielddef_register_FieldDescriptorProto(h)); + upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, + upb_enumdef_register_EnumDescriptorProto(h)); + + // DescriptorProto is self-recursive, so we must link the definition. + upb_mhandlers_newfhandlers_subm( + m, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, m); + + // TODO: extensions. + return m; +} +#undef FNUM +#undef FTYPE + diff --git a/src/upb_descriptor.h b/src/upb_descriptor.h new file mode 100644 index 0000000..f74de3b --- /dev/null +++ b/src/upb_descriptor.h @@ -0,0 +1,67 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2011 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * Routines for building defs by parsing descriptors in descriptor.proto format. + * This only needs to use the public API of upb_symtab. Later we may also + * add routines for dumping a symtab to a descriptor. + */ + +#ifndef UPB_DESCRIPTOR_H +#define UPB_DESCRIPTOR_H + +#include "upb_handlers.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/* upb_descreader ************************************************************/ + +// upb_descreader reads a descriptor and puts defs in a upb_symtabtxn. + +// We keep a stack of all the messages scopes we are currently in, as well as +// the top-level file scope. This is necessary to correctly qualify the +// definitions that are contained inside. "name" tracks the name of the +// message or package (a bare name -- not qualified by any enclosing scopes). +typedef struct { + upb_string *name; + // Index of the first def that is under this scope. For msgdefs, the + // msgdef itself is at start-1. + int start; +} upb_descreader_frame; + +typedef struct { + upb_deflist defs; + upb_symtabtxn *txn; + upb_descreader_frame stack[UPB_MAX_TYPE_DEPTH]; + int stack_len; + upb_status status; + + uint32_t number; + upb_string *name; + bool saw_number; + bool saw_name; + + upb_string *default_string; + + upb_fielddef *f; +} upb_descreader; + +// Creates a new descriptor builder that will add defs to the given txn. +void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn); +void upb_descreader_uninit(upb_descreader *r); + +// Registers handlers that will load descriptor data into a symtabtxn. +// Pass the descreader as the closure. The messages will have +// upb_msgdef_layout() called on them before adding to the txn. +upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_glue.c b/src/upb_glue.c index 1422463..f288855 100644 --- a/src/upb_glue.c +++ b/src/upb_glue.c @@ -5,32 +5,29 @@ * Author: Josh Haberman */ +#include "upb_decoder.h" +#include "upb_descriptor.h" #include "upb_glue.h" #include "upb_msg.h" -#include "upb_decoder.h" #include "upb_strstream.h" #include "upb_textprinter.h" -void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md, +void upb_strtomsg(upb_string *str, void *msg, upb_msgdef *md, upb_status *status) { upb_stringsrc strsrc; upb_stringsrc_init(&strsrc); upb_stringsrc_reset(&strsrc, str); - upb_handlers *h = upb_handlers_new(); - upb_msg_reghandlers(h, md); - upb_decoder d; - upb_decoder_init(&d, h); + upb_decoder_initformsgdef(&d, md); upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), msg); - upb_handlers_unref(h); - upb_decoder_decode(&d, status); upb_stringsrc_uninit(&strsrc); upb_decoder_uninit(&d); } +#if 0 void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md, bool single_line) { upb_stringsink strsink; @@ -53,23 +50,49 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md, upb_textprinter_free(p); upb_handlers_unref(h); } +#endif -void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) { +// TODO: read->load. +void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status) { upb_stringsrc strsrc; upb_stringsrc_init(&strsrc); upb_stringsrc_reset(&strsrc, str); upb_handlers *h = upb_handlers_new(); - upb_defbuilder_reghandlers(h); + upb_descreader_reghandlers(h); upb_decoder d; - upb_decoder_init(&d, h); + upb_decoder_initforhandlers(&d, h); upb_handlers_unref(h); - upb_defbuilder *b = upb_defbuilder_new(symtab); - upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), b); + upb_descreader r; + upb_symtabtxn txn; + upb_symtabtxn_init(&txn); + upb_descreader_init(&r, &txn); + upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), &r); upb_decoder_decode(&d, status); + // Set default accessors and layouts on all messages. + // for msgdef in symtabtxn: + upb_symtabtxn_iter i; + for(i = upb_symtabtxn_begin(&txn); !upb_symtabtxn_done(i); + i = upb_symtabtxn_next(&txn, i)) { + upb_def *def = upb_symtabtxn_iter_def(i); + upb_msgdef *md = upb_dyncast_msgdef(def); + if (!md) return; + // For field in msgdef: + upb_msg_iter i; + for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + upb_fielddef_setaccessor(f, upb_stdmsg_accessor(f)); + } + upb_msgdef_layout(md); + } + + if (upb_ok(status)) upb_symtab_commit(symtab, &txn, status); + + upb_symtabtxn_uninit(&txn); + upb_descreader_uninit(&r); upb_stringsrc_uninit(&strsrc); upb_decoder_uninit(&d); } diff --git a/src/upb_glue.h b/src/upb_glue.h index d1a26d1..27611cd 100644 --- a/src/upb_glue.h +++ b/src/upb_glue.h @@ -42,14 +42,14 @@ struct _upb_symtab; // Decodes the given string, which must be in protobuf binary format, to the // given upb_msg with msgdef "md", storing the status of the operation in "s". -void upb_strtomsg(struct _upb_string *str, struct _upb_msg *msg, +void upb_strtomsg(struct _upb_string *str, void *msg, struct _upb_msgdef *md, struct _upb_status *s); -void upb_msgtotext(struct _upb_string *str, struct _upb_msg *msg, +void upb_msgtotext(struct _upb_string *str, void *msg, struct _upb_msgdef *md, bool single_line); -void upb_parsedesc(struct _upb_symtab *symtab, struct _upb_string *str, - struct _upb_status *status); +void upb_read_descriptor(struct _upb_symtab *symtab, struct _upb_string *str, + struct _upb_status *status); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb_handlers.c b/src/upb_handlers.c index e630975..c29281a 100644 --- a/src/upb_handlers.c +++ b/src/upb_handlers.c @@ -123,9 +123,9 @@ static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, upb_msgdef *m, fieldreg_cb, closure, mtab); } fh = upb_mhandlers_newfhandlers_subm( - mh, f->number, f->type, upb_isarray(f), sub_mh); + mh, f->number, f->type, upb_isseq(f), sub_mh); } else { - fh = upb_mhandlers_newfhandlers(mh, f->number, f->type, upb_isarray(f)); + fh = upb_mhandlers_newfhandlers(mh, f->number, f->type, upb_isseq(f)); } if (fieldreg_cb) fieldreg_cb(closure, fh, f); } diff --git a/src/upb_msg.c b/src/upb_msg.c index 91f1454..b88df32 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -9,201 +9,23 @@ #include "upb_msg.h" -static uint32_t upb_round_up_pow2(uint32_t v) { - // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - return v; -} - -static void upb_elem_free(upb_value v, upb_fielddef *f) { - switch(f->type) { - case UPB_TYPE(MESSAGE): - case UPB_TYPE(GROUP): - _upb_msg_free(upb_value_getmsg(v), upb_downcast_msgdef(f->def)); - break; - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - _upb_string_free(upb_value_getstr(v)); - break; - default: - abort(); - } -} - -static void upb_elem_unref(upb_value v, upb_fielddef *f) { - assert(upb_elem_ismm(f)); - upb_atomic_t *refcount = upb_value_getrefcount(v); - if (refcount && upb_atomic_unref(refcount)) - upb_elem_free(v, f); -} - -static void upb_field_free(upb_value v, upb_fielddef *f) { - if (upb_isarray(f)) { - _upb_array_free(upb_value_getarr(v), f); - } else { - upb_elem_free(v, f); - } -} - -static void upb_field_unref(upb_value v, upb_fielddef *f) { - assert(upb_field_ismm(f)); - upb_atomic_t *refcount = upb_value_getrefcount(v); - if (refcount && upb_atomic_unref(refcount)) - upb_field_free(v, f); -} - - -/* upb_array ******************************************************************/ - -upb_array *upb_array_new(void) { - upb_array *arr = malloc(sizeof(*arr)); - upb_atomic_init(&arr->refcount, 1); - arr->size = 0; - arr->len = 0; - arr->ptr = NULL; - return arr; -} - -void __attribute__((noinline)) upb_array_dorecycle(upb_array **_arr) { - upb_array *arr = *_arr; - if(arr && upb_atomic_only(&arr->refcount)) { - arr->len = 0; - } else { - if (arr) { - bool was_lastref = upb_atomic_unref(&arr->refcount); - (void)was_lastref; - assert(!was_lastref); // If it was, we would have just recycled. - } - *_arr = upb_array_new(); - } -} - -void upb_array_recycle(upb_array **_arr) { - upb_array *arr = *_arr; - if(arr && upb_atomic_only(&arr->refcount)) { - arr->len = 0; - } else { - upb_array_dorecycle(_arr); - } -} - -void _upb_array_free(upb_array *arr, upb_fielddef *f) { - if (upb_elem_ismm(f)) { - // Need to release refs on sub-objects. - upb_valuetype_t type = upb_elem_valuetype(f); - for (int32_t i = 0; i < arr->size; i++) { - upb_valueptr p = _upb_array_getptr(arr, f, i); - upb_elem_unref(upb_value_read(p, type), f); - } - } - free(arr->ptr); - free(arr); -} - -void __attribute__((noinline)) upb_array_doresize( - upb_array *arr, size_t type_size, upb_arraylen_t len) { - upb_arraylen_t old_size = arr->size; - size_t new_size = upb_round_up_pow2(len); - arr->ptr = realloc(arr->ptr, new_size * type_size); - arr->size = new_size; - memset(arr->ptr + (old_size * type_size), 0, - (new_size - old_size) * type_size); -} - -void upb_array_resizefortypesize(upb_array *arr, size_t type_size, - int32_t len) { - assert(len >= 0); - if (arr->size < len) upb_array_doresize(arr, type_size, len); - arr->len = len; -} - -void upb_array_resize(upb_array *arr, upb_fielddef *f, upb_arraylen_t len) { - upb_array_resizefortypesize(arr, upb_types[f->type].size, len); -} - - -/* upb_msg ********************************************************************/ - -upb_msg *upb_msg_new(upb_msgdef *md) { - upb_msg *msg = malloc(md->size); - // Clear all set bits and cached pointers. - memset(msg, 0, md->size); - upb_atomic_init(&msg->refcount, 1); - return msg; -} - -void _upb_msg_free(upb_msg *msg, upb_msgdef *md) { - // Need to release refs on all sub-objects. - upb_msg_iter i; - for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - upb_valueptr p = _upb_msg_getptr(msg, f); - upb_valuetype_t type = upb_field_valuetype(f); - if (upb_field_ismm(f)) upb_field_unref(upb_value_read(p, type), f); - } - free(msg); -} - -void upb_msg_recycle(upb_msg **_msg, upb_msgdef *msgdef) { - upb_msg *msg = *_msg; - if(msg && upb_atomic_only(&msg->refcount)) { - upb_msg_clear(msg, msgdef); - } else { - upb_msg_unref(msg, msgdef); - if (msg) { - bool was_lastref = upb_atomic_unref(&msg->refcount); - (void)was_lastref; - assert(!was_lastref); - } - *_msg = upb_msg_new(msgdef); - } -} - -INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { - msg->data[f->set_bit_offset] |= f->set_bit_mask; -} - -void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { - assert(val.type == upb_types[upb_field_valuetype(f)].inmemory_type); - upb_valueptr ptr = _upb_msg_getptr(msg, f); - if (upb_field_ismm(f)) { - // Unref any previous value we may have had there. - upb_value oldval = upb_value_read(ptr, upb_field_valuetype(f)); - upb_field_unref(oldval, f); - - // Ref the new value. - upb_atomic_t *refcount = upb_value_getrefcount(val); - if (refcount) upb_atomic_ref(refcount); - } - upb_msg_sethas(msg, f); - return upb_value_write(ptr, val, upb_field_valuetype(f)); -} - -upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { - if (!upb_msg_has(msg, f)) { - upb_value val = f->default_value; - if (upb_issubmsg(f)) { - // TODO: handle arrays also, which must be treated similarly. - upb_msgdef *md = upb_downcast_msgdef(f->def); - upb_msg *m = upb_msg_new(md); - // Copy all set bits and values, except the refcount. - memcpy(m , upb_value_getmsg(val), md->size); - upb_atomic_init(&m->refcount, 0); // The msg will take a ref. - upb_value_setmsg(&val, m); - } - upb_msg_set(msg, f, val); - return val; - } else { - return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f)); +void upb_msg_clear(void *msg, upb_msgdef *md) { + memset(msg, 0, md->hasbit_bytes); + // TODO: set primitive fields to defaults? +} + +void *upb_stdarray_append(upb_stdarray *a, size_t type_size) { + assert(a->len <= a->size); + if (a->len == a->size) { + size_t old_size = a->size; + a->size = old_size == 0 ? 8 : (old_size * 2); + a->ptr = realloc(a->ptr, a->size * type_size); + memset(&a->ptr[old_size * type_size], 0, (a->size - old_size) * type_size); } + return &a->ptr[a->len++ * type_size]; } +#if 0 static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md, upb_dispatcher *d); @@ -253,110 +75,64 @@ void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h, upb_dispatcher_uninit(&d); } +#endif -static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) { - upb_valueptr p = _upb_msg_getptr(msg, f); - if (upb_isarray(f)) { - // Create/recycle/resize the array if necessary, and find a pointer to - // a newly-appended element. - if (!upb_msg_has(msg, f)) { - upb_array_recycle(p.arr); - upb_msg_sethas(msg, f); - } - assert(*p.arr != NULL); - upb_arraylen_t oldlen = upb_array_len(*p.arr); - upb_array_resize(*p.arr, f, oldlen + 1); - p = _upb_array_getptr(*p.arr, f, oldlen); - } - return p; -} - -upb_msg *upb_msg_appendmsg(upb_msg *msg, upb_fielddef *f, upb_msgdef *msgdef) { - upb_valueptr p = upb_msg_getappendptr(msg, f); - if (upb_isarray(f) || !upb_msg_has(msg, f)) { - upb_msg_recycle(p.msg, msgdef); - upb_msg_sethas(msg, f); - } - return *p.msg; -} - - -/* upb_msg handlers ***********************************************************/ +/* Standard writers. **********************************************************/ -#if UPB_MAX_FIELDS > 2048 -#error "We're using an 8-bit integer to store a has_offset." -#endif -typedef struct { - uint8_t has_offset; - uint8_t has_mask; - uint16_t val_offset; - uint16_t msg_size; - uint8_t set_flags_bytes; - uint8_t padding; -} upb_msgsink_fval; - -static upb_msgsink_fval upb_msgsink_unpackfval(upb_value fval) { - assert(sizeof(upb_msgsink_fval) == 8); - upb_msgsink_fval ret; - uint64_t fval_u64 = upb_value_getuint64(fval); - memcpy(&ret, &fval_u64, 8); - return ret; +void upb_stdmsg_sethas(void *_m, upb_value fval) { + char *m = _m; + upb_fielddef *f = upb_value_getfielddef(fval); + if (f->hasbit >= 0) m[f->hasbit / 8] |= (1 << (f->hasbit % 8)); } -static uint64_t upb_msgsink_packfval(uint8_t has_offset, uint8_t has_mask, - uint16_t val_offset, uint16_t msg_size, - uint8_t set_flags_bytes) { - upb_msgsink_fval fval = { - has_offset, has_mask, val_offset, msg_size, set_flags_bytes, 0}; - uint64_t ret = 0; - memcpy(&ret, &fval, sizeof(fval)); - return ret; +bool upb_stdmsg_has(void *_m, upb_value fval) { + char *m = _m; + upb_fielddef *f = upb_value_getfielddef(fval); + return f->hasbit < 0 || (m[f->hasbit / 8] & (1 << (f->hasbit % 8))); } -#define SCALAR_VALUE_CB_PAIR(type, ctype) \ - upb_flow_t upb_msgsink_ ## type ## value(void *_m, upb_value _fval, \ - upb_value val) { \ - upb_msg *m = _m; \ - upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval); \ - m->data[fval.has_offset] |= fval.has_mask; \ - *(ctype*)&m->data[fval.val_offset] = upb_value_get ## type(val); \ +#define UPB_ACCESSORS(type, ctype) \ + upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval, \ + upb_value val) { \ + upb_fielddef *f = upb_value_getfielddef(fval); \ + uint8_t *m = _m; \ + upb_stdmsg_sethas(_m, fval); \ + *(ctype*)&m[f->offset] = upb_value_get ## type(val); \ return UPB_CONTINUE; \ } \ \ - upb_flow_t upb_msgsink_ ## type ## value_r(void *_a, upb_value _fval, \ - upb_value val) { \ + upb_flow_t upb_stdmsg_set ## type ## _r(void *a, upb_value _fval, \ + upb_value val) { \ (void)_fval; \ - upb_array *arr = _a; \ - upb_array_resizefortypesize(arr, sizeof(ctype), arr->len+1); \ - upb_valueptr p = _upb_array_getptrforsize(arr, sizeof(ctype), \ - arr->len-1); \ - *(ctype*)p._void = upb_value_get ## type(val); \ + ctype *p = upb_stdarray_append((upb_stdarray*)a, sizeof(ctype)); \ + *p = upb_value_get ## type(val); \ return UPB_CONTINUE; \ } \ - -SCALAR_VALUE_CB_PAIR(double, double) -SCALAR_VALUE_CB_PAIR(float, float) -SCALAR_VALUE_CB_PAIR(int32, int32_t) -SCALAR_VALUE_CB_PAIR(int64, int64_t) -SCALAR_VALUE_CB_PAIR(uint32, uint32_t) -SCALAR_VALUE_CB_PAIR(uint64, uint64_t) -SCALAR_VALUE_CB_PAIR(bool, bool) - -upb_sflow_t upb_msgsink_startseq(void *_m, upb_value _fval) { - upb_msg *m = _m; - upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval); - upb_array **arr = (upb_array**)&m->data[fval.val_offset]; - if (!(m->data[fval.has_offset] & fval.has_mask)) { - upb_array_recycle(arr); - m->data[fval.has_offset] |= fval.has_mask; + \ + upb_value upb_stdmsg_get ## type(void *_m, upb_value fval) { \ + uint8_t *m = _m; \ + upb_fielddef *f = upb_value_getfielddef(fval); \ + upb_value ret; \ + upb_value_set ## type(&ret, *(ctype*)&m[f->offset]); \ + return ret; \ + } \ + upb_value upb_stdmsg_seqget ## type(void *i) { \ + upb_value val; \ + upb_value_set ## type(&val, *(ctype*)i); \ + return val; \ } - return UPB_CONTINUE_WITH(*arr); -} -upb_flow_t upb_msgsink_strvalue(void *_m, upb_value _fval, upb_value val) { - upb_msg *m = _m; - upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval); - m->data[fval.has_offset] |= fval.has_mask; +UPB_ACCESSORS(double, double) +UPB_ACCESSORS(float, float) +UPB_ACCESSORS(int32, int32_t) +UPB_ACCESSORS(int64, int64_t) +UPB_ACCESSORS(uint32, uint32_t) +UPB_ACCESSORS(uint64, uint64_t) +UPB_ACCESSORS(bool, bool) +UPB_ACCESSORS(ptr, void*) +#undef UPB_ACCESSORS + +static void _upb_stdmsg_setstr(void *_dst, upb_value _src) { // We do: // - upb_string_recycle(), upb_string_substr() instead of // - upb_string_unref(), upb_string_getref() @@ -369,115 +145,204 @@ upb_flow_t upb_msgsink_strvalue(void *_m, upb_value _fval, upb_value val) { // allocate string objects whereas a upb_string_getref could have avoided // those allocations completely; if this is an issue, we could make it an // option of the upb_msgsink which behavior is desired. - upb_string *src = upb_value_getstr(val); - upb_string **dst = (void*)&m->data[fval.val_offset]; + upb_string **dst = _dst; + upb_string *src = upb_value_getstr(_src); upb_string_recycle(dst); upb_string_substr(*dst, src, 0, upb_string_len(src)); +} + +upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) { + char *m = _m; + upb_fielddef *f = upb_value_getfielddef(fval); + upb_stdmsg_sethas(_m, fval); + _upb_stdmsg_setstr(&m[f->offset], val); return UPB_CONTINUE; } -upb_flow_t upb_msgsink_strvalue_r(void *_a, upb_value _fval, - upb_value val) { - upb_array *arr = _a; - (void)_fval; - upb_array_resizefortypesize(arr, sizeof(void*), arr->len+1); - upb_valueptr p = _upb_array_getptrforsize(arr, sizeof(void*), - upb_array_len(arr)-1); - upb_string *src = upb_value_getstr(val); - upb_string_recycle(p.str); - upb_string_substr(*p.str, src, 0, upb_string_len(src)); +upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) { + (void)fval; + _upb_stdmsg_setstr(upb_stdarray_append((upb_stdarray*)a, sizeof(void*)), val); return UPB_CONTINUE; } +upb_value upb_stdmsg_getstr(void *m, upb_value fval) { + upb_value val = upb_stdmsg_getptr(m, fval); + upb_value_setstr(&val, upb_value_getptr(val)); + return val; +} + +upb_value upb_stdmsg_seqgetstr(void *i) { + upb_value val = upb_stdmsg_seqgetptr(i); + upb_value_setstr(&val, upb_value_getptr(val)); + return val; +} + +void *upb_stdmsg_new(upb_msgdef *md) { + void *m = malloc(md->size); + memset(m, 0, md->size); + upb_msg_clear(m, md); + return m; +} -upb_sflow_t upb_msgsink_startsubmsg(void *_m, upb_value _fval) { - upb_msg *msg = _m; - upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval); - - upb_msgdef md; - md.size = fval.msg_size; - md.set_flags_bytes = fval.set_flags_bytes; - upb_fielddef f; - f.set_bit_mask = fval.has_mask; - f.set_bit_offset = fval.has_offset; - f.label = UPB_LABEL(OPTIONAL); // Just not repeated. - f.type = UPB_TYPE(MESSAGE); - f.byte_offset = fval.val_offset; - - upb_msg **subm = _upb_msg_getptr(msg, &f).msg; - if (!upb_msg_has(msg, &f)) { - upb_msg_recycle(subm, &md); - upb_msg_sethas(msg, &f); +void upb_stdseq_free(void *s, upb_fielddef *f) { + upb_stdarray *a = s; + if (upb_issubmsg(f) || upb_isstring(f)) { + void **p = (void**)a->ptr; + for (int i = 0; i < a->size; i++) { + if (upb_issubmsg(f)) { + upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def)); + } else { + upb_string_unref(p[i]); + } + } + } + free(a->ptr); + free(a); +} + +void upb_stdmsg_free(void *m, upb_msgdef *md) { + if (m == NULL) return; + upb_msg_iter i; + for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + if (!upb_isseq(f) && !upb_issubmsg(f) && !upb_isstring(f)) continue; + void *subp = upb_value_getptr(upb_stdmsg_getptr(m, f->fval)); + if (subp == NULL) continue; + if (upb_isseq(f)) { + upb_stdseq_free(subp, f); + } else if (upb_issubmsg(f)) { + upb_stdmsg_free(subp, upb_downcast_msgdef(f->def)); + } else { + upb_string_unref(subp); + } + } + free(m); +} + +upb_sflow_t upb_stdmsg_startseq(void *_m, upb_value fval) { + char *m = _m; + upb_fielddef *f = upb_value_getfielddef(fval); + upb_stdarray **arr = (void*)&m[f->offset]; + if (!upb_stdmsg_has(_m, fval)) { + if (!*arr) { + *arr = malloc(sizeof(**arr)); + (*arr)->size = 0; + (*arr)->ptr = NULL; + } + (*arr)->len = 0; + upb_stdmsg_sethas(m, fval); + } + return UPB_CONTINUE_WITH(*arr); +} + +void upb_stdmsg_recycle(void **m, upb_msgdef *md) { + if (*m) + upb_msg_clear(*m, md); + else + *m = upb_stdmsg_new(md); +} + +upb_sflow_t upb_stdmsg_startsubmsg(void *_m, upb_value fval) { + char *m = _m; + upb_fielddef *f = upb_value_getfielddef(fval); + void **subm = (void*)&m[f->offset]; + if (!upb_stdmsg_has(m, fval)) { + upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def)); + upb_stdmsg_sethas(m, fval); } return UPB_CONTINUE_WITH(*subm); } -upb_sflow_t upb_msgsink_startsubmsg_r(void *_a, upb_value _fval) { - upb_array *a = _a; +upb_sflow_t upb_stdmsg_startsubmsg_r(void *a, upb_value fval) { assert(a != NULL); - upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval); - - upb_msgdef md; - md.size = fval.msg_size; - md.set_flags_bytes = fval.set_flags_bytes; - upb_fielddef f; - f.set_bit_mask = fval.has_mask; - f.set_bit_offset = fval.has_offset; - f.label = UPB_LABEL(REPEATED); - f.type = UPB_TYPE(MESSAGE); - f.byte_offset = fval.val_offset; - - upb_arraylen_t oldlen = upb_array_len(a); - upb_array_resize(a, &f, oldlen + 1); - upb_valueptr p = _upb_array_getptr(a, &f, oldlen); - upb_msg_recycle(p.msg, &md); - return UPB_CONTINUE_WITH(*p.msg); + upb_fielddef *f = upb_value_getfielddef(fval); + void **subm = upb_stdarray_append((upb_stdarray*)a, sizeof(void*)); + upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def)); + return UPB_CONTINUE_WITH(*subm); } -INLINE void upb_msg_onfreg(void *c, upb_fhandlers *fh, upb_fielddef *f) { - (void)c; - uint16_t msg_size = 0; - uint8_t set_flags_bytes = 0; - if (upb_issubmsg(f)) { - upb_msgdef *md = upb_downcast_msgdef(f->def); - msg_size = md->size; - set_flags_bytes = md->set_flags_bytes; +void *upb_stdmsg_seqbegin(void *_a) { + upb_stdarray *a = _a; + return a->len > 0 ? a->ptr : NULL; +} + +#define NEXTFUNC(size) \ + void *upb_stdmsg_ ## size ## byte_seqnext(void *_a, void *iter) { \ + upb_stdarray *a = _a; \ + void *next = (char*)iter + size; \ + return (char*)next < (char*)a->ptr + (a->len * size) ? next : NULL; \ } - upb_value_setuint64(&fh->fval, - upb_msgsink_packfval(f->set_bit_offset, f->set_bit_mask, - f->byte_offset, msg_size, set_flags_bytes)); - if (fh->repeated) upb_fhandlers_setstartseq(fh, upb_msgsink_startseq); -#define CASE(upb_type, type) \ -case UPB_TYPE(upb_type): \ - upb_fhandlers_setvalue(fh, upb_isarray(f) ? \ - upb_msgsink_ ## type ## value_r : upb_msgsink_ ## type ## value); \ - break; - switch (f->type) { - CASE(DOUBLE, double) - CASE(FLOAT, float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, bool) - CASE(ENUM, int32) - CASE(STRING, str) - CASE(BYTES, str) -#undef CASE - case UPB_TYPE(MESSAGE): - case UPB_TYPE(GROUP): - upb_fhandlers_setstartsubmsg(fh, - upb_isarray(f) ? upb_msgsink_startsubmsg_r : upb_msgsink_startsubmsg); - break; + +NEXTFUNC(8) +NEXTFUNC(4) +NEXTFUNC(1) + +#define STDMSG(type) { static upb_accessor_vtbl vtbl = {NULL, &upb_stdmsg_startsubmsg, \ + &upb_stdmsg_set ## type, &upb_stdmsg_has, &upb_stdmsg_get ## type, \ + NULL, NULL, NULL}; return &vtbl; } +#define STDMSG_R(type, size) { static upb_accessor_vtbl vtbl = { \ + &upb_stdmsg_startseq, &upb_stdmsg_startsubmsg_r, &upb_stdmsg_set ## type ## _r, \ + &upb_stdmsg_has, &upb_stdmsg_getptr, &upb_stdmsg_seqbegin, \ + &upb_stdmsg_ ## size ## byte_seqnext, &upb_stdmsg_seqget ## type}; \ + return &vtbl; } + +upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f) { + if (upb_isseq(f)) { + switch (f->type) { + case UPB_TYPE(DOUBLE): STDMSG_R(double, 8) + case UPB_TYPE(FLOAT): STDMSG_R(float, 4) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): STDMSG_R(uint64, 8) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): STDMSG_R(int64, 8) + case UPB_TYPE(INT32): + case UPB_TYPE(SINT32): + case UPB_TYPE(ENUM): + case UPB_TYPE(SFIXED32): STDMSG_R(int32, 4) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): STDMSG_R(uint32, 4) + case UPB_TYPE(BOOL): STDMSG_R(bool, 1) + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + case UPB_TYPE(GROUP): + case UPB_TYPE(MESSAGE): STDMSG_R(str, 8) // TODO: 32-bit + } + } else { + switch (f->type) { + case UPB_TYPE(DOUBLE): STDMSG(double) + case UPB_TYPE(FLOAT): STDMSG(float) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): STDMSG(uint64) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): STDMSG(int64) + case UPB_TYPE(INT32): + case UPB_TYPE(SINT32): + case UPB_TYPE(ENUM): + case UPB_TYPE(SFIXED32): STDMSG(int32) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): STDMSG(uint32) + case UPB_TYPE(BOOL): STDMSG(bool) + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + case UPB_TYPE(GROUP): + case UPB_TYPE(MESSAGE): STDMSG(str) + } + } + return NULL; +} + +static void upb_accessors_onfreg(void *c, upb_fhandlers *fh, upb_fielddef *f) { + (void)c; + if (f->accessor) { + upb_fhandlers_setstartseq(fh, f->accessor->appendseq); + upb_fhandlers_setvalue(fh, f->accessor->set); + upb_fhandlers_setstartsubmsg(fh, f->accessor->appendsubmsg); + upb_fhandlers_setfval(fh, f->fval); } } -upb_mhandlers *upb_msg_reghandlers(upb_handlers *h, upb_msgdef *m) { - return upb_handlers_regmsgdef(h, m, NULL, &upb_msg_onfreg, NULL); +upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, upb_msgdef *m) { + return upb_handlers_regmsgdef(h, m, NULL, &upb_accessors_onfreg, NULL); } diff --git a/src/upb_msg.h b/src/upb_msg.h index 4e1b4d5..b93037b 100644 --- a/src/upb_msg.h +++ b/src/upb_msg.h @@ -4,285 +4,122 @@ * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * Data structure for storing a message of protobuf data. Unlike Google's - * protobuf, upb_msg and upb_array are reference counted instead of having - * exclusive ownership of their fields. This is a better match for dynamic - * languages where statements like a.b = other_b are normal. + * Routines for reading and writing message data to an in-memory structure, + * similar to a C struct. * - * upb's parsers and serializers could also be used to populate and serialize - * other kinds of message objects (even one generated by Google's protobuf). + * upb does not define one single message object that everyone must use. + * Rather it defines an abstract interface for reading and writing members + * of a message object, and all of the parsers and serializers use this + * abstract interface. This allows upb's parsers and serializers to be used + * regardless of what memory management scheme or synchronization model the + * application is using. * - * TODO: consider properly supporting const instances. + * A standard set of accessors is provided for doing simple reads and writes at + * a known offset into the message. These accessors should be used when + * possible, because they are specially optimized -- for example, the JIT can + * recognize them and emit specialized code instead of having to call the + * function at all. The application can substitute its own accessors when the + * standard accessors are not suitable. */ #ifndef UPB_MSG_H #define UPB_MSG_H #include +#include "upb_def.h" #include "upb_handlers.h" #ifdef __cplusplus extern "C" { #endif -// A pointer to a .proto value. The owner must have an out-of-band way of -// knowing the type, so it knows which union member to use. -typedef union { - double *_double; - float *_float; - int32_t *int32; - int64_t *int64; - uint8_t *uint8; - uint32_t *uint32; - uint64_t *uint64; - bool *_bool; - upb_string **str; - upb_msg **msg; - upb_array **arr; - void *_void; -} upb_valueptr; - -INLINE upb_valueptr upb_value_addrof(upb_value *val) { - upb_valueptr ptr = {&val->val._double}; - return ptr; -} -// Reads or writes a upb_value from an address represented by a upb_value_ptr. -// We need to know the value type to perform this operation, because we need to -// know how much memory to copy (and for big-endian machines, we need to know -// where in the upb_value the data goes). -// -// For little endian-machines where we didn't mind overreading, we could make -// upb_value_read simply use memcpy(). -INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { - upb_value val; - -#ifdef NDEBUG -#define CASE(t, member_name) \ - case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break; -#else -#define CASE(t, member_name) \ - case UPB_TYPE(t): val.val.member_name = *ptr.member_name; val.type = upb_types[ft].inmemory_type; break; -#endif +/* upb_accessor ***************************************************************/ - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - CASE(STRING, str) - CASE(BYTES, str) - CASE(MESSAGE, msg) - CASE(GROUP, msg) - case UPB_VALUETYPE_ARRAY: - val.val.arr = *ptr.arr; -#ifndef NDEBUG - val.type = UPB_VALUETYPE_ARRAY; -#endif - break; - default: assert(false); - } - return val; +// A upb_accessor is a table of function pointers for doing reads and writes +// for one specific upb_fielddef. Each field has a separate accessor, which +// lives in the fielddef. -#undef CASE -} +typedef bool upb_has_reader(void *m, upb_value fval); +typedef upb_value upb_value_reader(void *m, upb_value fval); -INLINE void upb_value_write(upb_valueptr ptr, upb_value val, - upb_fieldtype_t ft) { -#ifndef NDEBUG - if (ft == UPB_VALUETYPE_ARRAY) { - assert(val.type == UPB_VALUETYPE_ARRAY); - } else if (val.type != UPB_VALUETYPE_RAW) { - assert(val.type == upb_types[ft].inmemory_type); - } -#endif -#define CASE(t, member_name) \ - case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break; - - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - CASE(STRING, str) - CASE(BYTES, str) - CASE(MESSAGE, msg) - CASE(GROUP, msg) - case UPB_VALUETYPE_ARRAY: - *ptr.arr = val.val.arr; - break; - default: assert(false); - } - -#undef CASE -} +typedef void *upb_seqbegin_handler(void *s); +typedef void *upb_seqnext_handler(void *s, void *iter); +typedef upb_value upb_seqget_handler(void *iter); +INLINE bool upb_seq_done(void *iter) { return iter == NULL; } +typedef struct _upb_accessor_vtbl { + // Writers. These take an fval as a parameter because the callbacks are used + // as upb_handlers, but the fval is always the fielddef for that field. + upb_startfield_handler *appendseq; // Repeated fields only. + upb_startfield_handler *appendsubmsg; // Submsg fields (repeated or no). + upb_value_handler *set; // Scalar fields (repeated or no). -/* upb_array ******************************************************************/ + // Readers. + upb_has_reader *has; + upb_value_reader *get; + upb_seqbegin_handler *seqbegin; + upb_seqnext_handler *seqnext; + upb_seqget_handler *seqget; +} upb_accessor_vtbl; -typedef uint32_t upb_arraylen_t; -struct _upb_array { - upb_atomic_t refcount; - // "len" and "size" are measured in elements, not bytes. - int32_t len; - int32_t size; - char *ptr; -}; - -void _upb_array_free(upb_array *a, upb_fielddef *f); -INLINE upb_valueptr _upb_array_getptrforsize(upb_array *a, size_t type_size, - int32_t elem) { - assert(elem >= 0); - upb_valueptr p; - p._void = &a->ptr[elem * type_size]; - return p; -} +// Registers handlers for writing into a message of the given type. +upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, upb_msgdef *m); -INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f, - uint32_t elem) { - return _upb_array_getptrforsize(a, upb_types[f->type].size, elem); -} +// Returns an stdmsg accessor for the given fielddef. +upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f); -upb_array *upb_array_new(void); -INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) { - if (a && upb_atomic_unref(&a->refcount)) _upb_array_free(a, f); -} +/* upb_msg/upb_seq ************************************************************/ -void upb_array_recycle(upb_array **arr); -INLINE uint32_t upb_array_len(upb_array *a) { - return a->len; -} +// upb_msg and upb_seq allow for generic access to a message through its +// accessor vtable. Note that these do *not* allow you to create, destroy, or +// take references on the objects -- these operations are specifically outside +// the scope of what the accessors define. -INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f, - upb_arraylen_t i) { - assert(i < upb_array_len(arr)); - return upb_value_read(_upb_array_getptr(arr, f, i), f->type); -} +// Clears all hasbits. +// TODO: Add a separate function for setting primitive values back to their +// defaults (but not strings, submessages, or arrays). +void upb_msg_clear(void *msg, upb_msgdef *md); +// Could add a method that recursively clears submessages, strings, and +// arrays if desired. This could be a win if you wanted to merge without +// needing hasbits, because during parsing you would never clear submessages +// or arrays. Also this could be desired to provide proto2 operations on +// generated messages. -/* upb_msg ********************************************************************/ - -// upb_msg is not self-describing; the upb_msg does not contain a pointer to the -// upb_msgdef. While this makes the API a bit more cumbersome to use, this -// choice was made for a few important reasons: -// -// 1. it would make every message 8 bytes larger on 64-bit platforms. This is -// a high overhead for small messages. -// 2. you would want the msg to own a ref on its msgdef, but this would require -// an atomic operation for every message create or destroy! -struct _upb_msg { - upb_atomic_t refcount; - uint8_t data[4]; // We allocate the appropriate amount per message. -}; - -void _upb_msg_free(upb_msg *msg, upb_msgdef *md); - -INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { - upb_valueptr p; - p._void = &msg->data[f->byte_offset]; - return p; +INLINE bool upb_msg_has(void *m, upb_fielddef *f) { + return f->accessor && f->accessor->has(m, f->fval); } -// Creates a new msg of the given type. -upb_msg *upb_msg_new(upb_msgdef *md); - -// Unrefs the given message. -INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { - if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); -} - -INLINE upb_msg *upb_msg_getref(upb_msg *msg) { - assert(msg); - upb_atomic_ref(&msg->refcount); - return msg; +// May only be called for fields that are known to be set. +INLINE upb_value upb_msg_get(void *m, upb_fielddef *f) { + assert(upb_msg_has(m, f)); + return f->accessor->get(m, f->fval); } -// Modifies *msg to point to a newly initialized msg instance. If the msg had -// no other referents, reuses the same msg, otherwise allocates a new one. -// The caller *must* own a ref on the msg prior to calling this method! -void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef); - -// Tests whether the given field is explicitly set, or whether it will return a -// default. -INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { - return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0; +INLINE void *upb_seq_begin(void *s, upb_fielddef *f) { + assert(f->accessor); + return f->accessor->seqbegin(s); } - -// We have several options for handling default values: -// 1. inside upb_msg_clear(), overwrite all values to be their defaults, -// overwriting submessage pointers to point to the default instance again. -// 2. inside upb_msg_get(), test upb_msg_has() and return md->default_value -// if it is not set. upb_msg_clear() only clears the set bits. -// We lazily clear objects if/when we reuse them. -// 3. inside upb_msg_clear(), overwrite all values to be their default, -// and recurse into submessages to set all their values to defaults also. -// 4. as a hybrid of (1) and (3), clear all set bits in upb_msg_clear() -// but also overwrite all primitive values to be their defaults. Only -// accessors for non-primitive values (submessage, strings, and arrays) -// need to check the has-bits in their accessors -- primitive values can -// always be returned straight from the msg. -// -// (1) is undesirable, because it prevents us from caching sub-objects. -// (2) makes clear() cheaper, but makes get() branchier. -// (3) makes get() less branchy, but makes clear() traverse the message graph. -// (4) is probably the best bang for the buck. -// -// For the moment upb does (2), but we should implement (4). Google's protobuf -// does (3), which is likely part of the reason that even our table-based -// decoder beats it in some benchmarks. - -// For submessages and strings, the returned value is not owned. -upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f); - -// A specialized version of the previous that is cheaper because it doesn't -// support submessages or arrays. -INLINE upb_value upb_msg_getscalar(upb_msg *msg, upb_fielddef *f) { - if (upb_msg_has(msg, f)) { - return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f)); - } else { - return f->default_value; - } +INLINE void *upb_seq_next(void *s, void *iter, upb_fielddef *f) { + assert(f->accessor); + assert(!upb_seq_done(iter)); + return f->accessor->seqnext(s, iter); } - -// Sets the given field to the given value. If the field is a string, array, -// or submessage, releases the ref on any object we may have been referencing -// and takes a ref on the new object (if any). -void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val); - -// Unsets all field values back to their defaults. -INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { - memset(msg->data, 0, md->set_flags_bytes); +INLINE upb_value upb_seq_get(void *iter, upb_fielddef *f) { + assert(f->accessor); + assert(!upb_seq_done(iter)); + return f->accessor->seqget(iter); } -// Registers handlers for populating a msg for the given upb_msgdef. -// The upb_msg itself must be passed as the param to the src. -upb_mhandlers *upb_msg_reghandlers(upb_handlers *h, upb_msgdef *md); - /* upb_msgvisitor *************************************************************/ -// Calls a set of upb_handlers with the contents of a upb_msg. +// A upb_msgvisitor reads data from an in-memory structure using its accessors, +// pushing the results to a given set of upb_handlers. +// TODO: not yet implemented. + typedef struct { upb_fhandlers *fh; upb_fielddef *f; @@ -314,6 +151,118 @@ void upb_msgvisitor_uninit(upb_msgvisitor *v); void upb_msgvisitor_reset(upb_msgvisitor *v, upb_msg *m); void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status); + +/* Standard writers. **********************************************************/ + +// Allocates a new stdmsg. +void *upb_stdmsg_new(upb_msgdef *md); + +// Recursively frees any strings or submessages that the message refers to. +void upb_stdmsg_free(void *m, upb_msgdef *md); + +// "hasbit" must be <= UPB_MAX_FIELDS. If it is <0, this field has no hasbit. +upb_value upb_stdmsg_packfval(int16_t hasbit, uint16_t value_offset); +upb_value upb_stdmsg_packfval_subm(int16_t hasbit, uint16_t value_offset, + uint16_t subm_size, uint8_t subm_setbytes); + +// Value writers for every in-memory type: write the data to a known offset +// from the closure "c" and set the hasbit (if any). +// TODO: can we get away with having only one for int64, uint64, double, etc? +// The main thing in the way atm is that the upb_value is strongly typed. +// in debug mode. +upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setuint32(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setdouble(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val); + +// Value writers for repeated fields: the closure points to a standard array +// struct, appends the value to the end of the array, resizing with realloc() +// if necessary. +typedef struct { + char *ptr; + int32_t len; // Number of elements present. + int32_t size; // Number of elements allocated. +} upb_stdarray; + +upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setint32_r(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setuint64_r(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setuint32_r(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setdouble_r(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setfloat_r(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setbool_r(void *c, upb_value fval, upb_value val); + +// Writers for C strings (NULL-terminated): we can find a char* at a known +// offset from the closure "c". Calls realloc() on the pointer to allocate +// the memory (TODO: investigate whether checking malloc_usable_size() would +// be cheaper than realloc()). Also sets the hasbit, if any. +// +// Since the string is NULL terminated and does not store an explicit length, +// these are not suitable for binary data that can contain NULLs. +upb_flow_t upb_stdmsg_setcstr(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setcstr_r(void *c, upb_value fval, upb_value val); + +// Writers for length-delimited strings: we explicitly store the length, so +// the data can contain NULLs. Stores the data using upb_stdarray +// which is located at a known offset from the closure "c" (note that it +// is included inline rather than pointed to). Also sets the hasbit, if any. +upb_flow_t upb_stdmsg_setstr(void *c, upb_value fval, upb_value val); +upb_flow_t upb_stdmsg_setstr_r(void *c, upb_value fval, upb_value val); + +// Writers for startseq and startmsg which allocate (or reuse, if possible) +// a sub data structure (upb_stdarray or a submessage, respectively), +// setting the hasbit. If the hasbit is already set, the existing data +// structure is used verbatim. If the hasbit is not already set, the pointer +// is checked for NULL. If it is NULL, a new substructure is allocated, +// cleared, and used. If it is not NULL, the existing substructure is +// cleared and reused. +// +// If there is no hasbit, we always behave as if the hasbit was not set, +// so any existing data for this array or submessage is cleared. In most +// cases this will be fine since each array or non-repeated submessage should +// occur at most once in the stream. But if the client is using "concatenation +// as merging", it will want to make sure hasbits are allocated so merges can +// happen appropriately. +// +// If there was a demand for the behavior that absence of a hasbit acts as if +// the bit was always set, we could provide that also. But Clear() would need +// to act recursively, which is less efficient since it requires an extra pass +// over the tree. +upb_sflow_t upb_stdmsg_startseq(void *c, upb_value fval); +upb_sflow_t upb_stdmsg_startsubmsg(void *c, upb_value fval); +upb_sflow_t upb_stdmsg_startsubmsg_r(void *c, upb_value fval); + + +/* Standard readers. **********************************************************/ + +bool upb_stdmsg_has(void *c, upb_value fval); +void *upb_stdmsg_seqbegin(void *c); + +upb_value upb_stdmsg_getint64(void *c, upb_value fval); +upb_value upb_stdmsg_getint32(void *c, upb_value fval); +upb_value upb_stdmsg_getuint64(void *c, upb_value fval); +upb_value upb_stdmsg_getuint32(void *c, upb_value fval); +upb_value upb_stdmsg_getdouble(void *c, upb_value fval); +upb_value upb_stdmsg_getfloat(void *c, upb_value fval); +upb_value upb_stdmsg_getbool(void *c, upb_value fval); +upb_value upb_stdmsg_getptr(void *c, upb_value fval); + +void *upb_stdmsg_8byte_seqnext(void *c, void *iter); +void *upb_stdmsg_4byte_seqnext(void *c, void *iter); +void *upb_stdmsg_1byte_seqnext(void *c, void *iter); + +upb_value upb_stdmsg_seqgetint64(void *c); +upb_value upb_stdmsg_seqgetint32(void *c); +upb_value upb_stdmsg_seqgetuint64(void *c); +upb_value upb_stdmsg_seqgetuint32(void *c); +upb_value upb_stdmsg_seqgetdouble(void *c); +upb_value upb_stdmsg_seqgetfloat(void *c); +upb_value upb_stdmsg_seqgetbool(void *c); +upb_value upb_stdmsg_seqgetptr(void *c); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/upb_string.h b/src/upb_string.h index 1463bbf..1f92850 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -107,6 +107,8 @@ void _upb_string_free(upb_string *str); // can be NULL, in which case this is a no-op. WARNING: NOT THREAD_SAFE // UNLESS THE STRING IS SYNCHRONIZED. INLINE void upb_string_unref(upb_string *str) { + if (str) { + } if (str && upb_atomic_read(&str->refcount) > 0 && upb_atomic_unref(&str->refcount)) { _upb_string_free(str); @@ -129,7 +131,9 @@ INLINE upb_string *upb_string_getref(upb_string *str) { int refcount = upb_atomic_read(&str->refcount); if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str); // We don't ref the special <0 refcount for static strings. - if (refcount > 0) upb_atomic_ref(&str->refcount); + if (refcount > 0) { + upb_atomic_ref(&str->refcount); + } return str; } diff --git a/src/upb_table.h b/src/upb_table.h index 9b53a37..631709c 100644 --- a/src/upb_table.h +++ b/src/upb_table.h @@ -103,6 +103,12 @@ INLINE uint32_t upb_strtable_count(upb_strtable *t) { void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val); void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *ent); // TODO: update void upb_inttable_compact(upb_inttable *t); +INLINE void upb_strtable_clear(upb_strtable *t) { + // TODO: improve. + uint16_t entry_size = t->t.entry_size; + upb_strtable_free(t); + upb_strtable_init(t, 8, entry_size); +} INLINE uint32_t _upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) { uint32_t bucket = k & t->t.mask; // Identity hash for ints. diff --git a/tests/test_decoder.c b/tests/test_decoder.c index e607827..7b168de 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -18,7 +18,7 @@ int main(int argc, char *argv[]) { } upb_status status = UPB_STATUS_INIT; - upb_parsedesc(symtab, desc, &status); + upb_read_descriptor(symtab, desc, &status); if (!upb_ok(&status)) { fprintf(stderr, "Error parsing descriptor: "); upb_printerr(&status); @@ -45,14 +45,13 @@ int main(int argc, char *argv[]) { upb_stdio *out = upb_stdio_new(); upb_stdio_reset(out, stdout); - upb_handlers handlers; - upb_handlers_init(&handlers); + upb_handlers *handlers = upb_handlers_new(); upb_textprinter *p = upb_textprinter_new(); upb_textprinter_reset(p, upb_stdio_bytesink(out), false); - upb_textprinter_reghandlers(&handlers, m); + upb_textprinter_reghandlers(handlers, m); upb_decoder d; - upb_decoder_init(&d, &handlers); + upb_decoder_initforhandlers(&d, handlers); upb_decoder_reset(&d, upb_stdio_bytesrc(in), p); upb_clearerr(&status); diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index f23398a..8c2e97d 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -21,19 +21,22 @@ size_t string_size; void compare(const google::protobuf::Message& proto2_msg, - upb_msg *upb_msg, upb_msgdef *upb_md); + void *upb_msg, upb_msgdef *upb_md); void compare_arrays(const google::protobuf::Reflection *r, const google::protobuf::Message& proto2_msg, const google::protobuf::FieldDescriptor *proto2_f, - upb_msg *upb_msg, upb_fielddef *upb_f) + void *upb_msg, upb_fielddef *upb_f) { ASSERT(upb_msg_has(upb_msg, upb_f)); - ASSERT(upb_isarray(upb_f)); - upb_array *arr = upb_value_getarr(upb_msg_get(upb_msg, upb_f)); - ASSERT(upb_array_len(arr) == (upb_arraylen_t)r->FieldSize(proto2_msg, proto2_f)); - for(upb_arraylen_t i = 0; i < upb_array_len(arr); i++) { - upb_value v = upb_array_get(arr, upb_f, i); + ASSERT(upb_isseq(upb_f)); + void *arr = upb_value_getptr(upb_msg_get(upb_msg, upb_f)); + void *iter = upb_seq_begin(arr, upb_f); + for(int i = 0; + i < r->FieldSize(proto2_msg, proto2_f); + i++, iter = upb_seq_next(arr, iter, upb_f)) { + ASSERT(!upb_seq_done(iter)); + upb_value v = upb_seq_get(iter, upb_f); switch(upb_f->type) { default: ASSERT(false); @@ -76,18 +79,20 @@ void compare_arrays(const google::protobuf::Reflection *r, } case UPB_TYPE(GROUP): case UPB_TYPE(MESSAGE): + // XXX: getstr ASSERT(upb_dyncast_msgdef(upb_f->def) != NULL); compare(r->GetRepeatedMessage(proto2_msg, proto2_f, i), - upb_value_getmsg(v), upb_downcast_msgdef(upb_f->def)); + upb_value_getstr(v), upb_downcast_msgdef(upb_f->def)); } } + ASSERT(upb_seq_done(iter)); } #include void compare_values(const google::protobuf::Reflection *r, const google::protobuf::Message& proto2_msg, const google::protobuf::FieldDescriptor *proto2_f, - upb_msg *upb_msg, upb_fielddef *upb_f) + void *upb_msg, upb_fielddef *upb_f) { upb_value v = upb_msg_get(upb_msg, upb_f); switch(upb_f->type) { @@ -132,13 +137,14 @@ void compare_values(const google::protobuf::Reflection *r, } case UPB_TYPE(GROUP): case UPB_TYPE(MESSAGE): + // XXX: getstr compare(r->GetMessage(proto2_msg, proto2_f), - upb_value_getmsg(v), upb_downcast_msgdef(upb_f->def)); + upb_value_getstr(v), upb_downcast_msgdef(upb_f->def)); } } void compare(const google::protobuf::Message& proto2_msg, - upb_msg *upb_msg, upb_msgdef *upb_md) + void *upb_msg, upb_msgdef *upb_md) { const google::protobuf::Reflection *r = proto2_msg.GetReflection(); const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor(); @@ -157,15 +163,15 @@ void compare(const google::protobuf::Message& proto2_msg, upb_string_len(upb_f->name)) == proto2_f->name()); ASSERT(upb_f->type == proto2_f->type()); - ASSERT(upb_isarray(upb_f) == proto2_f->is_repeated()); + ASSERT(upb_isseq(upb_f) == proto2_f->is_repeated()); if(!upb_msg_has(upb_msg, upb_f)) { - if(upb_isarray(upb_f)) + if(upb_isseq(upb_f)) ASSERT(r->FieldSize(proto2_msg, proto2_f) == 0); else ASSERT(r->HasField(proto2_msg, proto2_f) == false); } else { - if(upb_isarray(upb_f)) { + if(upb_isseq(upb_f)) { compare_arrays(r, proto2_msg, proto2_f, upb_msg, upb_f); } else { ASSERT(r->HasField(proto2_msg, proto2_f) == true); @@ -176,7 +182,7 @@ void compare(const google::protobuf::Message& proto2_msg, } void parse_and_compare(MESSAGE_CIDENT *proto2_msg, - upb_msg *upb_msg, upb_msgdef *upb_md, + void *upb_msg, upb_msgdef *upb_md, upb_string *str) { // Parse to both proto2 and upb. @@ -220,7 +226,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n"); return 1; } - upb_parsedesc(symtab, fds, &status); + upb_read_descriptor(symtab, fds, &status); if(!upb_ok(&status)) { fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": "); upb_printerr(&status); @@ -247,12 +253,12 @@ int main(int argc, char *argv[]) // Run twice to test proper object reuse. MESSAGE_CIDENT proto2_msg; - upb_msg *upb_msg = upb_msg_new(msgdef); + void *upb_msg = upb_stdmsg_new(msgdef); parse_and_compare(&proto2_msg, upb_msg, msgdef, str); parse_and_compare(&proto2_msg, upb_msg, msgdef, str); printf("All tests passed, %d assertions.\n", num_assertions); - upb_msg_unref(upb_msg, msgdef); + upb_stdmsg_free(upb_msg, msgdef); upb_def_unref(UPB_UPCAST(msgdef)); upb_string_unref(str); upb_symtab_unref(symtab); diff --git a/tests/tests.c b/tests/tests.c index a78ca03..c6b5051 100644 --- a/tests/tests.c +++ b/tests/tests.c @@ -17,7 +17,7 @@ static upb_symtab *load_test_proto() { exit(1); } upb_status status = UPB_STATUS_INIT; - upb_parsedesc(s, descriptor, &status); + upb_read_descriptor(s, descriptor, &status); ASSERT(upb_ok(&status)); upb_status_uninit(&status); upb_string_unref(descriptor); @@ -42,7 +42,7 @@ static void test_upb_jit() { upb_handlerset hset = {NULL, NULL, &upb_test_onvalue, NULL, NULL, NULL, NULL}; upb_handlers_reghandlerset(h, upb_downcast_msgdef(def), &hset); upb_decoder d; - upb_decoder_init(&d, h); + upb_decoder_initforhandlers(&d, h); upb_decoder_uninit(&d); upb_symtab_unref(s); upb_def_unref(def); -- cgit v1.2.3