summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile83
-rw-r--r--benchmarks/google_messages.proto6
-rw-r--r--benchmarks/parsestream.upb.c4
-rw-r--r--benchmarks/parsetoproto2.upb.cc311
-rw-r--r--benchmarks/parsetostruct.upb.c85
-rw-r--r--bindings/cpp/upb/bytestream.hpp33
-rw-r--r--bindings/cpp/upb/def.hpp381
-rw-r--r--bindings/cpp/upb/handlers.cc39
-rw-r--r--bindings/cpp/upb/handlers.hpp47
-rw-r--r--bindings/cpp/upb/msg.hpp62
-rw-r--r--bindings/cpp/upb/pb/glue.hpp12
-rw-r--r--bindings/cpp/upb/proto2_bridge.cc892
-rw-r--r--bindings/cpp/upb/proto2_bridge.hpp170
-rw-r--r--bindings/cpp/upb/upb.hpp44
-rw-r--r--bindings/lua/upb.c24
-rw-r--r--bindings/python/upb.c10
-rw-r--r--tests/test.proto24
-rw-r--r--tests/test_cpp.cc11
-rw-r--r--tests/test_decoder.cc (renamed from tests/test_decoder.c)543
-rw-r--r--tests/test_def.c171
-rw-r--r--tests/test_table.cc134
-rw-r--r--tests/test_vs_proto2.cc294
-rw-r--r--tests/tests.c121
-rw-r--r--tests/upb_test.h22
-rw-r--r--tools/upbc.c37
-rw-r--r--upb/atomic.h181
-rw-r--r--upb/bytestream.c9
-rw-r--r--upb/bytestream.h6
-rw-r--r--upb/def.c1194
-rw-r--r--upb/def.h619
-rw-r--r--upb/descriptor/descriptor_const.h (renamed from upb/descriptor_const.h)266
-rw-r--r--upb/descriptor/reader.c (renamed from upb/descriptor.c)80
-rw-r--r--upb/descriptor/reader.h (renamed from upb/descriptor.h)30
-rw-r--r--upb/handlers.c64
-rw-r--r--upb/handlers.h51
-rw-r--r--upb/msg.c322
-rw-r--r--upb/msg.h178
-rw-r--r--upb/pb/decoder.c141
-rw-r--r--upb/pb/decoder_x64.dasc141
-rw-r--r--upb/pb/glue.c94
-rw-r--r--upb/pb/glue.h17
-rw-r--r--upb/pb/textprinter.c7
-rw-r--r--upb/pb/varint.h10
-rw-r--r--upb/refcount.c224
-rw-r--r--upb/refcount.h70
-rw-r--r--upb/table.c568
-rw-r--r--upb/table.h238
-rw-r--r--upb/upb.c39
-rw-r--r--upb/upb.h108
49 files changed, 4584 insertions, 3633 deletions
diff --git a/Makefile b/Makefile
index 6aef581..a12e7ef 100644
--- a/Makefile
+++ b/Makefile
@@ -83,11 +83,15 @@ deps: Makefile $(ALLSRC)
CORE= \
upb/upb.c \
upb/handlers.c \
- upb/descriptor.c \
+ upb/descriptor/reader.c \
upb/table.c \
+ upb/refcount.c \
upb/def.c \
upb/msg.c \
upb/bytestream.c \
+ bindings/cpp/upb/proto2_bridge.cc \
+
+# TODO: the proto2 bridge should be built as a separate library.
# Library for the protocol buffer format (both text and binary).
PB= \
@@ -122,8 +126,9 @@ LIBUPB_PIC=upb/libupb_pic.a
lib: $(LIBUPB)
-OBJ=$(patsubst %.c,%.o,$(SRC))
-PICOBJ=$(patsubst %.c,%.lo,$(SRC))
+# Map every source in $(SRC) (.c or .cc) to exactly one object file name.
+OBJ=$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(SRC)))
+PICOBJ=$(patsubst %.cc,%.lo,$(patsubst %.c,%.lo,$(SRC)))
ifdef USE_JIT
upb/pb/decoder.o upb/pb/decoder.lo: upb/pb/decoder_x64.h
@@ -139,10 +144,18 @@ $(LIBUPB_PIC): $(PICOBJ)
$(E) CC $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
+%.o : %.cc
+ $(E) CXX $<
+ $(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $<
+
%.lo : %.c
$(E) 'CC -fPIC' $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< -fPIC
+# Position-independent objects for C++ sources (PIC counterpart of %.o : %.cc).
+%.lo : %.cc
+ $(E) 'CXX -fPIC' $<
+ $(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $< -fPIC
# Override the optimization level for def.o, because it is not in the
# critical path but gets very large when -O3 is used.
upb/def.o: upb/def.c
@@ -197,47 +210,39 @@ tests/test.proto.pb: tests/test.proto
SIMPLE_TESTS= \
tests/test_def \
tests/test_varint \
- tests/tests \
-
-# Too many tests in this binary to run Valgrind (it takes minutes).
-SLOW_TESTS= \
- tests/test_decoder \
SIMPLE_CXX_TESTS= \
tests/test_table \
tests/test_cpp \
+ tests/test_decoder \
VARIADIC_TESTS= \
tests/t.test_vs_proto2.googlemessage1 \
tests/t.test_vs_proto2.googlemessage2 \
-TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS) $(SLOW_TESTS)
-tests: $(TESTS)
+TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS)
+
+
+tests: $(TESTS) $(INTERACTIVE_TESTS)
$(TESTS): $(LIBUPB)
-tests/tests: tests/test.proto.pb
+tests/test_def: tests/test.proto.pb
$(SIMPLE_TESTS): % : %.c
$(E) CC $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $< $(LIBUPB)
-VALGRIND=valgrind --leak-check=full --error-exitcode=1
+VALGRIND=valgrind --leak-check=full --error-exitcode=1
test: tests
@echo Running all tests under valgrind.
@set -e # Abort on error.
@for test in $(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS); do \
if [ -x ./$$test ] ; then \
- echo !!! $(VALGRIND) ./$$test tests/test.proto.pb; \
+ echo !!! $(VALGRIND) ./$$test; \
$(VALGRIND) ./$$test tests/test.proto.pb || exit 1; \
fi \
done;
- @for test in "$(SLOW_TESTS)"; do \
- if [ -x ./$$test ] ; then \
- echo !!! ./$$test; \
- ./$$test || exit 1; \
- fi \
- done;
- @$(VALGRIND) tests/t.test_vs_proto2.googlemessage1 benchmarks/google_messages.proto.pb benchmarks/google_message1.dat
- @$(VALGRIND) tests/t.test_vs_proto2.googlemessage2 benchmarks/google_messages.proto.pb benchmarks/google_message2.dat
+ @$(VALGRIND) ./tests/t.test_vs_proto2.googlemessage1 benchmarks/google_message1.dat || exit 1;
+ @$(VALGRIND) ./tests/t.test_vs_proto2.googlemessage2 benchmarks/google_message2.dat || exit 1;
@echo "All tests passed!"
tests/t.test_vs_proto2.googlemessage1 \
@@ -273,15 +278,11 @@ tests/tests: upb/libupb.a
# Benchmarks
UPB_BENCHMARKS=benchmarks/b.parsestream_googlemessage1.upb_table \
benchmarks/b.parsestream_googlemessage2.upb_table \
- benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \
- benchmarks/b.parsetostruct_googlemessage2.upb_table_byval \
ifdef USE_JIT
UPB_BENCHMARKS += \
benchmarks/b.parsestream_googlemessage1.upb_jit \
benchmarks/b.parsestream_googlemessage2.upb_jit \
- benchmarks/b.parsetostruct_googlemessage1.upb_jit_byval \
- benchmarks/b.parsetostruct_googlemessage2.upb_jit_byval \
benchmarks/b.parsetoproto2_googlemessage1.upb_jit \
benchmarks/b.parsetoproto2_googlemessage2.upb_jit
endif
@@ -318,21 +319,21 @@ benchmarks/google_messages.pb.cc: benchmarks/google_messages.proto
# want to make these command-line parameters -- it makes it more annoying to
# debug or profile them.
-benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \
-benchmarks/b.parsetostruct_googlemessage2.upb_table_byval: \
+benchmarks/b.parsetostruct_googlemessage1.upb_table \
+benchmarks/b.parsetostruct_googlemessage2.upb_table: \
benchmarks/parsetostruct.upb.c $(LIBUPB) benchmarks/google_messages.proto.pb
- $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, byval, nojit)'
- $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_table_byval $< \
+ $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, nojit)'
+ $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_table $< \
-DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \
-DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
-DMESSAGE_FILE=\"google_message1.dat\" \
- -DBYREF=false -DJIT=false $(LIBUPB)
- $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, byref, nojit)'
- $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_table_byval $< \
+ -DJIT=false $(LIBUPB)
+ $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, nojit)'
+ $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_table $< \
-DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \
-DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
-DMESSAGE_FILE=\"google_message2.dat\" \
- -DBYREF=false -DJIT=false $(LIBUPB)
+ -DJIT=false $(LIBUPB)
benchmarks/b.parsestream_googlemessage1.upb_table \
benchmarks/b.parsestream_googlemessage2.upb_table: \
@@ -351,21 +352,21 @@ benchmarks/b.parsestream_googlemessage2.upb_table: \
$(LIBUPB)
ifdef USE_JIT
-benchmarks/b.parsetostruct_googlemessage1.upb_jit_byval \
-benchmarks/b.parsetostruct_googlemessage2.upb_jit_byval: \
+benchmarks/b.parsetostruct_googlemessage1.upb_jit \
+benchmarks/b.parsetostruct_googlemessage2.upb_jit: \
benchmarks/parsetostruct.upb.c $(LIBUPB) benchmarks/google_messages.proto.pb
- $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, byref, jit)'
- $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_jit_byval $< \
+ $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, jit)'
+ $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_jit $< \
-DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \
-DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
-DMESSAGE_FILE=\"google_message1.dat\" -DJIT=true \
- -DBYREF=true -DJIT=true $(LIBUPB)
- $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, byval, jit)'
- $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_jit_byval $< \
+ -DJIT=true $(LIBUPB)
+ $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, jit)'
+ $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_jit $< \
-DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \
-DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
-DMESSAGE_FILE=\"google_message2.dat\" -DJIT=true \
- -DBYREF=false -DJIT=true $(LIBUPB)
+ -DJIT=true $(LIBUPB)
benchmarks/b.parsestream_googlemessage1.upb_jit \
benchmarks/b.parsestream_googlemessage2.upb_jit: \
diff --git a/benchmarks/google_messages.proto b/benchmarks/google_messages.proto
index b43e94b..b367954 100644
--- a/benchmarks/google_messages.proto
+++ b/benchmarks/google_messages.proto
@@ -3,6 +3,11 @@ package benchmarks;
option optimize_for = SPEED;
+enum Foo {
+ FOO_VALUE = 1;
+ FOO_VALUE2 = 2;
+}
+
message SpeedMessage1 {
required string field1 = 1;
optional string field9 = 9;
@@ -45,6 +50,7 @@ message SpeedMessage1 {
optional int32 field128 = 128 [default=0];
optional string field129 = 129 [default="xxxxxxxxxxxxxxxxxxxxx"];
optional int32 field131 = 131 [default=0];
+ optional Foo field132 = 132 [default=FOO_VALUE];
}
message SpeedMessage1SubMessage {
diff --git a/benchmarks/parsestream.upb.c b/benchmarks/parsestream.upb.c
index 0316a86..e9164d0 100644
--- a/benchmarks/parsestream.upb.c
+++ b/benchmarks/parsestream.upb.c
@@ -39,7 +39,7 @@ static bool initialize()
return false;
}
- def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME));
+ def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME, &def));
if(!def) {
fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
return false;
@@ -68,7 +68,7 @@ static bool initialize()
static void cleanup()
{
free(input_str);
- upb_def_unref(UPB_UPCAST(def));
+ upb_def_unref(UPB_UPCAST(def), &def);
upb_decoder_uninit(&decoder);
upb_decoderplan_unref(plan);
upb_stringsrc_uninit(&stringsrc);
diff --git a/benchmarks/parsetoproto2.upb.cc b/benchmarks/parsetoproto2.upb.cc
index 988faad..5023b0e 100644
--- a/benchmarks/parsetoproto2.upb.cc
+++ b/benchmarks/parsetoproto2.upb.cc
@@ -1,320 +1,61 @@
-// This file is a crime against software engineering. It breaks the
-// encapsulation of proto2 in numerous ways, violates the C++ standard
-// in others, and generally deserves to have comtempt and scorn heaped
-// upon it.
-//
-// Its purpose is to get an accurate benchmark for how fast upb can
-// parse into proto2 data structures. To add proper support for this
-// functionality, proto2 would need to expose actual support for the
-// operations we are trying to perform here.
+// Tests speed of upb parsing into proto2 generated classes.
#define __STDC_LIMIT_MACROS 1
#include "main.c"
#include <stdint.h>
-#include "upb/bytestream.h"
-#include "upb/def.h"
-#include "upb/msg.h"
-#include "upb/pb/decoder.h"
+#include "upb/bytestream.hpp"
+#include "upb/def.hpp"
+#include "upb/msg.hpp"
+#include "upb/pb/decoder.hpp"
#include "upb/pb/glue.h"
-
-// Need to violate the encapsulation of GeneratedMessageReflection -- see below.
-#define private public
+#include "upb/proto2_bridge.hpp"
#include MESSAGE_HFILE
-#include <google/protobuf/descriptor.h>
-#undef private
-static size_t len;
+const char *str;
+size_t len;
MESSAGE_CIDENT msg[NUM_MESSAGES];
MESSAGE_CIDENT msg2;
-static upb_stringsrc strsrc;
-static upb_decoder d;
-static const upb_msgdef *def;
-static upb_decoderplan *p;
-char *str;
-
-#define PROTO2_APPEND(type, ctype) \
- upb_flow_t proto2_append_ ## type(void *_r, upb_value fval, upb_value val) { \
- (void)fval; \
- typedef google::protobuf::RepeatedField<ctype> R; \
- R *r = (R*)_r; \
- r->Add(upb_value_get ## type(val)); \
- return UPB_CONTINUE; \
- }
-
-PROTO2_APPEND(double, double)
-PROTO2_APPEND(float, float)
-PROTO2_APPEND(uint64, uint64_t)
-PROTO2_APPEND(int64, int64_t)
-PROTO2_APPEND(int32, int32_t)
-PROTO2_APPEND(uint32, uint32_t)
-PROTO2_APPEND(bool, bool)
-
-upb_flow_t proto2_setstr(void *m, upb_value fval, upb_value val) {
- assert(m != NULL);
- const upb_fielddef *f = upb_value_getfielddef(fval);
- std::string **str = (std::string**)UPB_INDEX(m, f->offset, 1);
- if (*str == f->default_ptr) *str = new std::string;
- const upb_byteregion *reg = upb_value_getbyteregion(val);
- size_t len;
- (*str)->assign(
- upb_byteregion_getptr(reg, upb_byteregion_startofs(reg), &len),
- upb_byteregion_len(reg));
- // XXX: only supports contiguous strings atm.
- assert(len == upb_byteregion_len(reg));
- return UPB_CONTINUE;
-}
-
-upb_flow_t proto2_append_str(void *_r, upb_value fval, upb_value val) {
- assert(_r != NULL);
- typedef google::protobuf::RepeatedPtrField<std::string> R;
- (void)fval;
- R *r = (R*)_r;
- const upb_byteregion *reg = upb_value_getbyteregion(val);
- size_t len;
- r->Add()->assign(
- upb_byteregion_getptr(reg, upb_byteregion_startofs(reg), &len),
- upb_byteregion_len(reg));
- // XXX: only supports contiguous strings atm.
- assert(len == upb_byteregion_len(reg));
- return UPB_CONTINUE;
-}
-
-upb_sflow_t proto2_startseq(void *m, upb_value fval) {
- assert(m != NULL);
- const upb_fielddef *f = upb_value_getfielddef(fval);
- return UPB_CONTINUE_WITH(UPB_INDEX(m, f->offset, 1));
-}
-
-upb_sflow_t proto2_startsubmsg(void *m, upb_value fval) {
- assert(m != NULL);
- const upb_fielddef *f = upb_value_getfielddef(fval);
- google::protobuf::Message *prototype = (google::protobuf::Message*)f->prototype;
- void **subm = (void**)UPB_INDEX(m, f->offset, 1);
- if (*subm == NULL || *subm == f->default_ptr)
- *subm = prototype->New();
- assert(*subm != NULL);
- return UPB_CONTINUE_WITH(*subm);
-}
-
-class UpbRepeatedPtrField : public google::protobuf::internal::RepeatedPtrFieldBase {
- public:
- class TypeHandler {
- public:
- typedef void Type;
- // AddAllocated() calls this, but only if other objects are sitting
- // around waiting for reuse, which we will not do.
- static void Delete(Type*) { assert(false); }
- };
- void *Add(google::protobuf::Message *m) {
- void *submsg = RepeatedPtrFieldBase::AddFromCleared<TypeHandler>();
- if (!submsg) {
- submsg = m->New();
- RepeatedPtrFieldBase::AddAllocated<TypeHandler>(submsg);
- }
- return submsg;
- }
-};
-
-upb_sflow_t proto2_startsubmsg_r(void *_r, upb_value fval) {
- assert(_r != NULL);
- // Compared to the other writers, this implementation is particularly sketchy.
- // The object we are modifying is a RepeatedPtrField<SubType>*, but we can't
- // properly declare that templated pointer because we don't have access to
- // that type at compile-time (and wouldn't want to create a separate callback
- // for each type anyway). Instead we access the pointer as a
- // RepeatedPtrFieldBase, which is indeed a superclass of RepeatedPtrField.
- // But we can't properly declare a TypeHandler for the submessage's type,
- // for the same reason that we can't create a RepeatedPtrField<SubType>*.
- // Instead we treat it as a void*, and create the submessage using
- // google::protobuf::Message::New() if we need to.
- class TypeHandler {
- public:
- typedef void Type;
- };
- const upb_fielddef *f = upb_value_getfielddef(fval);
- UpbRepeatedPtrField *r = (UpbRepeatedPtrField*)_r;
- void *submsg = r->Add((google::protobuf::Message*)f->prototype);
- assert(submsg != NULL);
- return UPB_CONTINUE_WITH(submsg);
-}
-
-#define PROTO2MSG(type, size) { static upb_accessor_vtbl vtbl = { \
- &proto2_startsubmsg, \
- &upb_stdmsg_set ## type, \
- &proto2_startseq, \
- &proto2_startsubmsg_r, \
- &proto2_append_ ## type, \
- NULL, NULL, NULL, NULL, NULL, NULL}; \
- return &vtbl; }
-
-static upb_accessor_vtbl *proto2_accessor(upb_fielddef *f) {
- switch (f->type) {
- case UPB_TYPE(DOUBLE): PROTO2MSG(double, 8)
- case UPB_TYPE(FLOAT): PROTO2MSG(float, 4)
- case UPB_TYPE(UINT64):
- case UPB_TYPE(FIXED64): PROTO2MSG(uint64, 8)
- case UPB_TYPE(INT64):
- case UPB_TYPE(SFIXED64):
- case UPB_TYPE(SINT64): PROTO2MSG(int64, 8)
- case UPB_TYPE(INT32):
- case UPB_TYPE(SINT32):
- case UPB_TYPE(ENUM):
- case UPB_TYPE(SFIXED32): PROTO2MSG(int32, 4)
- case UPB_TYPE(UINT32):
- case UPB_TYPE(FIXED32): PROTO2MSG(uint32, 4)
- case UPB_TYPE(BOOL): PROTO2MSG(bool, 1)
- case UPB_TYPE(STRING):
- case UPB_TYPE(BYTES):
- case UPB_TYPE(GROUP):
- case UPB_TYPE(MESSAGE): {
- static upb_accessor_vtbl vtbl = {
- &proto2_startsubmsg,
- &proto2_setstr,
- &proto2_startseq,
- &proto2_startsubmsg_r,
- &proto2_append_str,
- NULL, NULL, NULL, NULL, NULL, NULL};
- return &vtbl;
- }
- }
- return NULL;
-}
-
-static void layout_msgdef_from_proto2(upb_msgdef *upb_md,
- const google::protobuf::Message *m,
- const google::protobuf::Descriptor *proto2_d) {
- // Hack: we break the encapsulation of GeneratedMessageReflection to get at
- // the offsets we need. If/when we do this for real, we will need
- // GeneratedMessageReflection to expose those offsets publicly.
- const google::protobuf::internal::GeneratedMessageReflection *r =
- (google::protobuf::internal::GeneratedMessageReflection*)m->GetReflection();
- for (int i = 0; i < proto2_d->field_count(); i++) {
- const google::protobuf::FieldDescriptor *proto2_f = proto2_d->field(i);
- upb_fielddef *upb_f = upb_msgdef_itof(upb_md, proto2_f->number());
- assert(upb_f);
-
- // Encapsulation violation BEGIN
- uint32_t data_offset = r->offsets_[proto2_f->index()];
- uint32_t hasbit = (r->has_bits_offset_ * 8) + proto2_f->index();
- // Encapsulation violation END
-
- if (upb_isseq(upb_f)) {
- // proto2 does not store hasbits for repeated fields.
- upb_f->hasbit = -1;
- } else {
- upb_f->hasbit = hasbit;
- }
- upb_f->offset = data_offset;
- upb_fielddef_setaccessor(upb_f, proto2_accessor(upb_f));
-
- if (upb_isstring(upb_f) && !upb_isseq(upb_f)) {
- upb_f->default_ptr = &r->GetStringReference(*m, proto2_f, NULL);
- } else if (upb_issubmsg(upb_f)) {
- // XXX: skip leading "."
- const google::protobuf::Descriptor *subm_descriptor =
- google::protobuf::DescriptorPool::generated_pool()->
- FindMessageTypeByName(upb_fielddef_typename(upb_f) + 1);
- assert(subm_descriptor);
- upb_f->prototype = google::protobuf::MessageFactory::generated_factory()->GetPrototype(subm_descriptor);
- if (!upb_isseq(upb_f))
- upb_f->default_ptr = &r->GetMessage(*m, proto2_f);
- }
- }
-}
+upb::StringSource strsrc;
+upb::Decoder d;
+const upb::MessageDef *def;
+upb::DecoderPlan* plan;
static bool initialize()
{
- // Initialize upb state, decode descriptor.
- upb_status status = UPB_STATUS_INIT;
- upb_symtab *s = upb_symtab_new();
-
- char *data = upb_readfile(MESSAGE_DESCRIPTOR_FILE, &len);
- if (!data) {
- fprintf(stderr, "Couldn't read file: " MESSAGE_DESCRIPTOR_FILE);
- return false;
- }
- int n;
- upb_def **defs = upb_load_defs_from_descriptor(data, len, &n, &status);
- free(data);
- if(!upb_ok(&status)) {
- fprintf(stderr, "Error reading descriptor: %s\n",
- upb_status_getstr(&status));
- return false;
- }
-
- // Setup offsets and accessors to properly write into a proto2 generated
- // class.
- for (int i = 0; i < n; i++) {
- upb_def *def = defs[i];
- upb_msgdef *upb_md = upb_dyncast_msgdef(def);
- if (!upb_md) continue;
- const google::protobuf::Descriptor *proto2_md =
- google::protobuf::DescriptorPool::generated_pool()->
- FindMessageTypeByName(upb_def_fqname(def));
- if (!proto2_md) abort();
- const google::protobuf::Message *proto2_m =
- google::protobuf::MessageFactory::generated_factory()->GetPrototype(proto2_md);
- layout_msgdef_from_proto2(upb_md, proto2_m, proto2_md);
- }
-
- upb_symtab_add(s, defs, n, &status);
- if(!upb_ok(&status)) {
- fprintf(stderr, "Error reading adding to symtab: %s\n",
- upb_status_getstr(&status));
- return false;
- }
- for(int i = 0; i < n; i++) upb_def_unref(defs[i]);
- free(defs);
-
- def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME));
- if(!def) {
- fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
- return false;
- }
- upb_symtab_unref(s);
-
// Read the message data itself.
str = upb_readfile(MESSAGE_FILE, &len);
if(str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return false;
}
- upb_status_uninit(&status);
+
+ def = upb::proto2_bridge::NewFinalMessageDef(msg2, &def);
msg2.ParseFromArray(str, len);
- upb_stringsrc_init(&strsrc);
- upb_handlers *h = upb_handlers_new();
- upb_accessors_reghandlers(h, def);
- p = upb_decoderplan_new(h, JIT);
- upb_decoder_init(&d);
- upb_decoder_resetplan(&d, p, 0);
- upb_handlers_unref(h);
+ upb::Handlers* h = upb::Handlers::New();
+ upb::RegisterWriteHandlers(h, def);
+ plan = upb::DecoderPlan::New(h, JIT);
+ d.ResetPlan(plan, 0);
+ h->Unref();
return true;
}
static void cleanup() {
- upb_stringsrc_uninit(&strsrc);
- upb_decoder_uninit(&d);
- upb_def_unref(UPB_UPCAST(def));
- upb_decoderplan_unref(p);
- free(str);
+ def->Unref(&def);
+ plan->Unref();
}
-static size_t run(int i)
-{
- (void)i;
- upb_status status = UPB_STATUS_INIT;
+static size_t run(int i) {
msg[i % NUM_MESSAGES].Clear();
- upb_stringsrc_reset(&strsrc, str, len);
- upb_decoder_resetinput(
- &d, upb_stringsrc_allbytes(&strsrc), &msg[i % NUM_MESSAGES]);
- if (upb_decoder_decode(&d) != UPB_OK) goto err;
+ strsrc.Reset(str, len);
+ d.ResetInput(strsrc.AllBytes(), &msg[i % NUM_MESSAGES]);
+ if (d.Decode() != UPB_OK) goto err;
return len;
err:
- fprintf(stderr, "Decode error: %s", upb_status_getstr(&status));
+ fprintf(stderr, "Decode error: %s", d.status().GetString());
return 0;
}
diff --git a/benchmarks/parsetostruct.upb.c b/benchmarks/parsetostruct.upb.c
deleted file mode 100644
index 9487577..0000000
--- a/benchmarks/parsetostruct.upb.c
+++ /dev/null
@@ -1,85 +0,0 @@
-
-#include "main.c"
-
-#include "upb/bytestream.h"
-#include "upb/def.h"
-#include "upb/msg.h"
-#include "upb/pb/decoder.h"
-#include "upb/pb/glue.h"
-
-static const upb_msgdef *def;
-static size_t len;
-static void *msg[NUM_MESSAGES];
-static upb_stringsrc strsrc;
-static upb_decoder d;
-static upb_decoderplan *p;
-char *str;
-
-static bool initialize()
-{
- // Initialize upb state, decode descriptor.
- upb_status status = UPB_STATUS_INIT;
- upb_symtab *s = upb_symtab_new();
- upb_load_descriptor_file_into_symtab(s, MESSAGE_DESCRIPTOR_FILE, &status);
- if(!upb_ok(&status)) {
- fprintf(stderr, "Error reading descriptor: %s\n",
- upb_status_getstr(&status));
- return false;
- }
-
- def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME));
- if(!def) {
- fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
- return false;
- }
- upb_symtab_unref(s);
-
- // Read the message data itself.
- str = upb_readfile(MESSAGE_FILE, &len);
- if(str == NULL) {
- fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
- return false;
- }
- upb_status_uninit(&status);
- for (int i = 0; i < NUM_MESSAGES; i++)
- msg[i] = upb_stdmsg_new(def);
-
- upb_stringsrc_init(&strsrc);
- upb_handlers *h = upb_handlers_new();
- upb_accessors_reghandlers(h, def);
- p = upb_decoderplan_new(h, JIT);
- upb_decoder_init(&d);
- upb_handlers_unref(h);
- upb_decoder_resetplan(&d, p, 0);
-
- if (!BYREF) {
- // TODO: use byref/byval accessors.
- }
- return true;
-}
-
-static void cleanup()
-{
- for (int i = 0; i < NUM_MESSAGES; i++)
- upb_stdmsg_free(msg[i], def);
- upb_def_unref(UPB_UPCAST(def));
- upb_stringsrc_uninit(&strsrc);
- upb_decoder_uninit(&d);
- upb_decoderplan_unref(p);
- free(str);
-}
-
-static size_t run(int i)
-{
- upb_status status = UPB_STATUS_INIT;
- i %= NUM_MESSAGES;
- upb_msg_clear(msg[i], def);
- upb_stringsrc_reset(&strsrc, str, len);
- upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg[i]);
- if (upb_decoder_decode(&d) != UPB_OK) goto err;
- return len;
-
-err:
- fprintf(stderr, "Decode error: %s", upb_status_getstr(&status));
- return 0;
-}
diff --git a/bindings/cpp/upb/bytestream.hpp b/bindings/cpp/upb/bytestream.hpp
index 968d542..81134b9 100644
--- a/bindings/cpp/upb/bytestream.hpp
+++ b/bindings/cpp/upb/bytestream.hpp
@@ -68,6 +68,7 @@
#include "upb/bytestream.h"
#include "upb/upb.hpp"
+#include <string>
namespace upb {
@@ -204,6 +205,18 @@ class ByteRegion : public upb_byteregion {
return upb_byteregion_strdup(this);
}
+ template <typename T> void AssignToString(T* str) {
+ uint64_t ofs = start_ofs();
+ str->clear();
+ str->reserve(Length());
+ while (ofs < end_ofs()) {
+ size_t len;
+ const char *ptr = GetPtr(ofs, &len);
+ str->append(ptr, len);
+ ofs += len;
+ }
+ }
+
// TODO: add if/when there is a demonstrated need.
//
// // Pins this byteregion's bytes in memory, allowing it to outlive its
@@ -220,12 +233,24 @@ class ByteRegion : public upb_byteregion {
class StringSource : public upb_stringsrc {
public:
StringSource() : upb_stringsrc() { upb_stringsrc_init(this); }
+ template <typename T> explicit StringSource(const T& str) {
+ upb_stringsrc_init(this);
+ Reset(str);
+ }
+ StringSource(const char *data, size_t len) {
+ upb_stringsrc_init(this);
+ Reset(data, len);
+ }
~StringSource() { upb_stringsrc_uninit(this); }
void Reset(const char* data, size_t len) {
upb_stringsrc_reset(this, data, len);
}
+ template <typename T> void Reset(const T& str) {
+ Reset(str.c_str(), str.size());
+ }
+
ByteRegion* AllBytes() {
return static_cast<ByteRegion*>(upb_stringsrc_allbytes(this));
}
@@ -233,6 +258,14 @@ class StringSource : public upb_stringsrc {
upb_bytesrc* ByteSource() { return upb_stringsrc_bytesrc(this); }
};
+template <> inline ByteRegion* GetValue<ByteRegion*>(Value v) {
+ return static_cast<ByteRegion*>(upb_value_getbyteregion(v));
+}
+
+template <> inline Value MakeValue<ByteRegion*>(ByteRegion* v) {
+ return upb_value_byteregion(v);
+}
+
} // namespace upb
#endif
diff --git a/bindings/cpp/upb/def.hpp b/bindings/cpp/upb/def.hpp
index 030ba40..6998648 100644
--- a/bindings/cpp/upb/def.hpp
+++ b/bindings/cpp/upb/def.hpp
@@ -1,7 +1,7 @@
//
// upb - a minimalist implementation of protocol buffers.
//
-// Copyright (c) 2011 Google Inc. See LICENSE for details.
+// Copyright (c) 2011-2012 Google Inc. See LICENSE for details.
// Author: Josh Haberman <jhaberman@gmail.com>
//
// The set of upb::*Def classes and upb::SymbolTable allow for defining and
@@ -15,21 +15,20 @@
// not be used for any purpose except to set its properties (it can't be
// used to parse anything, create any messages in memory, etc).
//
-// 2. FINALIZED: after being added to a symtab (which links the defs together)
-// the defs become finalized (thread-safe and immutable). Programs may only
-// access defs through a CONST POINTER during this stage -- upb_symtab will
-// help you out with this requirement by only vending const pointers, but
-// you need to make sure not to use any non-const pointers you still have
-// sitting around. In practice this means that you may not call any setters
-// on the defs (or functions that themselves call the setters). If you want
-// to modify an existing immutable def, copy it with upb_*_dup(), modify the
-// copy, and add the modified def to the symtab (replacing the existing
-// def).
+// 2. FINALIZED: the Def::Finalize() operation finalizes a set of defs,
+// which makes them thread-safe and immutable. Finalized defs may only be
+// accessed through a CONST POINTER. If you want to modify an existing
+// immutable def, copy it with Dup() and modify and finalize the copy.
//
-// You can test for which stage of life a def is in by calling
-// upb::Def::IsMutable(). This is particularly useful for dynamic language
-// bindings, which must properly guarantee that the dynamic language cannot
-// break the rules laid out above.
+// The refcounting of defs works properly no matter what state the def is in.
+// Once the def is finalized it is guaranteed that any def reachable from a
+// live def is also live (so a ref on the base of a message tree keeps the
+// whole tree alive).
+//
+// You can test for which stage of life a def is in by calling IsMutable().
+// This is particularly useful for dynamic language bindings, which must
+// properly guarantee that the dynamic language cannot break the rules laid out
+// above.
//
// It would be possible to make the defs thread-safe during stage 1 by using
// mutexes internally and changing any methods returning pointers to return
@@ -48,63 +47,213 @@
namespace upb {
+class Def;
class MessageDef;
+typedef upb_fieldtype_t FieldType;
+typedef upb_label_t Label;
+
class FieldDef : public upb_fielddef {
public:
- static FieldDef* Cast(upb_fielddef *f) { return (FieldDef*)f; }
- static const FieldDef* Cast(const upb_fielddef *f) { return (FieldDef*)f; }
+ static FieldDef* Cast(upb_fielddef *f) { return static_cast<FieldDef*>(f); }
+ static const FieldDef* Cast(const upb_fielddef *f) {
+ return static_cast<const FieldDef*>(f);
+ }
+
+ static FieldDef* New(void *owner) { return Cast(upb_fielddef_new(owner)); }
+ FieldDef* Dup(void *owner) const {
+ return Cast(upb_fielddef_dup(this, owner));
+ }
+ void Ref(void *owner) { upb_fielddef_ref(this, owner); }
+ void Unref(void *owner) { upb_fielddef_unref(this, owner); }
- static FieldDef* New() { return Cast(upb_fielddef_new()); }
- FieldDef* Dup() { return Cast(upb_fielddef_dup(this)); }
+ bool IsMutable() const { return upb_fielddef_ismutable(this); }
+ bool IsFinalized() const { return upb_fielddef_isfinalized(this); }
+ bool IsString() const { return upb_isstring(this); }
+ bool IsSequence() const { return upb_isseq(this); }
+ bool IsSubmessage() const { return upb_issubmsg(this); }
- // Read accessors -- may be called at any time.
- uint8_t type() const { return upb_fielddef_type(this); }
- uint8_t label() const { return upb_fielddef_label(this); }
+ // Simple accessors. /////////////////////////////////////////////////////////
+
+ FieldType type() const { return upb_fielddef_type(this); }
+ Label label() const { return upb_fielddef_label(this); }
int32_t number() const { return upb_fielddef_number(this); }
std::string name() const { return std::string(upb_fielddef_name(this)); }
Value default_() const { return upb_fielddef_default(this); }
Value bound_value() const { return upb_fielddef_fval(this); }
+ uint16_t offset() const { return upb_fielddef_offset(this); }
+ int16_t hasbit() const { return upb_fielddef_hasbit(this); }
+
+ bool set_type(FieldType type) { return upb_fielddef_settype(this, type); }
+ bool set_label(Label label) { return upb_fielddef_setlabel(this, label); }
+ void set_offset(uint16_t offset) { upb_fielddef_setoffset(this, offset); }
+ void set_hasbit(int16_t hasbit) { upb_fielddef_sethasbit(this, hasbit); }
+ void set_fval(Value fval) { upb_fielddef_setfval(this, fval); }
+ void set_accessor(struct _upb_accessor_vtbl* vtbl) {
+ upb_fielddef_setaccessor(this, vtbl);
+ }
+ MessageDef* message();
+ const MessageDef* message() const;
- MessageDef* message() { return (MessageDef*)upb_fielddef_msgdef(this); }
- const MessageDef* message() const { return (MessageDef*)upb_fielddef_msgdef(this); }
-
- // Will be added once upb::Def is defined:
- // Def* subdef() { return upb_fielddef_subdef(this); }
- // const Def* subdef() { return upb_fielddef_subdef(this); }
-
- // Returns true if this FieldDef is finalized
- bool IsFinalized() const { return upb_fielddef_finalized(this); }
struct _upb_accessor_vtbl *accessor() const {
return upb_fielddef_accessor(this);
}
- std::string type_name() const {
- return std::string(upb_fielddef_typename(this));
+
+ // "Number" and "name" must be set before the fielddef is added to a msgdef.
+ // For the moment we do not allow these to be set once the fielddef is added
+ // to a msgdef -- this could be relaxed in the future.
+ bool set_number(int32_t number) {
+ return upb_fielddef_setnumber(this, number);
+ }
+ bool set_name(const char *name) { return upb_fielddef_setname(this, name); }
+ bool set_name(const std::string& name) { return set_name(name.c_str()); }
+
+ // Default value. ////////////////////////////////////////////////////////////
+
+ // Returns the default value for this fielddef, which may either be something
+ // the client set explicitly or the "default default" (0 for numbers, empty
+ // for strings). The field's type indicates the type of the returned value,
+ // except for enum fields that are still mutable.
+ //
+ // For enums the default can be set either numerically or symbolically -- the
+ // upb_fielddef_default_is_symbolic() function below will indicate which it
+ // is. For string defaults, the value will be a upb_byteregion which is
+ // invalidated by any other non-const call on this object. Once the fielddef
+ // is finalized, symbolic enum defaults are resolved, so finalized enum
+ // fielddefs always have a default of type int32.
+ Value defaultval() { return upb_fielddef_default(this); }
+
+ // Sets default value for the field. For numeric types, use
+ // upb_fielddef_setdefault(), and "value" must match the type of the field.
+ // For string/bytes types, use upb_fielddef_setdefaultstr(). Enum types may
+ // use either, since the default may be set either numerically or
+ // symbolically.
+ //
+ // NOTE: May only be called for fields whose type has already been set.
+ // Also, will be reset to default if the field's type is set again.
+ void set_default(Value value) { upb_fielddef_setdefault(this, value); }
+ void set_default(const char *str) { upb_fielddef_setdefaultcstr(this, str); }
+ void set_default(const char *str, size_t len) {
+ upb_fielddef_setdefaultstr(this, str, len);
+ }
+ void set_default(const std::string& str) {
+ upb_fielddef_setdefaultstr(this, str.c_str(), str.size());
+ }
+
+ // The results of this function are only meaningful for mutable enum fields,
+ // which can have a default specified either as an integer or as a string.
+ // If this returns true, the default returned from upb_fielddef_default() is
+ // a string, otherwise it is an integer.
+ bool DefaultIsSymbolic() { return upb_fielddef_default_is_symbolic(this); }
+
+ // Subdef. ///////////////////////////////////////////////////////////////////
+
+ // Submessage and enum fields must reference a "subdef", which is the
+ // MessageDef or EnumDef that defines their type. Note that when the
+ // FieldDef is mutable it may not have a subdef *yet*, but this still returns
+ // true to indicate that the field's type requires a subdef.
+ bool HasSubDef() { return upb_hassubdef(this); }
+
+ // Before a FieldDef is finalized, its subdef may be set either directly
+ // (with a Def*) or symbolically. Symbolic refs must be resolved by the
+ // client before the containing msgdef can be finalized.
+ //
+ // Both methods require that HasSubDef() (so the type must be set prior to
+ // calling these methods). Returns false if this is not the case, or if the
+ // given subdef is not of the correct type. The subtype is reset if the
+ // field's type is changed.
+ bool set_subdef(Def* def);
+ bool set_subtype_name(const char *name) {
+ return upb_fielddef_setsubtypename(this, name);
+ }
+ bool set_subtype_name(const std::string& str) {
+ return set_subtype_name(str.c_str());
}
- // Write accessors -- may not be called once the FieldDef is finalized.
+ // Returns the enum or submessage def or symbolic name for this field, if
+ // any. May only be called for fields where HasSubDef() is true. Returns
+ // NULL if the subdef has not been set or if you ask for a subtype name when
+ // the subtype is currently set symbolically (or vice-versa).
+ //
+ // Caller does *not* own a ref on the returned def or string.
+ // subtype_name() is non-const because only mutable defs can have the
+ // subtype name set symbolically (symbolic references must be resolved before
+ // the MessageDef can be finalized).
+ const Def* subdef() const;
+ const char *subtype_name() { return upb_fielddef_subtypename(this); }
private:
- FieldDef();
- ~FieldDef();
+ UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldDef);
+};
+
+class Def : public upb_def {
+ public:
+ // Converting from C types to C++ wrapper types.
+ static Def* Cast(upb_def *def) { return static_cast<Def*>(def); }
+ static const Def* Cast(const upb_def *def) {
+ return static_cast<const Def*>(def);
+ }
+
+ void Ref(void *owner) const { upb_def_ref(this, owner); }
+ void Unref(void *owner) const { upb_def_unref(this, owner); }
+
+ void set_full_name(const char *name) { upb_def_setfullname(this, name); }
+ void set_full_name(const std::string& name) {
+ upb_def_setfullname(this, name.c_str());
+ }
+
+ const char *full_name() const { return upb_def_fullname(this); }
+
+ // Finalizes the given list of defs (as well as the fielddefs for the given
+ // msgdefs). All defs reachable from any def in this list must either be
+ // already finalized or elsewhere in the list. Any symbolic references to
+ // enums or submessages must already have been resolved. Returns true on
+ // success, otherwise false is returned and status contains details. In the
+ // error case the input defs are unmodified. See the comment at the top of
+ // this file for the semantics of finalized defs.
+ //
+ // n is currently limited to 64k defs, if more are required break them into
+ // batches of 64k (or we could raise this limit, at the cost of a bigger
+ // upb_def structure or complexity in upb_def_finalize()).
+ static bool Finalize(Def*const* defs, int n, Status* status) {
+ return upb_finalize(reinterpret_cast<upb_def*const*>(defs), n, status);
+ }
+ // Convenience overload taking a vector of defs.  An empty vector is
+ // forwarded as a null pointer with n == 0 rather than evaluating &defs[0],
+ // which is undefined behavior for an empty vector.
+ static bool Finalize(const std::vector<Def*>& defs, Status* status) {
+   Def* const* first = defs.empty() ? 0 : &defs[0];
+   return Finalize(first, static_cast<int>(defs.size()), status);
+ }
};
class MessageDef : public upb_msgdef {
public:
// Converting from C types to C++ wrapper types.
- static MessageDef* Cast(upb_msgdef *md) { return (MessageDef*)md; }
+ static MessageDef* Cast(upb_msgdef *md) {
+ return static_cast<MessageDef*>(md);
+ }
static const MessageDef* Cast(const upb_msgdef *md) {
- return (MessageDef*)md;
+ return static_cast<const MessageDef*>(md);
+ }
+ static MessageDef* DynamicCast(Def* def) {
+ return Cast(upb_dyncast_msgdef(def));
+ }
+ static const MessageDef* DynamicCast(const Def* def) {
+ return Cast(upb_dyncast_msgdef_const(def));
}
- static MessageDef* New() { return Cast(upb_msgdef_new()); }
- MessageDef* Dup() { return Cast(upb_msgdef_dup(this)); }
+ Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); }
+ const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); }
+
+ static MessageDef* New(void *owner) { return Cast(upb_msgdef_new(owner)); }
+ MessageDef* Dup(void *owner) const {
+ return Cast(upb_msgdef_dup(this, owner));
+ }
- void Ref() const { upb_msgdef_ref(this); }
- void Unref() const { upb_msgdef_unref(this); }
+ void Ref(void *owner) const { upb_msgdef_ref(this, owner); }
+ void Unref(void *owner) const { upb_msgdef_unref(this, owner); }
// Read accessors -- may be called at any time.
+ const char *full_name() const { return AsDef()->full_name(); }
+
// The total size of in-memory messages created with this MessageDef.
uint16_t instance_size() const { return upb_msgdef_size(this); }
@@ -116,25 +265,32 @@ class MessageDef : public upb_msgdef {
// Write accessors. May only be called before the msgdef is in a symtab.
+ void set_full_name(const char *name) { AsDef()->set_full_name(name); }
+ void set_full_name(const std::string& name) { AsDef()->set_full_name(name); }
+
void set_instance_size(uint16_t size) { upb_msgdef_setsize(this, size); }
void set_hasbit_bytes(uint16_t size) { upb_msgdef_setsize(this, size); }
bool SetExtensionRange(uint32_t start, uint32_t end) {
return upb_msgdef_setextrange(this, start, end);
}
- // Adds a set of fields (upb_fielddef objects) to a msgdef. Caller retains
- // its ref on the fielddef. May only be done before the msgdef is in a
- // symtab (requires upb_def_ismutable(m) for the msgdef). The fielddef's
- // name and number must be set, and the message may not already contain any
- // field with this name or number, and this fielddef may not be part of
- // another message, otherwise false is returned and no action is performed.
- bool AddFields(FieldDef*const * f, int n) {
- return upb_msgdef_addfields(this, (upb_fielddef**)f, n);
+ // Adds a set of fields (FieldDef objects) to a MessageDef. Caller passes a
+ // ref on the FieldDef to the MessageDef in both success and failure cases.
+ // May only be done before the MessageDef is in a SymbolTable (requires
+ // m->IsMutable() for the MessageDef). The FieldDef's name and number must
+ // be set, and the message may not already contain any field with this name
+ // or number, and this FieldDef may not be part of another message, otherwise
+ // false is returned and the MessageDef is unchanged.
+ bool AddField(FieldDef* f, void *owner) { return AddFields(&f, 1, owner); }
+ bool AddFields(FieldDef*const * f, int n, void *owner) {
+ return upb_msgdef_addfields(this, (upb_fielddef*const*)f, n, owner);
}
- bool AddFields(const std::vector<FieldDef*>& fields) {
- return AddFields(&fields[0], fields.size());
+ // Vector overload of AddFields().  An empty vector is forwarded as a null
+ // pointer with n == 0 instead of evaluating &fields[0], which is undefined
+ // behavior for an empty vector.
+ bool AddFields(const std::vector<FieldDef*>& fields, void *owner) {
+   FieldDef* const* first = fields.empty() ? 0 : &fields[0];
+   return AddFields(first, static_cast<int>(fields.size()), owner);
}
+ int field_count() const { return upb_msgdef_numfields(this); }
+
// Lookup fields by name or number, returning NULL if no such field exists.
FieldDef* FindFieldByName(const char *name) {
return FieldDef::Cast(upb_msgdef_ntof(this, name));
@@ -156,19 +312,89 @@ class MessageDef : public upb_msgdef {
return FindFieldByNumber(num);
}
- // TODO: iteration over fields.
+ class Iterator : public upb_msg_iter {
+ public:
+ explicit Iterator(MessageDef* md) { upb_msg_begin(this, md); }
+ Iterator() {}
+
+ FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); }
+ bool Done() { return upb_msg_done(this); }
+ void Next() { return upb_msg_next(this); }
+ };
+
+ class ConstIterator : public upb_msg_iter {
+ public:
+ explicit ConstIterator(const MessageDef* md) { upb_msg_begin(this, md); }
+ ConstIterator() {}
+
+ const FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); }
+ bool Done() { return upb_msg_done(this); }
+ void Next() { return upb_msg_next(this); }
+ };
private:
- MessageDef();
- ~MessageDef();
+ UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageDef);
+};
+
+class EnumDef : public upb_enumdef {
+ public:
+ // Converting from C types to C++ wrapper types.
+ static EnumDef* Cast(upb_enumdef *e) { return static_cast<EnumDef*>(e); }
+ static const EnumDef* Cast(const upb_enumdef *e) {
+ return static_cast<const EnumDef*>(e);
+ }
+
+ static EnumDef* New(void *owner) { return Cast(upb_enumdef_new(owner)); }
+
+ void Ref(void *owner) { upb_enumdef_ref(this, owner); }
+ void Unref(void *owner) { upb_enumdef_unref(this, owner); }
+ EnumDef* Dup(void *owner) const { return Cast(upb_enumdef_dup(this, owner)); }
+
+ Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); }
+ const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); }
+
+ int32_t default_value() const { return upb_enumdef_default(this); }
+
+ // May only be set if IsMutable().
+ void set_full_name(const char *name) { AsDef()->set_full_name(name); }
+ void set_full_name(const std::string& name) { AsDef()->set_full_name(name); }
+ void set_default_value(int32_t val) {
+ return upb_enumdef_setdefault(this, val);
+ }
+
+ // Adds a value to the enumdef. Requires that no existing val has this
+ // name or number (returns false and does not add if there is). May only
+ // be called if IsMutable().
+ // C-string overload.  Takes const char* so that string literals and other
+ // read-only strings can be passed; the std::string overload already hands
+ // a const char* to the same C function.
+ bool AddValue(const char *name, int32_t num) {
+   return upb_enumdef_addval(this, name, num);
+ }
+ bool AddValue(const std::string& name, int32_t num) {
+ return upb_enumdef_addval(this, name.c_str(), num);
+ }
+
+ // Lookups from name to integer and vice-versa.
+ bool LookupName(const char *name, int32_t* num) const {
+ return upb_enumdef_ntoi(this, name, num);
+ }
+
+ // Lookup from integer to name, returns a NULL-terminated string which
+ // the caller does not own, or NULL if not found.
+ const char *LookupNumber(int32_t num) const {
+ return upb_enumdef_iton(this, num);
+ }
+
+ private:
+ UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(EnumDef);
};
class SymbolTable : public upb_symtab {
public:
// Converting from C types to C++ wrapper types.
- static SymbolTable* Cast(upb_symtab *s) { return (SymbolTable*)s; }
+ static SymbolTable* Cast(upb_symtab *s) {
+ return static_cast<SymbolTable*>(s);
+ }
static const SymbolTable* Cast(const upb_symtab *s) {
- return (SymbolTable*)s;
+ return static_cast<const SymbolTable*>(s);
}
static SymbolTable* New() { return Cast(upb_symtab_new()); }
@@ -176,17 +402,50 @@ class SymbolTable : public upb_symtab {
void Ref() const { upb_symtab_unref(this); }
void Unref() const { upb_symtab_unref(this); }
+ // Adds the given defs to the symtab, resolving all symbols. Only one def
+ // per name may be in the list, but defs can replace existing defs in the
+ // symtab. The entire operation either succeeds or fails. If the operation
+ // fails, the symtab is unchanged, false is returned, and status indicates
+ // the error. The caller passes a ref on the defs in all cases.
+ bool Add(Def *const *defs, int n, void *owner, Status* status) {
+ return upb_symtab_add(this, (upb_def*const*)defs, n, owner, status);
+ }
+ // Vector overload of Add().  An empty vector is forwarded as a null pointer
+ // with n == 0 instead of evaluating &defs[0], which is undefined behavior
+ // for an empty vector.
+ bool Add(const std::vector<Def*>& defs, void *owner, Status* status) {
+   Def* const* first = defs.empty() ? 0 : &defs[0];
+   return Add(first, static_cast<int>(defs.size()), owner, status);
+ }
+
// If the given name refers to a message in this symbol table, returns a new
// ref to that MessageDef object, otherwise returns NULL.
- const MessageDef* LookupMessage(const char *name) const {
- return MessageDef::Cast(upb_symtab_lookupmsg(this, name));
+ const MessageDef* LookupMessage(const char *name, void *owner) const {
+ return MessageDef::Cast(upb_symtab_lookupmsg(this, name, owner));
}
private:
- SymbolTable();
- ~SymbolTable();
+ UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(SymbolTable);
};
+template <> inline const FieldDef* GetValue<const FieldDef*>(Value v) {
+ return static_cast<const FieldDef*>(upb_value_getfielddef(v));
+}
+
+template <> inline Value MakeValue<FieldDef*>(FieldDef* v) {
+ return upb_value_fielddef(v);
+}
+
+inline MessageDef* FieldDef::message() {
+ return MessageDef::Cast(upb_fielddef_msgdef(this));
+}
+inline const MessageDef* FieldDef::message() const {
+ return MessageDef::Cast(upb_fielddef_msgdef(this));
+}
+
+inline const Def* FieldDef::subdef() const {
+ return Def::Cast(upb_fielddef_subdef(this));
+}
+inline bool FieldDef::set_subdef(Def* def) {
+ return upb_fielddef_setsubdef(this, def);
+}
+
} // namespace upb
#endif
diff --git a/bindings/cpp/upb/handlers.cc b/bindings/cpp/upb/handlers.cc
new file mode 100644
index 0000000..c96a74e
--- /dev/null
+++ b/bindings/cpp/upb/handlers.cc
@@ -0,0 +1,39 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc. See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+
+#include "handlers.hpp"
+
+#include "def.hpp"
+
+namespace upb {
+
+namespace {
+
+void MessageCallbackWrapper(
+ void* closure, upb_mhandlers* mh, const upb_msgdef* m) {
+ Handlers::MessageRegistrationVisitor* visitor =
+ static_cast<Handlers::MessageRegistrationVisitor*>(closure);
+ visitor->OnMessage(static_cast<MessageHandlers*>(mh),
+ static_cast<const MessageDef*>(m));
+}
+
+void FieldCallbackWrapper(
+ void* closure, upb_fhandlers* fh, const upb_fielddef* f) {
+ Handlers::MessageRegistrationVisitor* visitor =
+ static_cast<Handlers::MessageRegistrationVisitor*>(closure);
+ visitor->OnField(static_cast<FieldHandlers*>(fh),
+ static_cast<const FieldDef*>(f));
+}
+} // namespace
+
+// Registers handlers for the given MessageDef through
+// upb_handlers_regmsgdef(), routing the per-message and per-field callbacks
+// to visitor->OnMessage() / visitor->OnField() via the wrappers above.
+// Returns the MessageHandlers produced by the C call.
+MessageHandlers* Handlers::RegisterMessageDef(
+    const MessageDef& m, Handlers::MessageRegistrationVisitor* visitor) {
+  // The wrappers cast the closure straight back to a
+  // MessageRegistrationVisitor*, so the visitor pointer itself must be passed
+  // as the closure -- not the address of the local parameter.
+  upb_mhandlers* mh = upb_handlers_regmsgdef(
+      this, &m, &MessageCallbackWrapper, &FieldCallbackWrapper, visitor);
+  return static_cast<MessageHandlers*>(mh);
+}
+
+} // namespace upb
diff --git a/bindings/cpp/upb/handlers.hpp b/bindings/cpp/upb/handlers.hpp
index d356a33..a366c3d 100644
--- a/bindings/cpp/upb/handlers.hpp
+++ b/bindings/cpp/upb/handlers.hpp
@@ -15,11 +15,16 @@
#include "upb/handlers.h"
+#include "upb/upb.hpp"
+
namespace upb {
typedef upb_fieldtype_t FieldType;
typedef upb_flow_t Flow;
+typedef upb_sflow_t SubFlow;
class MessageHandlers;
+class MessageDef;
+class FieldDef;
class FieldHandlers : public upb_fhandlers {
public:
@@ -68,12 +73,11 @@ class FieldHandlers : public upb_fhandlers {
MessageHandlers* GetSubMessageHandlers() const;
// If set to >=0, the given hasbit will be set after the value callback is
// called (offset relative to the current closure).
- int32_t GetValueHasbit() const { return upb_fhandlers_getvaluehasbit(this); }
- void SetValueHasbit(int32_t bit) { upb_fhandlers_setvaluehasbit(this, bit); }
+ int32_t GetHasbit() const { return upb_fhandlers_gethasbit(this); }
+ void SetHasbit(int32_t bit) { upb_fhandlers_sethasbit(this, bit); }
private:
- FieldHandlers(); // Only created by upb::Handlers.
- ~FieldHandlers(); // Only destroyed by refcounting.
+ UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldHandlers);
};
class MessageHandlers : public upb_mhandlers {
@@ -81,6 +85,13 @@ class MessageHandlers : public upb_mhandlers {
typedef upb_startmsg_handler StartMessageHandler;
typedef upb_endmsg_handler EndMessageHandler;
+ static MessageHandlers* Cast(upb_mhandlers* mh) {
+ return static_cast<MessageHandlers*>(mh);
+ }
+ static const MessageHandlers* Cast(const upb_mhandlers* mh) {
+ return static_cast<const MessageHandlers*>(mh);
+ }
+
// The MessageHandlers will live at least as long as the upb::Handlers to
// which it belongs, but can be Ref'd/Unref'd to make it live longer (which
// will prolong the life of the underlying upb::Handlers also).
@@ -89,7 +100,7 @@ class MessageHandlers : public upb_mhandlers {
// Functions to set this message's handlers.
// These return "this" so they can be conveniently chained, eg.
- // handlers->NewMessage()
+ // handlers->NewMessageHandlers()
// ->SetStartMessageHandler(&StartMessage)
// ->SetEndMessageHandler(&EndMessage);
MessageHandlers* SetStartMessageHandler(StartMessageHandler* h) {
@@ -111,13 +122,13 @@ class MessageHandlers : public upb_mhandlers {
FieldHandlers* NewFieldHandlersForSubmessage(uint32_t n, const char *name,
FieldType type, bool repeated,
MessageHandlers* subm) {
+ (void)name;
return static_cast<FieldHandlers*>(
upb_mhandlers_newfhandlers_subm(this, n, type, repeated, subm));
}
private:
- MessageHandlers(); // Only created by upb::Handlers.
- ~MessageHandlers(); // Only destroyed by refcounting.
+ UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageHandlers);
};
class Handlers : public upb_handlers {
@@ -134,17 +145,29 @@ class Handlers : public upb_handlers {
return static_cast<MessageHandlers*>(upb_handlers_newmhandlers(this));
}
+ // Convenience function for registering handlers for all messages and fields
+ // in a MessageDef and all its children. For every registered message,
+ // OnMessage will be called on the visitor with newly-created MessageHandlers
+ // and MessageDef. Likewise with OnField will be called with newly-created
+ // FieldHandlers and FieldDef for each field.
+ class MessageRegistrationVisitor {
+ public:
+ virtual ~MessageRegistrationVisitor() {}
+ virtual void OnMessage(MessageHandlers* mh, const MessageDef* m) = 0;
+ virtual void OnField(FieldHandlers* fh, const FieldDef* f) = 0;
+ };
+ MessageHandlers* RegisterMessageDef(const MessageDef& m,
+ MessageRegistrationVisitor* visitor);
+
private:
- Handlers(); // Only created by Handlers::New().
- ~Handlers(); // Only destroyed by refcounting.
+ UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(Handlers);
};
-
-MessageHandlers* FieldHandlers::GetMessageHandlers() const {
+inline MessageHandlers* FieldHandlers::GetMessageHandlers() const {
return static_cast<MessageHandlers*>(upb_fhandlers_getmsg(this));
}
-MessageHandlers* FieldHandlers::GetSubMessageHandlers() const {
+inline MessageHandlers* FieldHandlers::GetSubMessageHandlers() const {
return static_cast<MessageHandlers*>(upb_fhandlers_getsubmsg(this));
}
diff --git a/bindings/cpp/upb/msg.hpp b/bindings/cpp/upb/msg.hpp
new file mode 100644
index 0000000..c7cf1f2
--- /dev/null
+++ b/bindings/cpp/upb/msg.hpp
@@ -0,0 +1,62 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc. See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+// Routines for reading and writing message data to an in-memory structure,
+// similar to a C struct.
+//
+// upb does not define one single message object that everyone must use.
+// Rather it defines an abstract interface for reading and writing members
+// of a message object, and all of the parsers and serializers use this
+// abstract interface. This allows upb's parsers and serializers to be used
+// regardless of what memory management scheme or synchronization model the
+// application is using.
+//
+// A standard set of accessors is provided for doing simple reads and writes at
+// a known offset into the message. These accessors should be used when
+// possible, because they are specially optimized -- for example, the JIT can
+// recognize them and emit specialized code instead of having to call the
+// function at all. The application can substitute its own accessors when the
+// standard accessors are not suitable.
+
+#ifndef UPB_MSG_HPP
+#define UPB_MSG_HPP
+
+#include "upb/msg.h"
+#include "upb/handlers.hpp"
+
+namespace upb {
+
+typedef upb_accessor_vtbl AccessorVTable;
+
+// Registers handlers for writing into a message of the given type using
+// whatever accessors it has defined.
+inline MessageHandlers* RegisterWriteHandlers(upb::Handlers* handlers,
+ const upb::MessageDef* md) {
+ return MessageHandlers::Cast(
+ upb_accessors_reghandlers(handlers, md));
+}
+
+template <typename T> static FieldHandlers::ValueHandler* GetValueHandler();
+
+// A handy templated function that will retrieve a value handler for a given
+// C++ type.
+#define GET_VALUE_HANDLER(type, ctype) \
+ template <> \
+ FieldHandlers::ValueHandler* GetValueHandler<ctype>() { \
+ return &upb_stdmsg_set ## type; \
+ }
+
+GET_VALUE_HANDLER(double, double);
+GET_VALUE_HANDLER(float, float);
+GET_VALUE_HANDLER(uint64, uint64_t);
+GET_VALUE_HANDLER(uint32, uint32_t);
+GET_VALUE_HANDLER(int64, int64_t);
+GET_VALUE_HANDLER(int32, int32_t);
+GET_VALUE_HANDLER(bool, bool);
+#undef GET_VALUE_HANDLER
+
+} // namespace
+
+#endif
diff --git a/bindings/cpp/upb/pb/glue.hpp b/bindings/cpp/upb/pb/glue.hpp
index be072a7..d43baeb 100644
--- a/bindings/cpp/upb/pb/glue.hpp
+++ b/bindings/cpp/upb/pb/glue.hpp
@@ -13,11 +13,23 @@
namespace upb {
+// All routines that load descriptors expect the descriptor to be a
+// FileDescriptorSet.
// Loads the descriptor file named "fname" into symtab "s"; forwards to
// upb_load_descriptor_file_into_symtab().  Declared inline because it is
// defined in a header: without it, including this header from more than one
// translation unit produces multiple-definition link errors.
inline bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname,
    Status* status) {
  return upb_load_descriptor_file_into_symtab(s, fname, status);
}
+// Loads a descriptor held in memory ("str", "len" bytes) into symtab "s";
+// forwards to upb_load_descriptor_into_symtab().  Declared inline because it
+// is defined in a header: without it, including this header from more than
+// one translation unit produces multiple-definition link errors.
+inline bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str,
+                                     size_t len, Status* status) {
+  return upb_load_descriptor_into_symtab(s, str, len, status);
+}
+
+template <typename T>
+bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) {
+ return upb_load_descriptor_into_symtab(s, desc.c_str(), desc.size(), status);
+}
+
} // namespace upb
#endif
diff --git a/bindings/cpp/upb/proto2_bridge.cc b/bindings/cpp/upb/proto2_bridge.cc
new file mode 100644
index 0000000..6119295
--- /dev/null
+++ b/bindings/cpp/upb/proto2_bridge.cc
@@ -0,0 +1,892 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc. See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+
+#include <string>
+#include <typeinfo>
+#include "upb/bytestream.hpp"
+#include "upb/def.hpp"
+#include "upb/handlers.hpp"
+#include "upb/msg.hpp"
+#include "upb/proto2_bridge.hpp"
+
+namespace {
+
+static void* GetFieldPointer(void *message, const upb::FieldDef* f) {
+ return static_cast<char*>(message) + f->offset();
+}
+
+} // namespace
+
+#ifdef UPB_GOOGLE3
+
+// TODO(haberman): friend upb so that this isn't required.
+// Temporarily expose protected members of the repeated-field classes.  The
+// #undef must name the same macro that was defined, otherwise "protected"
+// stays redefined as "public" for the rest of this translation unit.
+#define protected public
+#include "net/proto2/public/repeated_field.h"
+#undef protected
+
+#define private public
+#include "net/proto/proto2_reflection.h"
+#undef private
+
+#include "net/proto2/proto/descriptor.pb.h"
+#include "net/proto2/public/descriptor.h"
+#include "net/proto2/public/generated_message_reflection.h"
+#include "net/proto2/public/lazy_field.h"
+#include "net/proto2/public/message.h"
+#include "net/proto2/public/string_piece_field_support.h"
+#include "net/proto/internal_layout.h"
+#include "strings/cord.h"
+using ::proto2::Descriptor;
+using ::proto2::EnumDescriptor;
+using ::proto2::EnumValueDescriptor;
+using ::proto2::FieldDescriptor;
+using ::proto2::FieldOptions;
+using ::proto2::FileDescriptor;
+using ::proto2::internal::GeneratedMessageReflection;
+using ::proto2::internal::RepeatedPtrFieldBase;
+using ::proto2::internal::StringPieceField;
+using ::proto2::Message;
+using ::proto2::MessageFactory;
+using ::proto2::Reflection;
+using ::proto2::RepeatedField;
+using ::proto2::RepeatedPtrField;
+
+namespace upb {
+
+static const Message* GetPrototypeForField(const Message& m,
+ const FieldDescriptor* f);
+
+namespace proto2_bridge_google3 { class FieldAccessor; }
+
+using ::upb::proto2_bridge_google3::FieldAccessor;
+
+namespace proto2_bridge_google3 {
+
+// Replaces the contents of "cord" with the bytes of "r", copying one
+// contiguous chunk at a time as returned by ByteRegion::GetPtr().
+static void AssignToCord(const ByteRegion* r, Cord* cord) {
+  // TODO(haberman): ref source data if source is a cord.
+  cord->Clear();
+  for (uint64_t pos = r->start_ofs(); pos < r->end_ofs(); ) {
+    size_t chunk_len;
+    const char *chunk = r->GetPtr(pos, &chunk_len);
+    cord->Append(StringPiece(chunk, chunk_len));
+    pos += chunk_len;  // Advance past the chunk just appended.
+  }
+}
+
+#else
+
+// TODO(haberman): friend upb so that this isn't required.
+#define protected public
+#include "google/protobuf/repeated_field.h"
+#undef protected
+
+#define private public
+#include "google/protobuf/generated_message_reflection.h"
+#undef private
+
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/descriptor.pb.h"
+#include "google/protobuf/message.h"
+using ::google::protobuf::Descriptor;
+using ::google::protobuf::EnumDescriptor;
+using ::google::protobuf::EnumValueDescriptor;
+using ::google::protobuf::FieldDescriptor;
+using ::google::protobuf::FieldOptions;
+using ::google::protobuf::FileDescriptor;
+using ::google::protobuf::internal::GeneratedMessageReflection;
+using ::google::protobuf::internal::RepeatedPtrFieldBase;
+using ::google::protobuf::Message;
+using ::google::protobuf::MessageFactory;
+using ::google::protobuf::Reflection;
+using ::google::protobuf::RepeatedField;
+using ::google::protobuf::RepeatedPtrField;
+
+namespace upb {
+static const Message* GetPrototypeForField(const Message& m,
+ const FieldDescriptor* f);
+
+namespace proto2_bridge_opensource { class FieldAccessor; }
+
+using ::upb::proto2_bridge_opensource::FieldAccessor;
+
+namespace proto2_bridge_opensource {
+
+#endif // ifdef UPB_GOOGLE3
+
+// Have to define this manually since older versions of proto2 didn't define
+// an enum value for STRING.
+#define UPB_CTYPE_STRING 0
+
+// The code in this class depends on the internal representation of the proto2
+// generated classes, which is an internal implementation detail of proto2 and
+// is not a public interface. As a result, this class's implementation may
+// need to be changed if/when proto2 changes its internal representation. It
+// is intended that this class is the only code that depends on these internal,
+// non-public interfaces.
+//
+// This class only works with messages that use GeneratedMessageReflection.
+// Other reflection classes will need other accessor implementations.
+class FieldAccessor {
+ public:
+ // Returns true if we were able to set an accessor and any other properties
+ // of the FieldDef that are necessary to read/write this field to a
+ // proto2::Message.
+ //
+ // Returns false, leaving upb_f untouched, when:
+ //   - the message's reflection is not a GeneratedMessageReflection,
+ //   - the field is an extension (not supported yet), or
+ //   - GetForFieldDescriptor() has no accessor for this field (it returns
+ //     NULL for unsupported cases).
+ static bool TrySet(const FieldDescriptor* proto2_f,
+                    const upb::MessageDef* md,
+                    upb::FieldDef* upb_f) {
+   const Message* prototype = static_cast<const Message*>(md->prototype);
+   const Reflection* base_r = prototype->GetReflection();
+   const GeneratedMessageReflection* r =
+       dynamic_cast<const GeneratedMessageReflection*>(base_r);
+   // Old versions of the open-source protobuf release erroneously default to
+   // Cord even though that has never been supported in the open-source
+   // release.
+   int32_t ctype = proto2_f->options().has_ctype() ?
+                   proto2_f->options().ctype() : UPB_CTYPE_STRING;
+   if (!r) return false;
+   // Extensions not supported yet.
+   if (proto2_f->is_extension()) return false;
+
+   // Without an accessor the field cannot be read or written, so report
+   // failure instead of installing a NULL vtable and claiming success.
+   AccessorVTable* vtbl = GetForFieldDescriptor(proto2_f, ctype);
+   if (!vtbl) return false;
+
+   upb_f->set_accessor(vtbl);
+   upb_f->set_hasbit(GetHasbit(proto2_f, r));
+   upb_f->set_offset(GetOffset(proto2_f, r));
+   if (upb_f->IsSubmessage()) {
+     upb_f->set_subtype_name(proto2_f->message_type()->full_name());
+     upb_f->prototype = GetPrototypeForField(*prototype, proto2_f);
+   }
+
+   // For singular string fields stored as plain strings, remember the
+   // string held by the prototype message; SetString() compares against it
+   // to decide whether a fresh string must be allocated.
+   if (upb_f->IsString() && !upb_f->IsSequence() &&
+       ctype == UPB_CTYPE_STRING) {
+     upb_f->prototype = &r->GetStringReference(*prototype, proto2_f, NULL);
+   }
+   return true;
+ }
+
+ // Returns the message factory stored in m's GeneratedMessageReflection, or
+ // NULL if m's reflection object is of some other class.
+ static MessageFactory* GetMessageFactory(const Message& m) {
+   const GeneratedMessageReflection* generated =
+       dynamic_cast<const GeneratedMessageReflection*>(m.GetReflection());
+   if (!generated) return NULL;
+   return generated->message_factory_;
+ }
+
+ private:
+ // Computes the hasbit index for field f: the bit offset of the hasbits
+ // region (has_bits_offset_ * 8) plus the field's index.  Returns -1 for
+ // repeated fields.
+ static int64_t GetHasbit(const FieldDescriptor* f,
+                          const GeneratedMessageReflection* r) {
+   // proto2 does not store hasbits for repeated fields.
+   if (f->is_repeated()) return -1;
+   return (r->has_bits_offset_ * 8) + f->index();
+ }
+
+ static uint16_t GetOffset(const FieldDescriptor* f,
+ const GeneratedMessageReflection* r) {
+ return r->offsets_[f->index()];
+ }
+
+ static AccessorVTable *GetForFieldDescriptor(const FieldDescriptor* f,
+ int32_t ctype) {
+ switch (f->cpp_type()) {
+ case FieldDescriptor::CPPTYPE_ENUM:
+ // Should handlers validate enum membership to match proto2?
+ case FieldDescriptor::CPPTYPE_INT32: return Get<int32_t>();
+ case FieldDescriptor::CPPTYPE_INT64: return Get<int64_t>();
+ case FieldDescriptor::CPPTYPE_UINT32: return Get<uint32_t>();
+ case FieldDescriptor::CPPTYPE_UINT64: return Get<uint64_t>();
+ case FieldDescriptor::CPPTYPE_DOUBLE: return Get<double>();
+ case FieldDescriptor::CPPTYPE_FLOAT: return Get<float>();
+ case FieldDescriptor::CPPTYPE_BOOL: return Get<bool>();
+ case FieldDescriptor::CPPTYPE_STRING:
+ switch (ctype) {
+#ifdef UPB_GOOGLE3
+ case FieldOptions::STRING:
+ return GetForString<string>();
+ case FieldOptions::CORD:
+ return GetForCord();
+ case FieldOptions::STRING_PIECE:
+ return GetForStringPiece();
+#else
+ case UPB_CTYPE_STRING:
+ return GetForString<std::string>();
+#endif
+ default: return NULL;
+ }
+ case FieldDescriptor::CPPTYPE_MESSAGE:
+#ifdef UPB_GOOGLE3
+ if (f->options().lazy()) {
+ return NULL; // Not yet implemented.
+ } else {
+ return GetForMessage();
+ }
+#else
+ return GetForMessage();
+#endif
+ default: return NULL;
+ }
+ }
+
+ // PushOffset handler (used for StartSequence and others) ///////////////////
+
+ static SubFlow PushOffset(void *m, Value fval) {
+ const FieldDef *f = GetValue<const FieldDef*>(fval);
+ return UPB_CONTINUE_WITH(GetFieldPointer(m, f));
+ }
+
+ // Primitive Value (numeric, enum, bool) /////////////////////////////////////
+
+ template <typename T> static AccessorVTable *Get() {
+ static upb_accessor_vtbl vtbl = {
+ NULL, // StartSubMessage handler
+ GetValueHandler<T>(),
+ &PushOffset, // StartSequence handler
+ NULL, // StartRepeatedSubMessage handler
+ &Append<T>,
+ NULL, NULL, NULL, NULL, NULL, NULL};
+ return &vtbl;
+ }
+
+ template <typename T>
+ static Flow Append(void *_r, Value fval, Value val) {
+ (void)fval;
+ RepeatedField<T>* r = static_cast<RepeatedField<T>*>(_r);
+ r->Add(GetValue<T>(val));
+ return UPB_CONTINUE;
+ }
+
+ // String ////////////////////////////////////////////////////////////////////
+
+ template <typename T> static AccessorVTable *GetForString() {
+ static upb_accessor_vtbl vtbl = {
+ NULL, // StartSubMessage handler
+ &SetString<T>,
+ &PushOffset, // StartSequence handler
+ NULL, // StartRepeatedSubMessage handler
+ &AppendString<T>,
+ NULL, NULL, NULL, NULL, NULL, NULL};
+ return &vtbl;
+ }
+
+ // This needs to be templated because google3 string is not std::string.
+ template <typename T> static Flow SetString(void *m, Value fval, Value val) {
+ const FieldDef* f = GetValue<const FieldDef*>(fval);
+ T **str = static_cast<T**>(GetFieldPointer(m, f));
+ // If it points to the default instance, we must create a new instance.
+ if (*str == f->prototype) *str = new T();
+ GetValue<ByteRegion*>(val)->AssignToString(*str);
+ return UPB_CONTINUE;
+ }
+
+ template <typename T>
+ static Flow AppendString(void *_r, Value fval, Value val) {
+ (void)fval;
+ RepeatedPtrField<T>* r = static_cast<RepeatedPtrField<T>*>(_r);
+ GetValue<ByteRegion*>(val)->AssignToString(r->Add());
+ return UPB_CONTINUE;
+ }
+
+ // SubMessage ////////////////////////////////////////////////////////////////
+
+ static AccessorVTable *GetForMessage() {
+ static upb_accessor_vtbl vtbl = {
+ &StartSubMessage,
+ NULL, // Value handler
+ &PushOffset, // StartSequence handler
+ &StartRepeatedSubMessage,
+ NULL, // Repeated value handler
+ NULL, NULL, NULL, NULL, NULL, NULL};
+ return &vtbl;
+ }
+
+ static SubFlow StartSubMessage(void *m, Value fval) {
+ const FieldDef* f = GetValue<const FieldDef*>(fval);
+ void **subm = static_cast<void**>(GetFieldPointer(m, f));
+ if (*subm == NULL || *subm == f->prototype) {
+ const Message* prototype = static_cast<const Message*>(f->prototype);
+ *subm = prototype->New();
+ }
+ return UPB_CONTINUE_WITH(*subm);
+ }
+
+ class RepeatedMessageTypeHandler {
+ public:
+ typedef void Type;
+ // AddAllocated() calls this, but only if other objects are sitting
+ // around waiting for reuse, which we will not do.
+ static void Delete(Type* t) {
+ (void)t;
+ assert(false);
+ }
+ };
+
+ // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
+ // its base class RepeatedPtrFieldBase*.
+ static SubFlow StartRepeatedSubMessage(void* _r, Value fval) {
+ const FieldDef* f = GetValue<const FieldDef*>(fval);
+ RepeatedPtrFieldBase *r = static_cast<RepeatedPtrFieldBase*>(_r);
+ void *submsg = r->AddFromCleared<RepeatedMessageTypeHandler>();
+ if (!submsg) {
+ const Message* prototype = static_cast<const Message*>(f->prototype);
+ submsg = prototype->New();
+ r->AddAllocated<RepeatedMessageTypeHandler>(submsg);
+ }
+ return UPB_CONTINUE_WITH(submsg);
+ }
+
+ // TODO(haberman): handle Extensions, Unknown Fields.
+
+#ifdef UPB_GOOGLE3
+ // Handlers for types/features only included in internal proto2 release:
+ // Cord, StringPiece, LazyField, and MessageSet.
+ // TODO(haberman): LazyField, MessageSet.
+
+ // Cord //////////////////////////////////////////////////////////////////////
+
+ static AccessorVTable *GetForCord() {
+ static upb_accessor_vtbl vtbl = {
+ NULL, // StartSubMessage handler
+ &SetCord,
+ &PushOffset, // StartSequence handler
+ NULL, // StartRepeatedSubMessage handler
+ &AppendCord,
+ NULL, NULL, NULL, NULL, NULL, NULL};
+ return &vtbl;
+ }
+
+ static Flow SetCord(void *m, Value fval, Value val) {
+ const FieldDef* f = GetValue<const FieldDef*>(fval);
+ Cord* field = static_cast<Cord*>(GetFieldPointer(m, f));
+ AssignToCord(GetValue<ByteRegion*>(val), field);
+ return UPB_CONTINUE;
+ }
+
+  // Repeated-value handler for Cord fields.  Treats _r as a
+  // RepeatedField<Cord>*, adds one element to it and fills that element from
+  // the ByteRegion carried in val.  Always returns UPB_CONTINUE.
+  static Flow AppendCord(void *_r, Value fval, Value val) {
+    (void)fval;  // Unused, as in Append() and AppendString().
+    RepeatedField<Cord>* r = static_cast<RepeatedField<Cord>*>(_r);
+    AssignToCord(GetValue<ByteRegion*>(val), r->Add());
+    return UPB_CONTINUE;
+  }
+
+ // StringPiece ///////////////////////////////////////////////////////////////
+
+ static AccessorVTable *GetForStringPiece() {
+ static upb_accessor_vtbl vtbl = {
+ NULL, // StartSubMessage handler
+ &SetStringPiece,
+ &PushOffset, // StartSequence handler
+ NULL, // StartRepeatedSubMessage handler
+ &AppendStringPiece,
+ NULL, NULL, NULL, NULL, NULL, NULL};
+ return &vtbl;
+ }
+
+ static void AssignToStringPieceField(const ByteRegion* r,
+ proto2::internal::StringPieceField* f) {
+ // TODO(haberman): alias if possible and enabled on the input stream.
+ // TODO(haberman): add a method to StringPieceField that lets us avoid
+ // this copy/malloc/free.
+ char *data = new char[r->Length()];
+ r->Copy(r->start_ofs(), r->Length(), data);
+ f->CopyFrom(StringPiece(data, r->Length()));
+ delete[] data;
+ }
+
+ static Flow SetStringPiece(void *m, Value fval, Value val) {
+ const FieldDef* f = GetValue<const FieldDef*>(fval);
+ StringPieceField* field =
+ static_cast<StringPieceField*>(GetFieldPointer(m, f));
+ AssignToStringPieceField(GetValue<ByteRegion*>(val), field);
+ return UPB_CONTINUE;
+ }
+
+  // Repeated-value handler for StringPiece fields.  Treats _r as a
+  // RepeatedPtrField<StringPieceField>*, adds one element to it and copies
+  // the ByteRegion carried in val into that element.  Always returns
+  // UPB_CONTINUE.
+  static Flow AppendStringPiece(void* _r, Value fval, Value val) {
+    (void)fval;  // Unused, as in Append() and AppendString().
+    RepeatedPtrField<StringPieceField>* r =
+        static_cast<RepeatedPtrField<StringPieceField>*>(_r);
+    AssignToStringPieceField(GetValue<ByteRegion*>(val), r->Add());
+    return UPB_CONTINUE;
+  }
+
+#endif // UPB_GOOGLE3
+};
+
+#ifdef UPB_GOOGLE3
+
+// Proto1 accessor -- only needed inside Google.
+class Proto1FieldAccessor {
+ public:
+ // Returns true if we were able to set an accessor and any other properties
+ // of the FieldDef that are necessary to read/write this field to a
+ // proto2::Message.
+ static bool TrySet(const FieldDescriptor* proto2_f,
+ const upb::MessageDef* md,
+ upb::FieldDef* upb_f) {
+ const Message* m = static_cast<const Message*>(md->prototype);
+ const proto2::Reflection* base_r = m->GetReflection();
+ const _pi::Proto2Reflection* r =
+ dynamic_cast<const _pi::Proto2Reflection*>(base_r);
+ if (!r) return false;
+ // Extensions not supported yet.
+ if (proto2_f->is_extension()) return false;
+
+ const _pi::Field* f = r->GetFieldLayout(proto2_f);
+
+ if (f->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) {
+ // Override the BYTES type that proto2 descriptors have for weak fields.
+ upb_f->set_type(UPB_TYPE(MESSAGE));
+ }
+
+ if (upb_f->IsSubmessage()) {
+ const Message* prototype = upb::GetPrototypeForField(*m, proto2_f);
+ upb_f->set_subtype_name(prototype->GetDescriptor()->full_name());
+ upb_f->prototype = prototype;
+ }
+
+ upb_f->set_accessor(GetForCrep(f->crep));
+ upb_f->set_hasbit(GetHasbit(proto2_f, r));
+ upb_f->set_offset(GetOffset(proto2_f, r));
+ return true;
+ }
+
+ private:
+ static int16_t GetHasbit(const FieldDescriptor* f,
+ const _pi::Proto2Reflection* r) {
+ if (f->is_repeated()) {
+ // proto1 does not store hasbits for repeated fields.
+ return -1;
+ } else {
+ return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index;
+ }
+ }
+
+ static uint16_t GetOffset(const FieldDescriptor* f,
+ const _pi::Proto2Reflection* r) {
+ return r->GetFieldLayout(f)->offset;
+ }
+
+ static AccessorVTable *GetForCrep(int crep) {
+#define PRIMITIVE(name, type_name) \
+ case _pi::CREP_REQUIRED_ ## name: \
+ case _pi::CREP_OPTIONAL_ ## name: \
+ case _pi::CREP_REPEATED_ ## name: return Get<type_name>();
+
+ switch (crep) {
+ PRIMITIVE(DOUBLE, double);
+ PRIMITIVE(FLOAT, float);
+ PRIMITIVE(INT64, int64_t);
+ PRIMITIVE(UINT64, uint64_t);
+ PRIMITIVE(INT32, int32_t);
+ PRIMITIVE(FIXED64, uint64_t);
+ PRIMITIVE(FIXED32, uint32_t);
+ PRIMITIVE(BOOL, bool);
+ case _pi::CREP_REQUIRED_STRING:
+ case _pi::CREP_OPTIONAL_STRING:
+ case _pi::CREP_REPEATED_STRING: return GetForString();
+ case _pi::CREP_OPTIONAL_OUTOFLINE_STRING: return GetForOutOfLineString();
+ case _pi::CREP_REQUIRED_CORD:
+ case _pi::CREP_OPTIONAL_CORD:
+ case _pi::CREP_REPEATED_CORD: return GetForCord();
+ case _pi::CREP_REQUIRED_GROUP:
+ case _pi::CREP_REQUIRED_FOREIGN:
+ case _pi::CREP_REQUIRED_FOREIGN_PROTO2: return GetForRequiredMessage();
+ case _pi::CREP_OPTIONAL_GROUP:
+ case _pi::CREP_REPEATED_GROUP:
+ case _pi::CREP_OPTIONAL_FOREIGN:
+ case _pi::CREP_REPEATED_FOREIGN:
+ case _pi::CREP_OPTIONAL_FOREIGN_PROTO2:
+ case _pi::CREP_REPEATED_FOREIGN_PROTO2: return GetForMessage();
+ case _pi::CREP_OPTIONAL_FOREIGN_WEAK: return GetForWeakMessage();
+ default: assert(false); return NULL;
+ }
+#undef PRIMITIVE
+ }
+
+ // PushOffset handler (used for StartSequence and others) ///////////////////
+
+ // We can find a RepeatedField* or a RepeatedPtrField* at f->offset().
+ static SubFlow PushOffset(void *m, Value fval) {
+ const FieldDef *f = GetValue<const FieldDef*>(fval);
+ return UPB_CONTINUE_WITH(GetFieldPointer(m, f));
+ }
+
+ // Primitive Value (numeric, enum, bool) /////////////////////////////////////
+
+ template <typename T> static AccessorVTable *Get() {
+ static upb_accessor_vtbl vtbl = {
+ NULL, // StartSubMessage handler
+ GetValueHandler<T>(),
+ &PushOffset, // StartSequence handler
+ NULL, // StartRepeatedSubMessage handler
+ &Append<T>,
+ NULL, NULL, NULL, NULL, NULL, NULL};
+ return &vtbl;
+ }
+
+ template <typename T>
+ static Flow Append(void *_r, Value fval, Value val) {
+ (void)fval;
+ // Proto1's ProtoArray class derives from RepeatedField.
+ RepeatedField<T>* r = static_cast<RepeatedField<T>*>(_r);
+ r->Add(GetValue<T>(val));
+ return UPB_CONTINUE;
+ }
+
+ // String ////////////////////////////////////////////////////////////////////
+
+ static AccessorVTable *GetForString() {
+ static upb_accessor_vtbl vtbl = {
+ NULL, // StartSubMessage handler
+ &SetString,
+ &PushOffset, // StartSequence handler
+ NULL, // StartRepeatedSubMessage handler
+ &AppendString,
+ NULL, NULL, NULL, NULL, NULL, NULL};
+ return &vtbl;
+ }
+
+ static Flow SetString(void *m, Value fval, Value val) {
+ const FieldDef* f = GetValue<const FieldDef*>(fval);
+ string *str = static_cast<string*>(GetFieldPointer(m, f));
+ GetValue<ByteRegion*>(val)->AssignToString(str);
+ return UPB_CONTINUE;
+ }
+
+ static Flow AppendString(void *_r, Value fval, Value val) {
+ (void)fval;
+ RepeatedPtrField<string>* r = static_cast<RepeatedPtrField<string>*>(_r);
+ GetValue<ByteRegion*>(val)->AssignToString(r->Add());
+ return UPB_CONTINUE;
+ }
+
+ // Out-of-line string ////////////////////////////////////////////////////////
+
+ static AccessorVTable *GetForOutOfLineString() {
+ static upb_accessor_vtbl vtbl = {
+ NULL, &SetOutOfLineString,
+ // This type is only used for non-repeated string fields.
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
+ return &vtbl;
+ }
+
+ static Flow SetOutOfLineString(void *m, Value fval, Value val) {
+ const FieldDef* f = GetValue<const FieldDef*>(fval);
+ string **str = static_cast<string**>(GetFieldPointer(m, f));
+ if (*str == &::ProtocolMessage::___empty_internal_proto_string_)
+ *str = new string();
+ GetValue<ByteRegion*>(val)->AssignToString(*str);
+ return UPB_CONTINUE;
+ }
+
+ // Cord //////////////////////////////////////////////////////////////////////
+
+ static AccessorVTable *GetForCord() {
+ static upb_accessor_vtbl vtbl = {
+ NULL, // StartSubMessage handler
+ &SetCord,
+ &PushOffset, // StartSequence handler
+ NULL, // StartRepeatedSubMessage handler
+ &AppendCord,
+ NULL, NULL, NULL, NULL, NULL, NULL};
+ return &vtbl;
+ }
+
+ static Flow SetCord(void *m, Value fval, Value val) {
+ const FieldDef* f = GetValue<const FieldDef*>(fval);
+ Cord* field = static_cast<Cord*>(GetFieldPointer(m, f));
+ AssignToCord(GetValue<ByteRegion*>(val), field);
+ return UPB_CONTINUE;
+ }
+
+  // Repeated-value handler for proto1 Cord fields.  Treats _r as a
+  // RepeatedField<Cord>*, adds one element to it and fills that element from
+  // the ByteRegion carried in val.  Always returns UPB_CONTINUE.
+  static Flow AppendCord(void *_r, Value fval, Value val) {
+    (void)fval;  // Unused, as in Append() and AppendString().
+    RepeatedField<Cord>* r = static_cast<RepeatedField<Cord>*>(_r);
+    AssignToCord(GetValue<ByteRegion*>(val), r->Add());
+    return UPB_CONTINUE;
+  }
+
+ // SubMessage ////////////////////////////////////////////////////////////////
+
+ static AccessorVTable *GetForRequiredMessage() {
+ static upb_accessor_vtbl vtbl = {
+ &PushOffset, // StartSubMessage handler
+ NULL, // Value handler
+ &PushOffset, // StartSequence handler
+ &StartRepeatedSubMessage,
+ NULL, // Repeated value handler
+ NULL, NULL, NULL, NULL, NULL, NULL};
+ return &vtbl;
+ }
+
+ static AccessorVTable *GetForWeakMessage() {
+ static upb_accessor_vtbl vtbl = {
+ &StartWeakSubMessage, // StartSubMessage handler
+ NULL, // Value handler
+ &PushOffset, // StartSequence handler
+ &StartRepeatedSubMessage,
+ NULL, // Repeated value handler
+ NULL, NULL, NULL, NULL, NULL, NULL};
+ return &vtbl;
+ }
+
+ static AccessorVTable *GetForMessage() {
+ static upb_accessor_vtbl vtbl = {
+ &StartSubMessage,
+ NULL, // Value handler
+ &PushOffset, // StartSequence handler
+ &StartRepeatedSubMessage,
+ NULL, // Repeated value handler
+ NULL, NULL, NULL, NULL, NULL, NULL};
+ return &vtbl;
+ }
+
+ static SubFlow StartSubMessage(void *m, Value fval) {
+ const FieldDef* f = GetValue<const FieldDef*>(fval);
+ Message **subm = static_cast<Message**>(GetFieldPointer(m, f));
+ if (*subm == f->prototype) *subm = (*subm)->New();
+ return UPB_CONTINUE_WITH(*subm);
+ }
+
+ static SubFlow StartWeakSubMessage(void *m, Value fval) {
+ const FieldDef* f = GetValue<const FieldDef*>(fval);
+ Message **subm = static_cast<Message**>(GetFieldPointer(m, f));
+ if (*subm == NULL) {
+ const Message* prototype = static_cast<const Message*>(f->prototype);
+ *subm = prototype->New();
+ }
+ return UPB_CONTINUE_WITH(*subm);
+ }
+
+ class RepeatedMessageTypeHandler {
+ public:
+ typedef void Type;
+ // AddAllocated() calls this, but only if other objects are sitting
+ // around waiting for reuse, which we will not do.
+ static void Delete(Type* t) {
+ (void)t;
+ assert(false);
+ }
+ };
+
+ // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
+ // its base class RepeatedPtrFieldBase*.
+ static SubFlow StartRepeatedSubMessage(void* _r, Value fval) {
+ const FieldDef* f = GetValue<const FieldDef*>(fval);
+ RepeatedPtrFieldBase *r = static_cast<RepeatedPtrFieldBase*>(_r);
+ void *submsg = r->AddFromCleared<RepeatedMessageTypeHandler>();
+ if (!submsg) {
+ const Message* prototype = static_cast<const Message*>(f->prototype);
+ submsg = prototype->New();
+ r->AddAllocated<RepeatedMessageTypeHandler>(submsg);
+ }
+ return UPB_CONTINUE_WITH(submsg);
+ }
+};
+
+#endif
+
+} // namespace proto2_bridge_{google3,opensource}
+
+static const Message* GetPrototypeForMessage(const Message& m) {
+ const Message* ret = NULL;
+ MessageFactory* factory = FieldAccessor::GetMessageFactory(m);
+ if (factory) {
+ // proto2 generated message or DynamicMessage.
+ ret = factory->GetPrototype(m.GetDescriptor());
+ assert(ret);
+ } else {
+ // Proto1 message; since proto1 has no dynamic message, it must be
+ // from the generated factory.
+ ret = MessageFactory::generated_factory()->GetPrototype(m.GetDescriptor());
+ assert(ret); // If NULL, then wasn't a proto1 message, can't handle it.
+ }
+ assert(ret->GetReflection() == m.GetReflection());
+ return ret;
+}
+
+static const Message* GetPrototypeForField(const Message& m,
+ const FieldDescriptor* f) {
+#ifdef UPB_GOOGLE3
+ if (f->type() == FieldDescriptor::TYPE_BYTES) {
+ // Proto1 weak field: the proto2 descriptor says their type is BYTES.
+ const _pi::Proto2Reflection* r =
+ dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection());
+ assert(r);
+ const _pi::Field* field = r->GetFieldLayout(f);
+ assert(field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK);
+ return GetPrototypeForMessage(
+ *static_cast<const Message*>(field->weak_layout()->default_instance));
+ } else if (dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection())) {
+ // Proto1 message; since proto1 has no dynamic message, it must be from
+ // the generated factory.
+ const Message* ret =
+ MessageFactory::generated_factory()->GetPrototype(f->message_type());
+ assert(ret);
+ return ret;
+ }
+#endif
+ assert(f->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE);
+ // We assume that all submessages (and extensions) will be constructed using
+ // the same MessageFactory as this message. This doesn't cover the case of
+ // CodedInputStream::SetExtensionRegistry().
+ MessageFactory* factory = FieldAccessor::GetMessageFactory(m);
+ assert(factory); // If neither proto1 nor proto2 we can't handle it.
+ const Message* ret = factory->GetPrototype(f->message_type());
+ assert(ret);
+ return ret;
+}
+
+namespace proto2_bridge {
+
+// Builds a upb::FieldDef for the proto2 field |f| and adds it to |md|.
+//
+// Number, name, label and type are copied from |f|.  The accessor and any
+// other properties needed to read/write the field are installed by the first
+// *FieldAccessor::TrySet() that accepts the message's reflection class.  The
+// default value is copied last, keyed on the upb type.
+//
+// Returns the added FieldDef, or NULL if no accessor could be installed or
+// md->AddField() rejected the field.  In the NULL case the ref taken by
+// FieldDef::New() is released here rather than leaked.
+upb::FieldDef* AddFieldDef(const FieldDescriptor* f, upb::MessageDef* md) {
+  // The local pointer itself serves as the owner token for the new ref.
+  upb::FieldDef* upb_f = upb::FieldDef::New(&upb_f);
+  upb_f->set_number(f->number());
+  upb_f->set_name(f->name());
+  upb_f->set_label(static_cast<upb::Label>(f->label()));
+  upb_f->set_type(static_cast<upb::FieldType>(f->type()));
+
+  if (!FieldAccessor::TrySet(f, md, upb_f)
+#ifdef UPB_GOOGLE3
+      && !proto2_bridge_google3::Proto1FieldAccessor::TrySet(f, md, upb_f)
+#endif
+      ) {
+    // Unsupported reflection class.
+    assert(false);
+    // With NDEBUG the assert compiles away; refuse to add a field that has
+    // no accessor instead of silently continuing.
+    // NOTE(review): assumes FieldDef exposes Unref(owner) like Def does --
+    // confirm in def.hpp.
+    upb_f->Unref(&upb_f);
+    return NULL;
+  }
+
+  if (upb_f->type() == UPB_TYPE(ENUM)) {
+    // We set the enum default symbolically.
+    upb_f->set_default(f->default_value_enum()->name());
+    upb_f->set_subtype_name(f->enum_type()->full_name());
+  } else {
+    // Set field default for primitive types.  Need to switch on the upb type
+    // rather than the proto2 type, because upb_f->type() may have been changed
+    // from BYTES to MESSAGE for a weak field.
+    switch (upb_types[upb_f->type()].inmemory_type) {
+      case UPB_CTYPE_INT32:
+        upb_f->set_default(MakeValue(f->default_value_int32()));
+        break;
+      case UPB_CTYPE_INT64:
+        upb_f->set_default(
+            MakeValue(static_cast<int64_t>(f->default_value_int64())));
+        break;
+      case UPB_CTYPE_UINT32:
+        upb_f->set_default(MakeValue(f->default_value_uint32()));
+        break;
+      case UPB_CTYPE_UINT64:
+        upb_f->set_default(
+            MakeValue(static_cast<uint64_t>(f->default_value_uint64())));
+        break;
+      case UPB_CTYPE_DOUBLE:
+        upb_f->set_default(MakeValue(f->default_value_double()));
+        break;
+      case UPB_CTYPE_FLOAT:
+        upb_f->set_default(MakeValue(f->default_value_float()));
+        break;
+      case UPB_CTYPE_BOOL:
+        upb_f->set_default(MakeValue(f->default_value_bool()));
+        break;
+      case UPB_CTYPE_BYTEREGION:
+        upb_f->set_default(f->default_value_string());
+        break;
+      default:
+        // Any other in-memory type gets no default here.
+        break;
+    }
+  }
+
+  if (md->AddField(upb_f, &upb_f)) return upb_f;
+
+  // NOTE(review): assumes a failed AddField() leaves the ref with &upb_f, so
+  // it must be dropped here to avoid a leak -- confirm against
+  // upb::MessageDef::AddField().
+  upb_f->Unref(&upb_f);
+  return NULL;
+}
+
+upb::MessageDef *NewEmptyMessageDef(const Message& m, void *owner) {
+ upb::MessageDef *md = upb::MessageDef::New(owner);
+ md->set_full_name(m.GetDescriptor()->full_name());
+ md->prototype = GetPrototypeForMessage(m);
+ return md;
+}
+
+upb::EnumDef* NewEnumDef(const EnumDescriptor* desc, void *owner) {
+ upb::EnumDef* e = upb::EnumDef::New(owner);
+ e->set_full_name(desc->full_name());
+ for (int i = 0; i < desc->value_count(); i++) {
+ const EnumValueDescriptor* val = desc->value(i);
+ bool success = e->AddValue(val->name(), val->number());
+ assert(success);
+ (void)success;
+ }
+ return e;
+}
+
+// Adds a FieldDef to |md| for each field of the descriptor belonging to
+// md->prototype (which is cast to a proto2 Message).  Lazy fields (google3
+// builds only) and extension fields are skipped.  The return value of
+// AddFieldDef() is ignored, so a field that fails to be added is not reported.
+void AddAllFields(upb::MessageDef* md) {
+ const Descriptor* d =
+ static_cast<const Message*>(md->prototype)->GetDescriptor();
+ for (int i = 0; i < d->field_count(); i++) {
+#ifdef UPB_GOOGLE3
+ // Skip lazy fields for now since we can't properly handle them.
+ if (d->field(i)->options().lazy()) continue;
+#endif
+ // Extensions not supported yet.
+ if (d->field(i)->is_extension()) continue;
+ AddFieldDef(d->field(i), md);
+ }
+}
+
+upb::MessageDef *NewFullMessageDef(const Message& m, void *owner) {
+ upb::MessageDef* md = NewEmptyMessageDef(m, owner);
+ AddAllFields(md);
+ // TODO(haberman): add unknown field handler and extensions.
+ return md;
+}
+
+typedef std::map<std::string, upb::Def*> SymbolMap;
+
+// Recursive worker for NewFinalMessageDef().  Builds a full MessageDef for |m|,
+// records it in |symbols| under its full name, then resolves the subdef of
+// every field that has one: an entry already in |symbols| is reused, otherwise
+// a new EnumDef is created (and recorded) or a MessageDef is built by
+// recursing on the field's prototype.  All new defs are created with |owner|.
+// Returns the MessageDef built for |m|; nothing is finalized here.
+static upb::MessageDef* NewFinalMessageDefHelper(const Message& m, void *owner,
+ SymbolMap* symbols) {
+ upb::MessageDef* md = NewFullMessageDef(m, owner);
+ // Must do this before processing submessages to prevent infinite recursion.
+ (*symbols)[std::string(md->full_name())] = md->AsDef();
+
+ for (upb::MessageDef::Iterator i(md); !i.Done(); i.Next()) {
+ upb::FieldDef* f = i.field();
+ if (!f->HasSubDef()) continue;
+ SymbolMap::iterator iter = symbols->find(f->subtype_name());
+ upb::Def* subdef;
+ if (iter != symbols->end()) {
+ subdef = iter->second;
+ } else {
+ // Map the upb field back to its proto2 descriptor by field number.
+ const FieldDescriptor* proto2_f =
+ m.GetDescriptor()->FindFieldByNumber(f->number());
+ if (f->type() == UPB_TYPE(ENUM)) {
+ subdef = NewEnumDef(proto2_f->enum_type(), owner)->AsDef();
+ (*symbols)[std::string(subdef->full_name())] = subdef;
+ } else {
+ assert(f->IsSubmessage());
+ const Message* prototype = GetPrototypeForField(m, proto2_f);
+ // The recursive call records the new MessageDef in |symbols| itself.
+ subdef = NewFinalMessageDefHelper(*prototype, owner, symbols)->AsDef();
+ }
+ }
+ f->set_subdef(subdef);
+ }
+ return md;
+}
+
+// Builds the MessageDef for |m| together with every def reachable from it,
+// finalizes them all in a single Def::Finalize() call, and returns the
+// top-level MessageDef.  Every other def created along the way has |owner|'s
+// ref dropped before returning.  Finalize failure is only checked by assert().
+const upb::MessageDef* NewFinalMessageDef(const Message& m, void *owner) {
+ SymbolMap symbols;
+ upb::MessageDef* ret = NewFinalMessageDefHelper(m, owner, &symbols);
+
+ // Finalize defs.
+ std::vector<upb::Def*> defs;
+ SymbolMap::iterator iter;
+ for (iter = symbols.begin(); iter != symbols.end(); ++iter) {
+ defs.push_back(iter->second);
+ }
+ Status status;
+ bool success = Def::Finalize(defs, &status);
+ assert(success);
+ // Keeps |success| "used" when assert() is compiled out.
+ (void)success;
+
+ // Unref all defs except the top-level one that we are returning.
+ for (int i = 0; i < static_cast<int>(defs.size()); i++) {
+ if (defs[i] != ret->AsDef()) defs[i]->Unref(owner);
+ }
+
+ return ret;
+}
+
+} // namespace proto2_bridge
+} // namespace upb
diff --git a/bindings/cpp/upb/proto2_bridge.hpp b/bindings/cpp/upb/proto2_bridge.hpp
new file mode 100644
index 0000000..ace08ce
--- /dev/null
+++ b/bindings/cpp/upb/proto2_bridge.hpp
@@ -0,0 +1,170 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc. See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// A bridge between upb and proto2, allows populating proto2 generated
+// classes using upb's parser, translating between descriptors and defs, etc.
+//
+// This is designed to be able to be compiled against either the open-source
+// version of protocol buffers or the Google-internal proto2. The two are
+// the same in most ways, but live in different namespaces (proto2 vs
+// google::protobuf) and have a few other more minor differences.
+//
+// The bridge gives you a lot of control over which fields will be written to
+// the message (fields that are not written will just be skipped), and whether
+// unknown fields are written to the UnknownFieldSet. This can save a lot of
+// work if the client only cares about some subset of the fields.
+//
+// Example usage:
+//
+// // Build a def that will have all fields and parse just like proto2 would.
+// const upb::MessageDef* md =
+//     upb::proto2_bridge::NewFinalMessageDef(MyProto(), &md);
+//
+// // JIT the parser; should only be done once ahead-of-time.
+// upb::Handlers* handlers = upb::NewHandlersForMessage(md);
+// upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers);
+// handlers->Unref();
+//
+// // The actual parsing.
+// MyProto proto;
+// upb::Decoder decoder;
+// upb::StringSource source(buf, len);
+// decoder.ResetPlan(plan, 0);
+// decoder.ResetInput(source.AllBytes(), &proto);
+// CHECK(decoder.Decode() == UPB_OK) << decoder.status();
+//
+// To parse only one field and skip all others:
+//
+// const upb::MessageDef* md =
+// upb::proto2_bridge::NewEmptyMessageDef(MyProto().GetPrototype());
+// upb::proto2_bridge::AddFieldDef(
+// MyProto::descriptor()->FindFieldByName("my_field"), md);
+// upb::Finalize(md);
+//
+// // Now continue with "JIT the parser" from above.
+//
+// Note that there is currently no support for
+// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate
+// DescriptorPool and MessageFactory for extensions. Since this is a property
+// of the input in proto2, it's difficult to build a plan ahead-of-time that
+// can properly support this. If it's an important use case, the caller should
+// probably build a upb plan explicitly.
+
+#ifndef UPB_PROTO2_BRIDGE
+#define UPB_PROTO2_BRIDGE
+
+#include <vector>
+
+namespace google {
+namespace protobuf {
+class Descriptor;
+class EnumDescriptor;
+class FieldDescriptor;
+class FileDescriptor;
+class Message;
+} // namespace protobuf
+} // namespace google
+
+namespace proto2 {
+class Descriptor;
+class EnumDescriptor;
+class FieldDescriptor;
+class FileDescriptor;
+class Message;
+} // namespace proto2
+
+
+namespace upb {
+
+// Forward declarations of the upb def types named in the signatures below.
+// EnumDef is required by NewEnumDef(); without it this header only compiles
+// when the upb def header happens to be included first.
+class Def;
+class EnumDef;
+class FieldDef;
+class MessageDef;
+
+namespace proto2_bridge {
+
+// Unfinalized defs ////////////////////////////////////////////////////////////
+
+// Creation of UNFINALIZED defs. All of these functions return defs that are
+// still mutable and have not been finalized. They must be finalized before
+// using them to parse anything. This is useful if you want more control over
+// the process of constructing defs, eg. to add the specific set of fields you
+// care about.
+
+// Creates a new upb::MessageDef that corresponds to the type in the given
+// prototype message. The MessageDef will not have any fields added to it.
+upb::MessageDef *NewEmptyMessageDef(const proto2::Message& m, void *owner);
+upb::MessageDef *NewEmptyMessageDef(const google::protobuf::Message& desc,
+ void *owner);
+
+// Adds a new upb::FieldDef to the given MessageDef corresponding to the given
+// FieldDescriptor. The FieldDef will be given an accessor and offset so that
+// it can be used to read and write data into the proto2::Message classes.
+// The given MessageDef must have been constructed with NewEmptyMessageDef()
+// and f->containing_type() must correspond to the message that was used.
+//
+// Any submessage, group, or enum fields will be given symbolic references to
+// the subtype, which must be resolved before the MessageDef can be finalized.
+//
+// On success, returns the FieldDef that was added (caller does not own a ref).
+// If an existing field had the same name or number, returns NULL.
+upb::FieldDef* AddFieldDef(const proto2::FieldDescriptor* f,
+ upb::MessageDef* md);
+upb::FieldDef* AddFieldDef(const google::protobuf::FieldDescriptor* f,
+ upb::MessageDef* md);
+
+// Given a MessageDef that was constructed with NewEmptyMessageDef(), adds
+// FieldDefs for all fields defined in the original message, but not for any
+// extensions or unknown fields. The given MessageDef must not have any fields
+// that have the same name or number as any of the fields we are adding (the
+// easiest way to guarantee this is to start with an empty MessageDef).
+//
+// Returns nothing: a field that could not be added is not reported.
+void AddAllFields(upb::MessageDef* md);
+
+// TODO(haberman): Add:
+// // Adds a handler that will store unknown fields in the UnknownFieldSet.
+// void AddUnknownFieldHandler(upb::MessageDef* md);
+
+// Returns a new upb::MessageDef that contains handlers for all fields, unknown
+// fields, and any extensions in the descriptor's pool. The resulting
+// def/handlers should be equivalent to the generated code constructed by the
+// protobuf compiler (or the code in DynamicMessage) for the given type.
+// The subdefs for message/enum fields (if any) will be referenced symbolically,
+// and will need to be resolved before being finalized.
+//
+// TODO(haberman): Add missing support (LazyField, MessageSet, and extensions).
+//
+// TODO(haberman): possibly add a similar function that lets you supply a
+// separate DescriptorPool and MessageFactory for extensions, to support
+// proto2's io::CodedInputStream::SetExtensionRegistry().
+upb::MessageDef* NewFullMessageDef(const proto2::Message& m, void *owner);
+upb::MessageDef* NewFullMessageDef(const google::protobuf::Message& m,
+ void *owner);
+
+// Returns a new upb::EnumDef that corresponds to the given EnumDescriptor.
+// Caller owns a ref on the returned EnumDef.
+upb::EnumDef* NewEnumDef(const proto2::EnumDescriptor* desc, void *owner);
+upb::EnumDef* NewEnumDef(const google::protobuf::EnumDescriptor* desc,
+ void *owner);
+
+// Finalized defs //////////////////////////////////////////////////////////////
+
+// These functions return FINALIZED defs, meaning that they are immutable and
+// ready for use. Since they are immutable you cannot make any further changes
+// to eg. the set of fields, but these functions are more convenient if you
+// simply want to parse a message exactly how the built-in proto2 parser would.
+
+// Creates and returns a finalized MessageDef for the given message and its
+// entire type tree that will include all fields and unknown handlers (ie. it
+// will parse just like proto2 would).
+const upb::MessageDef* NewFinalMessageDef(const proto2::Message& m,
+ void *owner);
+const upb::MessageDef* NewFinalMessageDef(const google::protobuf::Message& m,
+ void *owner);
+
+} // namespace proto2_bridge
+} // namespace upb
+
+#endif
diff --git a/bindings/cpp/upb/upb.hpp b/bindings/cpp/upb/upb.hpp
index 226859c..48c2708 100644
--- a/bindings/cpp/upb/upb.hpp
+++ b/bindings/cpp/upb/upb.hpp
@@ -10,6 +10,16 @@
#include "upb/upb.h"
#include <iostream>
+#if defined(__GXX_EXPERIMENTAL_CXX0X__) && !defined(UPB_NO_CXX11)
+#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \
+ class_name() = delete; \
+ ~class_name() = delete;
+#else
+#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \
+ class_name(); \
+ ~class_name();
+#endif
+
namespace upb {
typedef upb_success_t Success;
@@ -31,11 +41,35 @@ class Status : public upb_status {
void Clear() { upb_status_clear(this); }
};
-class Value : public upb_value {
- public:
- Value(const upb_value& val) { *this = val; }
- Value() {}
-};
+typedef upb_value Value;
+
+template <typename T> T GetValue(Value v);
+template <typename T> Value MakeValue(T v);
+
+#define UPB_VALUE_ACCESSORS(type, ctype) \
+ template <> inline ctype GetValue<ctype>(Value v) { \
+ return upb_value_get ## type(v); \
+ } \
+ template <> inline Value MakeValue<ctype>(ctype v) { \
+ return upb_value_ ## type(v); \
+ }
+
+UPB_VALUE_ACCESSORS(double, double);
+UPB_VALUE_ACCESSORS(float, float);
+UPB_VALUE_ACCESSORS(int32, int32_t);
+UPB_VALUE_ACCESSORS(int64, int64_t);
+UPB_VALUE_ACCESSORS(uint32, uint32_t);
+UPB_VALUE_ACCESSORS(uint64, uint64_t);
+UPB_VALUE_ACCESSORS(bool, bool);
+
+#undef UPB_VALUE_ACCESSORS
+
+template <typename T> inline T* GetPtrValue(Value v) {
+ return static_cast<T*>(upb_value_getptr(v));
+}
+template <typename T> inline Value MakePtrValue(T* v) {
+ return upb_value_ptr(static_cast<void*>(v));
+}
INLINE std::ostream& operator<<(std::ostream& out, const Status& status) {
out << status.GetString();
diff --git a/bindings/lua/upb.c b/bindings/lua/upb.c
index 56c5be9..4cce4b6 100644
--- a/bindings/lua/upb.c
+++ b/bindings/lua/upb.c
@@ -37,11 +37,15 @@ static uint32_t lupb_touint32(lua_State *L, int narg, const char *name) {
return n;
}
-static void lupb_pushstring(lua_State *L, const upb_byteregion *r) {
- // TODO: could avoid a copy in the case that the string is contiguous.
- char *str = upb_byteregion_strdup(r);
- lua_pushlstring(L, str, upb_byteregion_len(r));
- free(str);
+static void lupb_pushstring(lua_State *L, const upb_strref *ref) {
+ if (ref->ptr) {
+ lua_pushlstring(L, ref->ptr, ref->len);
+ } else {
+ // Lua requires a contiguous string; must copy+allocate.
+ char *str = upb_strref_dup(ref);
+ lua_pushlstring(L, str, ref->len);
+ free(str);
+ }
}
static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) {
@@ -73,7 +77,7 @@ static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) {
// Returns a scalar value (ie. not a submessage) as a upb_value.
static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f,
- upb_byteregion *ref) {
+ upb_strref *ref) {
assert(!upb_issubmsg(f));
upb_value val;
if (upb_fielddef_type(f) == UPB_TYPE(BOOL)) {
@@ -135,7 +139,7 @@ static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f,
}
static void lupb_typecheck(lua_State *L, int narg, upb_fielddef *f) {
- upb_byteregion ref;
+ upb_strref ref;
lupb_getvalue(L, narg, f, &ref);
}
@@ -298,8 +302,8 @@ static void lupb_fielddef_set(lua_State *L, upb_fielddef *f,
} else if (streql(field, "default_value")) {
if (!upb_fielddef_type(f))
luaL_error(L, "Must set type before setting default_value");
- upb_byteregion region;
- upb_fielddef_setdefault(f, lupb_getvalue(L, narg, f, &region));
+ upb_strref ref;
+ upb_fielddef_setdefault(f, lupb_getvalue(L, narg, f, &ref));
} else {
luaL_error(L, "Cannot set fielddef member '%s'", field);
}
@@ -778,7 +782,7 @@ static upb_flow_t lupb_msg_string(void *m, upb_value fval, upb_value val,
lua_State *L = *(lua_State**)m;
int offset = array ? lua_rawlen(L, -1) : f->offset;
if (!lua_checkstack(L, 1)) luaL_error(L, "stack full");
- lupb_pushstring(L, upb_value_getbyteregion(val));
+ lupb_pushstring(L, upb_value_getstrref(val));
lua_rawseti(L, -2, offset);
return UPB_CONTINUE;
}
diff --git a/bindings/python/upb.c b/bindings/python/upb.c
index 8f36f70..497074b 100644
--- a/bindings/python/upb.c
+++ b/bindings/python/upb.c
@@ -612,9 +612,8 @@ static upb_sflow_t PyUpb_Message_StartRepeatedSubmessage(void *a, upb_value fval
static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value val) {
PyObject **str = PyUpb_Accessor_GetPtr(m, fval);
if (*str) { Py_DECREF(*str); }
- upb_byteregion *r = upb_value_getbyteregion(val);
- *str = PyString_FromStringAndSize(NULL, upb_byteregion_len(r));
- upb_byteregion_copyall(r, PyString_AsString(*str));
+ *str = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len);
+ upb_strref_read(upb_value_getstrref(val), PyString_AsString(*str));
upb_stdmsg_sethas(m, fval);
return UPB_CONTINUE;
}
@@ -622,9 +621,8 @@ static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value v
static upb_flow_t PyUpb_Message_AppendStringValue(void *a, upb_value fval, upb_value val) {
(void)fval;
PyObject **elem = upb_stdarray_append(a, sizeof(void*));
- upb_byteregion *r = upb_value_getbyteregion(val);
- *elem = PyString_FromStringAndSize(NULL, upb_byteregion_len(r));
- upb_byteregion_copyall(r, PyString_AsString(*elem));
+ *elem = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len);
+ upb_strref_read(upb_value_getstrref(val), PyString_AsString(*elem));
return UPB_CONTINUE;
}
diff --git a/tests/test.proto b/tests/test.proto
index f3dde24..e634ed2 100644
--- a/tests/test.proto
+++ b/tests/test.proto
@@ -1,14 +1,10 @@
// A series of messages with various kinds of cycles in them.
-// +-+---+ +---+
-// V | | | |
-// A -> B-+-> C -> D<--+
-// ^ | |
-// +----------+----+
-//
-// This tests the following cases:
-// - B and C are together in multiple cycles
-// - B and D are cycles to themselves.
+// +-+---+ +---+ +---+
+// V | | V | V |
+// A -> B-+-> C -> D---+--->E---+
+// ^ |`---|--------^
+// +----------+----+ F
message A {
optional B b = 1;
@@ -23,11 +19,21 @@ message C {
optional A a = 1;
optional B b = 2;
optional D d = 3;
+ optional E e = 4;
}
message D {
optional A a = 1;
optional D d = 2;
+ optional E e = 3;
+}
+
+message E {
+ optional E e = 1;
+}
+
+message F {
+ optional E e = 1;
}
// A proto with a bunch of simple primitives.
diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc
index 5182217..4d70e85 100644
--- a/tests/test_cpp.cc
+++ b/tests/test_cpp.cc
@@ -15,6 +15,7 @@
#include "upb/upb.hpp"
#include "upb/pb/decoder.hpp"
#include "upb/pb/glue.hpp"
+#include "upb_test.h"
static void TestSymbolTable(const char *descriptor_file) {
upb::SymbolTable *s = upb::SymbolTable::New();
@@ -23,20 +24,20 @@ static void TestSymbolTable(const char *descriptor_file) {
std::cerr << "Couldn't load descriptor: " << status;
exit(1);
}
- const upb::MessageDef *md = s->LookupMessage("A");
- assert(md);
+ const upb::MessageDef *md = s->LookupMessage("A", &md);
+ ASSERT(md);
s->Unref();
- md->Unref();
+ md->Unref(&md);
}
static void TestByteStream() {
upb::StringSource stringsrc;
stringsrc.Reset("testing", 7);
upb::ByteRegion* byteregion = stringsrc.AllBytes();
- assert(byteregion->FetchAll() == UPB_BYTE_OK);
+ ASSERT(byteregion->FetchAll() == UPB_BYTE_OK);
char* str = byteregion->StrDup();
- assert(strcmp(str, "testing") == 0);
+ ASSERT(strcmp(str, "testing") == 0);
free(str);
}
diff --git a/tests/test_decoder.c b/tests/test_decoder.cc
index 14d0e2d..13403bb 100644
--- a/tests/test_decoder.c
+++ b/tests/test_decoder.cc
@@ -21,6 +21,10 @@
* of submsg/sequences, etc.
*/
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS // For PRIu32, etc.
+#endif
+
#include <inttypes.h>
#include <stdarg.h>
#include <stdint.h>
@@ -32,95 +36,133 @@
#include "upb/upb.h"
#include "upb_test.h"
+// Copied from decoder.c, since this is not a public interface.
typedef struct {
- char *buf;
- size_t len;
-} buffer;
+ uint8_t native_wire_type;
+ bool is_numeric;
+} upb_decoder_typeinfo;
+
+static const upb_decoder_typeinfo upb_decoder_types[] = {
+ {UPB_WIRE_TYPE_END_GROUP, false}, // ENDGROUP
+ {UPB_WIRE_TYPE_64BIT, true}, // DOUBLE
+ {UPB_WIRE_TYPE_32BIT, true}, // FLOAT
+ {UPB_WIRE_TYPE_VARINT, true}, // INT64
+ {UPB_WIRE_TYPE_VARINT, true}, // UINT64
+ {UPB_WIRE_TYPE_VARINT, true}, // INT32
+ {UPB_WIRE_TYPE_64BIT, true}, // FIXED64
+ {UPB_WIRE_TYPE_32BIT, true}, // FIXED32
+ {UPB_WIRE_TYPE_VARINT, true}, // BOOL
+ {UPB_WIRE_TYPE_DELIMITED, false}, // STRING
+ {UPB_WIRE_TYPE_START_GROUP, false}, // GROUP
+ {UPB_WIRE_TYPE_DELIMITED, false}, // MESSAGE
+ {UPB_WIRE_TYPE_DELIMITED, false}, // BYTES
+ {UPB_WIRE_TYPE_VARINT, true}, // UINT32
+ {UPB_WIRE_TYPE_VARINT, true}, // ENUM
+ {UPB_WIRE_TYPE_32BIT, true}, // SFIXED32
+ {UPB_WIRE_TYPE_64BIT, true}, // SFIXED64
+ {UPB_WIRE_TYPE_VARINT, true}, // SINT32
+ {UPB_WIRE_TYPE_VARINT, true}, // SINT64
+};
+
+
+class buffer {
+ public:
+ // Every constructor leaves the contents NUL-terminated at buf_[len_], so
+ // buf() can always be passed to "%s"-style consumers.
+ buffer(const void *data, size_t len) : len_(0) { append(data, len); }
+ explicit buffer(const char *data) : len_(0) { append(data); }
+ // Creates a buffer holding "len" zero bytes.
+ explicit buffer(size_t len) : len_(len) {
+ // Same capacity rule as append(): leave room for the trailing NUL.
+ ASSERT_NOCOUNT(len < sizeof(buf_));
+ memset(buf_, 0, len + 1);
+ }
+ buffer(const buffer& buf) : len_(0) { append(buf); }
+ // Creates an empty buffer; buf() is the empty string.
+ buffer() : len_(0) { buf_[0] = '\0'; }
+
+ // Appends "len" bytes from "data" and re-terminates the contents with NUL.
+ // Asserts that the fixed-size storage has room for the data plus the
+ // terminator.
+ void append(const void *data, size_t len) {
+ ASSERT_NOCOUNT(len + len_ < sizeof(buf_));
+ memcpy(buf_ + len_, data, len);
+ len_ += len;
+ // Use a character literal: NULL is a null-pointer constant, not a char.
+ buf_[len_] = '\0';
+ }
-// Mem is initialized to NULL.
-buffer *buffer_new(size_t len) {
- buffer *buf = malloc(sizeof(*buf));
- buf->buf = malloc(len);
- buf->len = len;
- memset(buf->buf, 0, buf->len);
- return buf;
-}
+ void append(const buffer& buf) {
+ append(buf.buf_, buf.len_);
+ }
-buffer *buffer_new2(const void *data, size_t len) {
- buffer *buf = buffer_new(len);
- memcpy(buf->buf, data, len);
- return buf;
-}
+ void append(const char *str) {
+ append(str, strlen(str));
+ }
-buffer *buffer_new3(const char *data) {
- return buffer_new2(data, strlen(data));
-}
+ void vappendf(const char *fmt, va_list args) {
+ size_t avail = sizeof(buf_) - len_;
+ size_t size = vsnprintf(buf_ + len_, avail, fmt, args);
+ ASSERT_NOCOUNT(avail > size);
+ len_ += size;
+ }
-buffer *buffer_dup(buffer *buf) { return buffer_new2(buf->buf, buf->len); }
+ void appendf(const char *fmt, ...) {
+ va_list args;
+ va_start(args, fmt);
+ vappendf(fmt, args);
+ va_end(args);
+ }
-void buffer_free(buffer *buf) {
- free(buf->buf);
- free(buf);
-}
+ void assign(const buffer& buf) {
+ clear();
+ append(buf);
+ }
-void buffer_appendf(buffer *buf, const char *fmt, ...) {
- va_list args;
- va_start(args, fmt);
- size_t size = buf->len;
- buf->len += upb_vrprintf(&buf->buf, &size, buf->len, fmt, args);
- va_end(args);
-}
+ bool eql(const buffer& other) const {
+ return len_ == other.len_ && memcmp(buf_, other.buf_, len_) == 0;
+ }
-void buffer_cat(buffer *buf, buffer *buf2) {
- size_t newlen = buf->len + buf2->len;
- buf->buf = realloc(buf->buf, newlen);
- memcpy(buf->buf + buf->len, buf2->buf, buf2->len);
- buf->len = newlen;
- buffer_free(buf2);
-}
+ // Empties the buffer; also re-terminates so buf() yields "" rather than the
+ // previous contents.
+ void clear() { len_ = 0; buf_[0] = '\0'; }
+ size_t len() const { return len_; }
+ const char *buf() const { return buf_; }
-bool buffer_eql(buffer *buf, buffer *buf2) {
- return buf->len == buf2->len && memcmp(buf->buf, buf2->buf, buf->len) == 0;
-}
+ private:
+ // Has to be big enough for the largest string used in the test.
+ char buf_[32768];
+ size_t len_;
+};
/* Routines for building arbitrary protos *************************************/
-buffer *cat(buffer *arg1, ...) {
- va_list ap;
- buffer *arg;
- va_start(ap, arg1);
- while ((arg = va_arg(ap, buffer*)) != NULL) {
- buffer_cat(arg1, arg);
- }
- va_end(ap);
- return arg1;
+const buffer empty;
+
+buffer cat(const buffer& a, const buffer& b,
+ const buffer& c = empty,
+ const buffer& d = empty,
+ const buffer& e = empty) {
+ buffer ret;
+ ret.append(a);
+ ret.append(b);
+ ret.append(c);
+ ret.append(d);
+ ret.append(e);
+ return ret;
}
-buffer *varint(uint64_t x) {
- buffer *buf = buffer_new(UPB_PB_VARINT_MAX_LEN + 1);
- buf->len = upb_vencode64(x, buf->buf);
- return buf;
+buffer varint(uint64_t x) {
+ char buf[UPB_PB_VARINT_MAX_LEN];
+ size_t len = upb_vencode64(x, buf);
+ return buffer(buf, len);
}
// TODO: proper byte-swapping for big-endian machines.
-buffer *fixed32(void *data) { return buffer_new2(data, 4); }
-buffer *fixed64(void *data) { return buffer_new2(data, 8); }
-
-buffer *delim(buffer *buf) { return cat( varint(buf->len), buf, NULL ); }
-buffer *uint32(uint32_t u32) { return fixed32(&u32); }
-buffer *uint64(uint64_t u64) { return fixed64(&u64); }
-buffer *flt(float f) { return fixed32(&f); }
-buffer *dbl(double d) { return fixed64(&d); }
-buffer *zz32(int32_t x) { return varint(upb_zzenc_32(x)); }
-buffer *zz64(int64_t x) { return varint(upb_zzenc_64(x)); }
-
-buffer *tag(uint32_t fieldnum, char wire_type) {
+buffer fixed32(void *data) { return buffer(data, 4); }
+buffer fixed64(void *data) { return buffer(data, 8); }
+
+buffer delim(const buffer& buf) { return cat(varint(buf.len()), buf); }
+buffer uint32(uint32_t u32) { return fixed32(&u32); }
+buffer uint64(uint64_t u64) { return fixed64(&u64); }
+buffer flt(float f) { return fixed32(&f); }
+buffer dbl(double d) { return fixed64(&d); }
+buffer zz32(int32_t x) { return varint(upb_zzenc_32(x)); }
+buffer zz64(int64_t x) { return varint(upb_zzenc_64(x)); }
+
+buffer tag(uint32_t fieldnum, char wire_type) {
return varint((fieldnum << 3) | wire_type);
}
-buffer *submsg(uint32_t fn, buffer *buf) {
- return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf), NULL );
+buffer submsg(uint32_t fn, const buffer& buf) {
+ return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) );
}
@@ -128,11 +170,26 @@ buffer *submsg(uint32_t fn, buffer *buf) {
// The handlers simply append to a string indicating what handlers were called.
// This string is similar to protobuf text format but fields are referred to by
-// number instead of name and sequences are explicitly delimited.
+// number instead of name and sequences are explicitly delimited. We indent
+// using the closure depth to test that the stack of closures is properly
+// handled.
+
+int closures[UPB_MAX_NESTING];
+buffer output;
+
+// Appends two spaces of indentation to "buf" for each level of "depth".
+void indentbuf(buffer *buf, int depth) {
+ // Use the C-string overload so the appended length always matches the
+ // literal; a mismatched explicit length would copy the literal's NUL.
+ for (int i = 0; i < depth; i++)
+ buf->append("  ");
+}
+
+void indent(void *depth) {
+ indentbuf(&output, *(int*)depth);
+}
#define VALUE_HANDLER(member, fmt) \
upb_flow_t value_ ## member(void *closure, upb_value fval, upb_value val) { \
- buffer_appendf(closure, "%" PRIu32 ":%" fmt "; ", \
+ indent(closure); \
+ output.appendf("%" PRIu32 ":%" fmt "\n", \
upb_value_getuint32(fval), upb_value_get ## member(val)); \
return UPB_CONTINUE; \
}
@@ -145,7 +202,8 @@ VALUE_HANDLER(float, "g")
VALUE_HANDLER(double, "g")
upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) {
- buffer_appendf(closure, "%" PRIu32 ":%s; ",
+ indent(closure);
+ output.appendf("%" PRIu32 ":%s\n",
upb_value_getuint32(fval),
upb_value_getbool(val) ? "true" : "false");
return UPB_CONTINUE;
@@ -153,34 +211,49 @@ upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) {
upb_flow_t value_string(void *closure, upb_value fval, upb_value val) {
// Note: won't work with strings that contain NULL.
+ indent(closure);
char *str = upb_byteregion_strdup(upb_value_getbyteregion(val));
- buffer_appendf(closure, "%" PRIu32 ":%s; ", upb_value_getuint32(fval), str);
+ output.appendf("%" PRIu32 ":%s\n", upb_value_getuint32(fval), str);
free(str);
return UPB_CONTINUE;
}
upb_sflow_t startsubmsg(void *closure, upb_value fval) {
- buffer_appendf(closure, "%" PRIu32 ":{ ", upb_value_getuint32(fval));
- return UPB_CONTINUE_WITH(closure);
+ indent(closure);
+ output.appendf("%" PRIu32 ":{\n", upb_value_getuint32(fval));
+ return UPB_CONTINUE_WITH(((int*)closure) + 1);
}
upb_flow_t endsubmsg(void *closure, upb_value fval) {
- (void)fval;
- buffer_appendf(closure, "} ");
+ indent(closure);
+ output.append("}\n");
return UPB_CONTINUE;
}
upb_sflow_t startseq(void *closure, upb_value fval) {
- buffer_appendf(closure, "%" PRIu32 ":[ ", upb_value_getuint32(fval));
- return UPB_CONTINUE_WITH(closure);
+ indent(closure);
+ output.appendf("%" PRIu32 ":[\n", upb_value_getuint32(fval));
+ return UPB_CONTINUE_WITH(((int*)closure) + 1);
}
upb_flow_t endseq(void *closure, upb_value fval) {
- (void)fval;
- buffer_appendf(closure, "] ");
+ indent(closure);
+ output.append("]\n");
return UPB_CONTINUE;
}
+upb_flow_t startmsg(void *closure) {
+ indent(closure);
+ output.append("<\n");
+ return UPB_CONTINUE;
+}
+
+void endmsg(void *closure, upb_status *status) {
+ (void)status;
+ indent(closure);
+ output.append(">\n");
+}
+
void doreg(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type, bool repeated,
upb_value_handler *handler) {
upb_fhandlers *f = upb_mhandlers_newfhandlers(m, num, type, repeated);
@@ -221,6 +294,9 @@ void reg_subm(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type,
}
void reghandlers(upb_mhandlers *m) {
+ upb_mhandlers_setstartmsg(m, &startmsg);
+ upb_mhandlers_setendmsg(m, &endmsg);
+
// Register handlers for each type.
reg(m, UPB_TYPE(DOUBLE), &value_double);
reg(m, UPB_TYPE(FLOAT), &value_float);
@@ -267,7 +343,7 @@ size_t upb_seamsrc_avail(const upb_seamsrc *src, size_t ofs) {
}
upb_bytesuccess_t upb_seamsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
- upb_seamsrc *src = _src;
+ upb_seamsrc *src = (upb_seamsrc*)_src;
assert(ofs < src->len);
if (ofs == src->len) {
upb_status_seteof(&src->bytesrc.status);
@@ -279,7 +355,7 @@ upb_bytesuccess_t upb_seamsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
void upb_seamsrc_copy(const void *_src, uint64_t ofs,
size_t len, char *dst) {
- const upb_seamsrc *src = _src;
+ const upb_seamsrc *src = (const upb_seamsrc*)_src;
assert(ofs + len <= src->len);
memcpy(dst, src->str + ofs, len);
}
@@ -290,7 +366,7 @@ void upb_seamsrc_discard(void *src, uint64_t ofs) {
}
const char *upb_seamsrc_getptr(const void *_s, uint64_t ofs, size_t *len) {
- const upb_seamsrc *src = _s;
+ const upb_seamsrc *src = (const upb_seamsrc*)_s;
*len = upb_seamsrc_avail(src, ofs);
return src->str + ofs;
}
@@ -314,7 +390,7 @@ void upb_seamsrc_init(upb_seamsrc *s, const char *str, size_t len) {
}
void upb_seamsrc_resetseams(upb_seamsrc *s, size_t seam1, size_t seam2) {
- ASSERT(seam1 <= seam2);
+ assert(seam1 <= seam2);
s->seam1 = seam1;
s->seam2 = seam2;
s->byteregion.discard = 0;
@@ -337,83 +413,68 @@ upb_byteregion *upb_seamsrc_allbytes(upb_seamsrc *s) {
/* Running of test cases ******************************************************/
upb_decoderplan *plan;
-
-void run_decoder(buffer *proto, buffer *expected_output) {
+#define LINE(x) x "\n"
+void run_decoder(const buffer& proto, const buffer* expected_output) {
upb_seamsrc src;
- upb_seamsrc_init(&src, proto->buf, proto->len);
+ upb_seamsrc_init(&src, proto.buf(), proto.len());
upb_decoder d;
upb_decoder_init(&d);
upb_decoder_resetplan(&d, plan, 0);
- for (size_t i = 0; i < proto->len; i++) {
- for (size_t j = i; j < proto->len; j++) {
+ for (size_t i = 0; i < proto.len(); i++) {
+ for (size_t j = i; j < UPB_MIN(proto.len(), i + 5); j++) {
upb_seamsrc_resetseams(&src, i, j);
upb_byteregion *input = upb_seamsrc_allbytes(&src);
- buffer *output = buffer_new(0);
- upb_decoder_resetinput(&d, input, output);
+ output.clear();
+ upb_decoder_resetinput(&d, input, &closures[0]);
upb_success_t success = UPB_SUSPENDED;
while (success == UPB_SUSPENDED)
success = upb_decoder_decode(&d);
ASSERT(upb_ok(upb_decoder_status(&d)) == (success == UPB_OK));
if (expected_output) {
- ASSERT(success == UPB_OK);
+ ASSERT_STATUS(success == UPB_OK, upb_decoder_status(&d));
// The input should be fully consumed.
ASSERT(upb_byteregion_fetchofs(input) == upb_byteregion_endofs(input));
ASSERT(upb_byteregion_discardofs(input) ==
upb_byteregion_endofs(input));
- if (!buffer_eql(output, expected_output)) {
+ if (!output.eql(*expected_output)) {
fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
- output->buf, expected_output->buf);
+ output.buf(), expected_output->buf());
}
- ASSERT(strcmp(output->buf, expected_output->buf) == 0);
+ ASSERT(output.eql(*expected_output));
} else {
ASSERT(success == UPB_ERROR);
}
- buffer_free(output);
}
}
- upb_seamsrc_uninit(&src);
upb_decoder_uninit(&d);
- buffer_free(proto);
-}
-
-void assert_successful_parse_at_eof(buffer *proto, const char *expected_fmt,
- va_list args) {
- buffer *expected_text = buffer_new(0);
- size_t size = expected_text->len;
- expected_text->len += upb_vrprintf(&expected_text->buf, &size,
- expected_text->len, expected_fmt, args);
- run_decoder(proto, expected_text);
- buffer_free(expected_text);
+ upb_seamsrc_uninit(&src);
}
-void assert_does_not_parse_at_eof(buffer *proto) {
- run_decoder(proto, NULL);
-}
+const static buffer thirty_byte_nop = buffer(cat(
+ tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(buffer(30)) ));
-void assert_successful_parse(buffer *proto, const char *expected_fmt, ...) {
- // The JIT is only used for data >=20 bytes from end-of-buffer, so
- // repeat once with no-op padding data at the end of buffer.
- va_list args, args2;
+void assert_successful_parse(const buffer& proto,
+ const char *expected_fmt, ...) {
+ buffer expected_text;
+ va_list args;
va_start(args, expected_fmt);
- va_copy(args2, args);
- assert_successful_parse_at_eof(buffer_dup(proto), expected_fmt, args);
- assert_successful_parse_at_eof(
- cat( proto,
- tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(buffer_new(30)),
- NULL ),
- expected_fmt, args2);
+ expected_text.vappendf(expected_fmt, args);
va_end(args);
- va_end(args2);
+ // The JIT is only used for data >=20 bytes from end-of-buffer, so
+ // repeat once with no-op padding data at the end of buffer.
+ run_decoder(proto, &expected_text);
+ run_decoder(cat( proto, thirty_byte_nop ), &expected_text);
}
-void assert_does_not_parse(buffer *proto) {
+void assert_does_not_parse_at_eof(const buffer& proto) {
+ run_decoder(proto, NULL);
+}
+
+void assert_does_not_parse(const buffer& proto) {
// The JIT is only used for data >=20 bytes from end-of-buffer, so
// repeat once with no-op padding data at the end of buffer.
- assert_does_not_parse_at_eof(buffer_dup(proto));
- assert_does_not_parse_at_eof(
- cat( proto,
- tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim( buffer_new(30)),
- NULL ));
+ assert_does_not_parse_at_eof(proto);
+ assert_does_not_parse_at_eof(cat( proto, thirty_byte_nop ));
}
@@ -421,19 +482,19 @@ void assert_does_not_parse(buffer *proto) {
void test_premature_eof_for_type(upb_fieldtype_t type) {
// Incomplete values for each wire type.
- static const char *incompletes[] = {
- "\x80", // UPB_WIRE_TYPE_VARINT
- "abcdefg", // UPB_WIRE_TYPE_64BIT
- "\x80", // UPB_WIRE_TYPE_DELIMITED (partial length)
- NULL, // UPB_WIRE_TYPE_START_GROUP (no value required)
- NULL, // UPB_WIRE_TYPE_END_GROUP (no value required)
- "abc" // UPB_WIRE_TYPE_32BIT
+ static const buffer incompletes[6] = {
+ buffer("\x80"), // UPB_WIRE_TYPE_VARINT
+ buffer("abcdefg"), // UPB_WIRE_TYPE_64BIT
+ buffer("\x80"), // UPB_WIRE_TYPE_DELIMITED (partial length)
+ buffer(), // UPB_WIRE_TYPE_START_GROUP (no value required)
+ buffer(), // UPB_WIRE_TYPE_END_GROUP (no value required)
+ buffer("abc") // UPB_WIRE_TYPE_32BIT
};
uint32_t fieldnum = type;
uint32_t rep_fieldnum = rep_fn(type);
- int wire_type = upb_types[type].native_wire_type;
- const char *incomplete = incompletes[wire_type];
+ int wire_type = upb_decoder_types[type].native_wire_type;
+ const buffer& incomplete = incompletes[wire_type];
// EOF before a known non-repeated value.
assert_does_not_parse_at_eof(tag(fieldnum, wire_type));
@@ -446,108 +507,128 @@ void test_premature_eof_for_type(upb_fieldtype_t type) {
// EOF inside a known non-repeated value.
assert_does_not_parse_at_eof(
- cat( tag(fieldnum, wire_type), buffer_new3(incomplete), NULL ));
+ cat( tag(fieldnum, wire_type), incomplete ));
// EOF inside a known repeated value.
assert_does_not_parse_at_eof(
- cat( tag(rep_fieldnum, wire_type), buffer_new3(incomplete), NULL ));
+ cat( tag(rep_fieldnum, wire_type), incomplete ));
// EOF inside an unknown value.
assert_does_not_parse_at_eof(
- cat( tag(UNKNOWN_FIELD, wire_type), buffer_new3(incomplete), NULL ));
+ cat( tag(UNKNOWN_FIELD, wire_type), incomplete ));
if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
// EOF in the middle of delimited data for known non-repeated value.
assert_does_not_parse_at_eof(
- cat( tag(fieldnum, wire_type), varint(1), NULL ));
+ cat( tag(fieldnum, wire_type), varint(1) ));
// EOF in the middle of delimited data for known repeated value.
assert_does_not_parse_at_eof(
- cat( tag(rep_fieldnum, wire_type), varint(1), NULL ));
+ cat( tag(rep_fieldnum, wire_type), varint(1) ));
// EOF in the middle of delimited data for unknown value.
assert_does_not_parse_at_eof(
- cat( tag(UNKNOWN_FIELD, wire_type), varint(1), NULL ));
+ cat( tag(UNKNOWN_FIELD, wire_type), varint(1) ));
if (type == UPB_TYPE(MESSAGE)) {
// Submessage ends in the middle of a value.
- buffer *incomplete_submsg =
+ buffer incomplete_submsg =
cat ( tag(UPB_TYPE(INT32), UPB_WIRE_TYPE_VARINT),
- buffer_new3(incompletes[UPB_WIRE_TYPE_VARINT]), NULL );
+ incompletes[UPB_WIRE_TYPE_VARINT] );
assert_does_not_parse(
cat( tag(fieldnum, UPB_WIRE_TYPE_DELIMITED),
- varint(incomplete_submsg->len),
- incomplete_submsg, NULL ));
+ varint(incomplete_submsg.len()),
+ incomplete_submsg ));
}
} else {
// Packed region ends in the middle of a value.
assert_does_not_parse(
cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
- varint(strlen(incomplete)),
- buffer_new3(incomplete), NULL ));
+ varint(incomplete.len()),
+ incomplete ));
// EOF in the middle of packed region.
assert_does_not_parse_at_eof(
- cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1), NULL ));
+ cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1) ));
}
}
// "33" and "66" are just two random values that all numeric types can
// represent.
void test_valid_data_for_type(upb_fieldtype_t type,
- buffer *enc33, buffer *enc66) {
+ const buffer& enc33, const buffer& enc66) {
uint32_t fieldnum = type;
uint32_t rep_fieldnum = rep_fn(type);
- int wire_type = upb_types[type].native_wire_type;
+ int wire_type = upb_decoder_types[type].native_wire_type;
// Non-repeated
assert_successful_parse(
- cat( tag(fieldnum, wire_type), buffer_dup(enc33),
- tag(fieldnum, wire_type), buffer_dup(enc66), NULL ),
- "%u:33; %u:66; ", fieldnum, fieldnum);
+ cat( tag(fieldnum, wire_type), enc33,
+ tag(fieldnum, wire_type), enc66 ),
+ LINE("<")
+ LINE("%u:33")
+ LINE("%u:66")
+ LINE(">"), fieldnum, fieldnum);
// Non-packed repeated.
assert_successful_parse(
- cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33),
- tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ),
- "%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+ cat( tag(rep_fieldnum, wire_type), enc33,
+ tag(rep_fieldnum, wire_type), enc66 ),
+ LINE("<")
+ LINE("%u:[")
+ LINE(" %u:33")
+ LINE(" %u:66")
+ LINE("]")
+ LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
// Packed repeated.
assert_successful_parse(
cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
- delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ),
- "%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
-
- buffer_free(enc33);
- buffer_free(enc66);
+ delim(cat( enc33, enc66 )) ),
+ LINE("<")
+ LINE("%u:[")
+ LINE(" %u:33")
+ LINE(" %u:66")
+ LINE("]")
+ LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
}
void test_valid_data_for_signed_type(upb_fieldtype_t type,
- buffer *enc33, buffer *enc66) {
+ const buffer& enc33, const buffer& enc66) {
uint32_t fieldnum = type;
uint32_t rep_fieldnum = rep_fn(type);
- int wire_type = upb_types[type].native_wire_type;
+ int wire_type = upb_decoder_types[type].native_wire_type;
// Non-repeated
assert_successful_parse(
- cat( tag(fieldnum, wire_type), buffer_dup(enc33),
- tag(fieldnum, wire_type), buffer_dup(enc66), NULL ),
- "%u:33; %u:-66; ", fieldnum, fieldnum);
+ cat( tag(fieldnum, wire_type), enc33,
+ tag(fieldnum, wire_type), enc66 ),
+ LINE("<")
+ LINE("%u:33")
+ LINE("%u:-66")
+ LINE(">"), fieldnum, fieldnum);
// Non-packed repeated.
assert_successful_parse(
- cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33),
- tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ),
- "%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+ cat( tag(rep_fieldnum, wire_type), enc33,
+ tag(rep_fieldnum, wire_type), enc66 ),
+ LINE("<")
+ LINE("%u:[")
+ LINE(" %u:33")
+ LINE(" %u:-66")
+ LINE("]")
+ LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
// Packed repeated.
assert_successful_parse(
cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
- delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ),
- "%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
-
- buffer_free(enc33);
- buffer_free(enc66);
+ delim(cat( enc33, enc66 )) ),
+ LINE("<")
+ LINE("%u:[")
+ LINE(" %u:33")
+ LINE(" %u:-66")
+ LINE("]")
+ LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
}
// Test that invalid protobufs are properly detected (without crashing) and
@@ -571,7 +652,7 @@ void test_invalid() {
test_premature_eof_for_type(UPB_TYPE(SINT64));
// EOF inside a tag's varint.
- assert_does_not_parse_at_eof( buffer_new3("\x80") );
+ assert_does_not_parse_at_eof( buffer("\x80") );
// EOF inside a known group.
assert_does_not_parse_at_eof( tag(4, UPB_WIRE_TYPE_START_GROUP) );
@@ -584,33 +665,19 @@ void test_invalid() {
// Field number is 0.
assert_does_not_parse(
- cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0), NULL ));
+ cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0) ));
// Field number is too large.
assert_does_not_parse(
cat( tag(UPB_MAX_FIELDNUMBER + 1, UPB_WIRE_TYPE_DELIMITED),
- varint(0), NULL ));
+ varint(0) ));
// Test exceeding the resource limit of stack depth.
- buffer *buf = buffer_new3("");
+ buffer buf;
for (int i = 0; i < UPB_MAX_NESTING; i++) {
- buf = submsg(UPB_TYPE(MESSAGE), buf);
+ buf.assign(submsg(UPB_TYPE(MESSAGE), buf));
}
assert_does_not_parse(buf);
-
- // Staying within the stack limit should work properly.
- buf = buffer_new3("");
- buffer *textbuf = buffer_new3("");
- int total = UPB_MAX_NESTING - 1;
- for (int i = 0; i < total; i++) {
- buf = submsg(UPB_TYPE(MESSAGE), buf);
- buffer_appendf(textbuf, "%u:{ ", UPB_TYPE(MESSAGE));
- }
- for (int i = 0; i < total; i++) {
- buffer_appendf(textbuf, "} ");
- }
- assert_successful_parse(buf, "%s", textbuf->buf);
- buffer_free(textbuf);
}
void test_valid() {
@@ -629,16 +696,80 @@ void test_valid() {
test_valid_data_for_type(UPB_TYPE(FIXED64), uint64(33), uint64(66));
test_valid_data_for_type(UPB_TYPE(FIXED32), uint32(33), uint32(66));
+ // Test implicit startseq/endseq.
+ uint32_t repfl_fn = rep_fn(UPB_TYPE(FLOAT));
+ uint32_t repdb_fn = rep_fn(UPB_TYPE(DOUBLE));
+ assert_successful_parse(
+ cat( tag(repfl_fn, UPB_WIRE_TYPE_32BIT), flt(33),
+ tag(repdb_fn, UPB_WIRE_TYPE_64BIT), dbl(66) ),
+ LINE("<")
+ LINE("%u:[")
+ LINE(" %u:33")
+ LINE("]")
+ LINE("%u:[")
+ LINE(" %u:66")
+ LINE("]")
+ LINE(">"), repfl_fn, repfl_fn, repdb_fn, repdb_fn);
+
// Submessage tests.
uint32_t msg_fn = UPB_TYPE(MESSAGE);
assert_successful_parse(
- submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, buffer_new3("")))),
- "%u:{ %u:{ %u:{ } } } ", msg_fn, msg_fn, msg_fn);
+ submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, buffer()))),
+ LINE("<")
+ LINE("%u:{")
+ LINE(" <")
+ LINE(" %u:{")
+ LINE(" <")
+ LINE(" %u:{")
+ LINE(" <")
+ LINE(" >")
+ LINE(" }")
+ LINE(" >")
+ LINE(" }")
+ LINE(" >")
+ LINE("}")
+ LINE(">"), msg_fn, msg_fn, msg_fn);
uint32_t repm_fn = rep_fn(UPB_TYPE(MESSAGE));
assert_successful_parse(
- submsg(repm_fn, submsg(repm_fn, buffer_new3(""))),
- "%u:[ %u:{ %u:[ %u:{ } ] } ] ", repm_fn, repm_fn, repm_fn, repm_fn);
+ submsg(repm_fn, submsg(repm_fn, buffer())),
+ LINE("<")
+ LINE("%u:[")
+ LINE(" %u:{")
+ LINE(" <")
+ LINE(" %u:[")
+ LINE(" %u:{")
+ LINE(" <")
+ LINE(" >")
+ LINE(" }")
+ LINE(" ]")
+ LINE(" >")
+ LINE(" }")
+ LINE("]")
+ LINE(">"), repm_fn, repm_fn, repm_fn, repm_fn);
+
+ // Staying within the stack limit should work properly.
+ buffer buf;
+ buffer textbuf;
+ int total = UPB_MAX_NESTING - 1;
+ for (int i = 0; i < total; i++) {
+ buf.assign(submsg(UPB_TYPE(MESSAGE), buf));
+ indentbuf(&textbuf, i);
+ textbuf.append("<\n");
+ indentbuf(&textbuf, i);
+ textbuf.appendf("%u:{\n", UPB_TYPE(MESSAGE));
+ }
+ indentbuf(&textbuf, total);
+ textbuf.append("<\n");
+ indentbuf(&textbuf, total);
+ textbuf.append(">\n");
+ for (int i = 0; i < total; i++) {
+ indentbuf(&textbuf, total - i - 1);
+ textbuf.append("}\n");
+ indentbuf(&textbuf, total - i - 1);
+ textbuf.append(">\n");
+ }
+ assert_successful_parse(buf, "%s", textbuf.buf());
}
void run_tests() {
@@ -647,10 +778,17 @@ void run_tests() {
}
int main() {
+ for (int i = 0; i < UPB_MAX_NESTING; i++) {
+ closures[i] = i;
+ }
// Construct decoder plan.
upb_handlers *h = upb_handlers_new();
reghandlers(upb_handlers_newmhandlers(h));
+ // Create an empty handlers to make sure that the decoder can handle empty
+ // messages.
+ upb_handlers_newmhandlers(h);
+
// Test without JIT.
plan = upb_decoderplan_new(h, false);
run_tests();
@@ -658,6 +796,11 @@ int main() {
// Test JIT.
plan = upb_decoderplan_new(h, true);
+#ifdef UPB_USE_JIT_X64
+ ASSERT(upb_decoderplan_hasjitcode(plan));
+#else
+ ASSERT(!upb_decoderplan_hasjitcode(plan));
+#endif
run_tests();
upb_decoderplan_unref(plan);
diff --git a/tests/test_def.c b/tests/test_def.c
index 3ca3064..698532e 100644
--- a/tests/test_def.c
+++ b/tests/test_def.c
@@ -1,19 +1,174 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc. See LICENSE for details.
+ *
+ * Test of defs and symtab. There should be far more tests of edge conditions
+ * (like attempts to link defs that don't have required properties set).
+ */
-#undef NDEBUG /* ensure tests always assert. */
#include "upb/def.h"
+#include "upb/pb/glue.h"
+#include "upb_test.h"
#include <stdlib.h>
+#include <string.h>
-int main() {
- upb_symtab *s = upb_symtab_new();
+const char *descriptor_file;
- // Will be empty atm since we haven't added anything to the symtab.
+static void test_empty_symtab() {
+ upb_symtab *s = upb_symtab_new();
int count;
- const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY);
- for (int i = 0; i < count; i++) {
- upb_def_unref(defs[i]);
- }
+ const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY, NULL);
+ ASSERT(count == 0);
free(defs);
+ upb_symtab_unref(s);
+}
+// Creates a new symtab and loads the descriptor file named by the global
+// descriptor_file into it.  On success the status object is released and the
+// symtab is returned; on failure the error text is printed to stderr and the
+// process exits with status 1.
+static upb_symtab *load_test_proto() {
+  upb_symtab *s = upb_symtab_new();
+  ASSERT(s);
+  upb_status load_status = UPB_STATUS_INIT;
+  if (upb_load_descriptor_file_into_symtab(s, descriptor_file, &load_status)) {
+    // Success path: the status is no longer needed.
+    upb_status_uninit(&load_status);
+    return s;
+  }
+  fprintf(stderr, "Error loading descriptor file: %s\n",
+          upb_status_getstr(&load_status));
+  exit(1);
+}
+
+// Looks up message "A" in the test proto, follows its field number 1 to
+// message "B", and then releases the symtab and both def refs in a specific
+// order (see the comments below).  The trailing &def / &def2 arguments look
+// like owner tokens for each ref -- TODO confirm against upb/def.h.
+static void test_cycles() {
+ upb_symtab *s = load_test_proto();
+
+ // Test cycle detection by making a cyclic def's main refcount go to zero
+ // and then be incremented to one again.
+ const upb_def *def = upb_symtab_lookup(s, "A", &def);
+ ASSERT(def);
+ ASSERT(upb_def_isfinalized(def));
upb_symtab_unref(s);
+
+ // Message A has only one subfield: "optional B b = 1".
+ const upb_msgdef *m = upb_downcast_msgdef_const(def);
+ upb_fielddef *f = upb_msgdef_itof(m, 1);
+ ASSERT(f);
+ ASSERT(upb_hassubdef(f));
+ const upb_def *def2 = upb_fielddef_subdef(f);
+ ASSERT(upb_downcast_msgdef_const(def2));
+ ASSERT(strcmp(upb_def_fullname(def2), "B") == 0);
+
+ // Take a ref on B first, then drop the ref on A, and release B last.
+ upb_def_ref(def2, &def2);
+ upb_def_unref(def, &def);
+ upb_def_unref(def2, &def2);
+}
+
+// Checks that a ref held on a fielddef is enough to keep its containing
+// msgdef usable after the symtab and the msgdef's own ref have been dropped,
+// and then releases that last ref.
+static void test_fielddef_unref() {
+  upb_symtab *s = load_test_proto();
+  const upb_msgdef *md = upb_symtab_lookupmsg(s, "A", &md);
+  // Fail with a readable assertion (as test_cycles does) instead of crashing
+  // on a NULL pointer if the descriptor lacks message "A" or its field 1.
+  ASSERT(md);
+  upb_fielddef *f = upb_msgdef_itof(md, 1);
+  ASSERT(f);
+  upb_fielddef_ref(f, &f);
+
+  // Unref symtab and msgdef; now fielddef is the only thing keeping the msgdef
+  // alive.
+  upb_symtab_unref(s);
+  upb_msgdef_unref(md, &md);
+  // Check that md is still alive.
+  ASSERT(strcmp(upb_def_fullname(UPB_UPCAST(md)), "A") == 0);
+
+  // Check that unref of fielddef frees the whole remaining graph.
+  upb_fielddef_unref(f, &f);
+}
+
+// Configures two independent fielddefs through the setters, checks that each
+// one is mutable and reports the number it was given, then releases both.
+static void test_fielddef_accessors() {
+  // First field: "f1", number 1937, repeated fixed64.
+  upb_fielddef *f1 = upb_fielddef_new(&f1);
+  ASSERT(upb_fielddef_ismutable(f1));
+  upb_fielddef_setname(f1, "f1");
+  upb_fielddef_setnumber(f1, 1937);
+  upb_fielddef_settype(f1, UPB_TYPE(FIXED64));
+  upb_fielddef_setlabel(f1, UPB_LABEL(REPEATED));
+  ASSERT(upb_fielddef_number(f1) == 1937);
+
+  // Second field: "f2", number 1572, repeated bytes.
+  upb_fielddef *f2 = upb_fielddef_new(&f2);
+  ASSERT(upb_fielddef_ismutable(f2));
+  upb_fielddef_setname(f2, "f2");
+  upb_fielddef_setnumber(f2, 1572);
+  upb_fielddef_settype(f2, UPB_TYPE(BYTES));
+  upb_fielddef_setlabel(f2, UPB_LABEL(REPEATED));
+  ASSERT(upb_fielddef_number(f2) == 1572);
+
+  upb_fielddef_unref(f1, &f1);
+  upb_fielddef_unref(f2, &f2);
+}
+
+// Convenience constructor: creates a fielddef with upb_fielddef_new(owner),
+// applies the given name, number, type, label and subtype name through the
+// corresponding setters, and returns it.
+static upb_fielddef *newfield(
+    const char *name, int32_t num, uint8_t type, uint8_t label,
+    const char *type_name, void *owner) {
+  upb_fielddef *field = upb_fielddef_new(owner);
+  upb_fielddef_setname(field, name);
+  upb_fielddef_setnumber(field, num);
+  upb_fielddef_settype(field, type);
+  upb_fielddef_setlabel(field, label);
+  upb_fielddef_setsubtypename(field, type_name);
+  return field;
+}
+
+// Creates a msgdef with upb_msgdef_new(owner), sets its full name to `name`,
+// and returns it.
+static upb_msgdef *upb_msgdef_newnamed(const char *name, void *owner) {
+  upb_msgdef *msgdef = upb_msgdef_new(owner);
+  upb_def_setfullname(UPB_UPCAST(msgdef), name);
+  return msgdef;
+}
+
+// Creates an enumdef with upb_enumdef_new(owner), sets its full name to
+// `name`, and returns it.
+INLINE upb_enumdef *upb_enumdef_newnamed(const char *name, void *owner) {
+  upb_enumdef *enumdef = upb_enumdef_new(owner);
+  upb_def_setfullname(UPB_UPCAST(enumdef), name);
+  return enumdef;
+}
+
+// Checks what happens to existing defs when a def they refer to is replaced
+// in the symtab: MyMessage (which has an enum field of type ".MyEnum") must
+// come back as a different object after MyEnum is re-added, while MyMessage2
+// (which has no fields) must come back as the same object.
+void test_replacement() {
+  upb_symtab *s = upb_symtab_new();
+
+  upb_msgdef *m = upb_msgdef_newnamed("MyMessage", &s);
+  upb_msgdef_addfield(m, newfield(
+      "field1", 1, UPB_TYPE(ENUM), UPB_LABEL(OPTIONAL), ".MyEnum", &s), &s);
+  upb_msgdef *m2 = upb_msgdef_newnamed("MyMessage2", &s);
+  upb_enumdef *e = upb_enumdef_newnamed("MyEnum", &s);
+
+  upb_def *newdefs[] = {UPB_UPCAST(m), UPB_UPCAST(m2), UPB_UPCAST(e)};
+  upb_status status = UPB_STATUS_INIT;
+  ASSERT_STATUS(upb_symtab_add(s, newdefs, 3, &s, &status), &status);
+
+  // Try adding a new definition of MyEnum; MyMessage should get replaced with
+  // a new version.
+  upb_enumdef *e2 = upb_enumdef_new(&s);
+  upb_def_setfullname(UPB_UPCAST(e2), "MyEnum");
+  upb_def *newdefs2[] = {UPB_UPCAST(e2)};
+  ASSERT_STATUS(upb_symtab_add(s, newdefs2, 1, &s, &status), &status);
+
+  const upb_msgdef *m3 = upb_symtab_lookupmsg(s, "MyMessage", &m3);
+  ASSERT(m3);
+  // Must be different because it points to MyEnum which was replaced.
+  ASSERT(m3 != m);
+  upb_msgdef_unref(m3, &m3);
+
+  m3 = upb_symtab_lookupmsg(s, "MyMessage2", &m3);
+  // Should be the same because it was not replaced, nor were any defs that
+  // are reachable from it.
+  ASSERT(m3 == m2);
+  upb_msgdef_unref(m3, &m3);
+
+  // Release the status object, matching the UPB_STATUS_INIT above (the same
+  // pairing load_test_proto uses).
+  upb_status_uninit(&status);
+  upb_symtab_unref(s);
+}
+
+// Entry point: takes the path of the compiled test descriptor as argv[1],
+// stores it in the global descriptor_file, and runs every test in order.
+int main(int argc, char *argv[]) {
+  if (argc < 2) {
+    fprintf(stderr, "Usage: test_def <test.proto.pb>\n");
+    return 1;
+  }
+  descriptor_file = argv[1];
+
+  // The tests, in the order they are executed.
+  void (*const tests[])() = {
+    test_empty_symtab,
+    test_cycles,
+    test_fielddef_accessors,
+    test_fielddef_unref,
+    test_replacement,
+  };
+  for (size_t i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+    tests[i]();
+  }
return 0;
}
diff --git a/tests/test_table.cc b/tests/test_table.cc
index 47e083f..2538e35 100644
--- a/tests/test_table.cc
+++ b/tests/test_table.cc
@@ -1,8 +1,11 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2012 Google Inc. See LICENSE for details.
+ *
+ * Tests for upb_table.
+ */
-#undef NDEBUG /* ensure tests always assert. */
-#include "upb/table.h"
-#include "test_util.h"
-#include <assert.h>
#include <string.h>
#include <sys/resource.h>
#include <ext/hash_map>
@@ -11,55 +14,45 @@
#include <set>
#include <string>
#include <vector>
+#include "tests/test_util.h"
+#include "tests/upb_test.h"
+#include "upb/table.h"
bool benchmark = false;
#define CPU_TIME_PER_TEST 0.5
using std::vector;
-typedef struct {
- uint32_t value; /* key*2 */
-} inttable_entry;
-
-typedef struct {
- int32_t value; /* ASCII Value of first letter */
-} strtable_entry;
-
-double get_usertime()
-{
+double get_usertime() {
struct rusage usage;
getrusage(RUSAGE_SELF, &usage);
return usage.ru_utime.tv_sec + (usage.ru_utime.tv_usec/1000000.0);
}
/* num_entries must be a power of 2. */
-void test_strtable(const vector<std::string>& keys, uint32_t num_to_insert)
-{
+void test_strtable(const vector<std::string>& keys, uint32_t num_to_insert) {
/* Initialize structures. */
upb_strtable table;
std::map<std::string, int32_t> m;
- upb_strtable_init(&table, 0, sizeof(strtable_entry));
+ upb_strtable_init(&table);
std::set<std::string> all;
for(size_t i = 0; i < num_to_insert; i++) {
const std::string& key = keys[i];
all.insert(key);
- strtable_entry e;
- e.value = key[0];
- upb_strtable_insert(&table, key.c_str(), &e);
+ upb_strtable_insert(&table, key.c_str(), upb_value_int32(key[0]));
m[key] = key[0];
}
/* Test correctness. */
for(uint32_t i = 0; i < keys.size(); i++) {
const std::string& key = keys[i];
- strtable_entry *e =
- (strtable_entry*)upb_strtable_lookup(&table, key.c_str());
+ const upb_value *v = upb_strtable_lookup(&table, key.c_str());
if(m.find(key) != m.end()) { /* Assume map implementation is correct. */
- assert(e);
- assert(e->value == key[0]);
- assert(m[key] == key[0]);
+ ASSERT(v);
+ ASSERT(upb_value_getint32(*v) == key[0]);
+ ASSERT(m[key] == key[0]);
} else {
- assert(e == NULL);
+ ASSERT(v == NULL);
}
}
@@ -69,66 +62,83 @@ void test_strtable(const vector<std::string>& keys, uint32_t num_to_insert)
const char *key = upb_strtable_iter_key(&iter);
std::string tmp(key, strlen(key));
std::set<std::string>::iterator i = all.find(tmp);
- assert(i != all.end());
+ ASSERT(i != all.end());
all.erase(i);
}
- assert(all.empty());
+ ASSERT(all.empty());
- upb_strtable_free(&table);
+ upb_strtable_uninit(&table);
}
/* num_entries must be a power of 2. */
-void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
-{
+void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) {
/* Initialize structures. */
upb_inttable table;
uint32_t largest_key = 0;
std::map<uint32_t, uint32_t> m;
__gnu_cxx::hash_map<uint32_t, uint32_t> hm;
- upb_inttable_init(&table, num_entries, sizeof(inttable_entry));
+ upb_inttable_init(&table);
for(size_t i = 0; i < num_entries; i++) {
int32_t key = keys[i];
largest_key = UPB_MAX((int32_t)largest_key, key);
- inttable_entry e;
- e.value = (key*2) << 1;
- upb_inttable_insert(&table, key, &e);
+ upb_inttable_insert(&table, key, upb_value_uint32(key * 2));
m[key] = key*2;
hm[key] = key*2;
}
/* Test correctness. */
for(uint32_t i = 0; i <= largest_key; i++) {
- inttable_entry *e = (inttable_entry*)upb_inttable_lookup(
- &table, i);
+ const upb_value *v = upb_inttable_lookup(&table, i);
+ if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
+ ASSERT(v);
+ ASSERT(upb_value_getuint32(*v) == i*2);
+ ASSERT(m[i] == i*2);
+ ASSERT(hm[i] == i*2);
+ } else {
+ ASSERT(v == NULL);
+ }
+ }
+
+ for(uint16_t i = 0; i < num_entries; i += 2) {
+ upb_value val;
+ bool ret = upb_inttable_remove(&table, keys[i], &val);
+ ASSERT(ret == (m.erase(keys[i]) == 1));
+ if (ret) ASSERT(upb_value_getuint32(val) == keys[i] * 2);
+ hm.erase(keys[i]);
+ m.erase(keys[i]);
+ }
+
+ ASSERT(upb_inttable_count(&table) == hm.size());
+
+ /* Test correctness. */
+ for(uint32_t i = 0; i <= largest_key; i++) {
+ const upb_value *v = upb_inttable_lookup(&table, i);
if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
- assert(e);
- //printf("addr: %p, expected: %d, actual: %d\n", e, i*2, e->value);
- assert(((e->value) >> 1) == i*2);
- assert(m[i] == i*2);
- assert(hm[i] == i*2);
+ ASSERT(v);
+ ASSERT(upb_value_getuint32(*v) == i*2);
+ ASSERT(m[i] == i*2);
+ ASSERT(hm[i] == i*2);
} else {
- assert(e == NULL);
+ ASSERT(v == NULL);
}
}
// Compact and test correctness again.
upb_inttable_compact(&table);
for(uint32_t i = 0; i <= largest_key; i++) {
- inttable_entry *e = (inttable_entry*)upb_inttable_lookup(
- &table, i);
+ const upb_value *v = upb_inttable_lookup(&table, i);
if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
- assert(e);
- //printf("addr: %p, expected: %d, actual: %d\n", e, i*2, e->value);
- assert(((e->value) >> 1) == i*2);
- assert(m[i] == i*2);
- assert(hm[i] == i*2);
+ ASSERT(v);
+ ASSERT(upb_value_getuint32(*v) == i*2);
+ ASSERT(m[i] == i*2);
+ ASSERT(hm[i] == i*2);
} else {
- assert(e == NULL);
+ ASSERT(v == NULL);
}
}
if(!benchmark) {
- upb_inttable_free(&table);
+ upb_inttable_uninit(&table);
return;
}
@@ -141,7 +151,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
}
for(uint16_t i = num_entries - 1; i >= 1; i--) {
uint16_t rand_i = (random() / (double)RAND_MAX) * i;
- assert(rand_i <= i);
+ ASSERT(rand_i <= i);
uint16_t tmp = rand_order[rand_i];
rand_order[rand_i] = rand_order[i];
rand_order[i] = tmp;
@@ -162,8 +172,8 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
for(i = 0; true; i++) {
MAYBE_BREAK;
int32_t key = keys[i & mask];
- inttable_entry *e = (inttable_entry*)upb_inttable_lookup(&table, key);
- x += (uintptr_t)e;
+ const upb_value *v = upb_inttable_lookup32(&table, key);
+ x += (uintptr_t)v;
}
double total = get_usertime() - before;
printf("%s/s\n", eng(i/total, 3, false));
@@ -174,8 +184,8 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
for(i = 0; true; i++) {
MAYBE_BREAK;
int32_t key = keys[rand_order[i & mask]];
- inttable_entry *e = (inttable_entry*)upb_inttable_lookup(&table, key);
- x += (uintptr_t)e;
+ const upb_value *v = upb_inttable_lookup32(&table, key);
+ x += (uintptr_t)v;
}
total = get_usertime() - before;
printf("%s/s\n", eng(i/total, 3, false));
@@ -223,20 +233,18 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
}
total = get_usertime() - before;
printf("%s/s\n\n", eng(i/total, 3, false));
- upb_inttable_free(&table);
+ upb_inttable_uninit(&table);
delete rand_order;
}
-int32_t *get_contiguous_keys(int32_t num)
-{
+int32_t *get_contiguous_keys(int32_t num) {
int32_t *buf = new int32_t[num];
for(int32_t i = 0; i < num; i++)
- buf[i] = i+1;
+ buf[i] = i;
return buf;
}
-int main(int argc, char *argv[])
-{
+int main(int argc, char *argv[]) {
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "--benchmark") == 0) benchmark = true;
}
diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc
index 53b2498..020dca5 100644
--- a/tests/test_vs_proto2.cc
+++ b/tests/test_vs_proto2.cc
@@ -1,7 +1,7 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2011 Google Inc. See LICENSE for details.
+ * Copyright (c) 2011-2012 Google Inc. See LICENSE for details.
*
* A test that verifies that our results are identical to proto2 for a
* given proto type and input protobuf.
@@ -9,230 +9,87 @@
#define __STDC_LIMIT_MACROS // So we get UINT32_MAX
#include <assert.h>
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/wire_format_lite.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
-#include <google/protobuf/descriptor.h>
-#include <google/protobuf/wire_format_lite.h>
#include "benchmarks/google_messages.pb.h"
-#include "upb/def.h"
-#include "upb/msg.h"
+#include "upb/def.hpp"
+#include "upb/handlers.hpp"
+#include "upb/msg.hpp"
+#include "upb/pb/decoder.hpp"
#include "upb/pb/glue.h"
#include "upb/pb/varint.h"
+#include "upb/proto2_bridge.hpp"
#include "upb_test.h"
-size_t string_size;
-
-void compare(const google::protobuf::Message& proto2_msg,
- void *upb_msg, const upb_msgdef *upb_md);
-
-void compare_arrays(const google::protobuf::Reflection *r,
- const google::protobuf::Message& proto2_msg,
- const google::protobuf::FieldDescriptor *proto2_f,
- void *upb_msg, upb_fielddef *upb_f)
-{
- ASSERT(upb_msg_has(upb_msg, upb_f));
- ASSERT(upb_isseq(upb_f));
- const void *arr = upb_value_getptr(upb_msg_getseq(upb_msg, upb_f));
- const void *iter = upb_seq_begin(arr, upb_f);
- for(int i = 0;
- i < r->FieldSize(proto2_msg, proto2_f);
- i++, iter = upb_seq_next(arr, iter, upb_f)) {
- ASSERT(!upb_seq_done(iter));
- upb_value v = upb_seq_get(iter, upb_f);
- switch(upb_f->type) {
- default:
- ASSERT(false);
- case UPB_TYPE(DOUBLE):
- ASSERT(r->GetRepeatedDouble(proto2_msg, proto2_f, i) == upb_value_getdouble(v));
- break;
- case UPB_TYPE(FLOAT):
- ASSERT(r->GetRepeatedFloat(proto2_msg, proto2_f, i) == upb_value_getfloat(v));
- break;
- case UPB_TYPE(INT64):
- case UPB_TYPE(SINT64):
- case UPB_TYPE(SFIXED64):
- ASSERT(r->GetRepeatedInt64(proto2_msg, proto2_f, i) == upb_value_getint64(v));
- break;
- case UPB_TYPE(UINT64):
- case UPB_TYPE(FIXED64):
- ASSERT(r->GetRepeatedUInt64(proto2_msg, proto2_f, i) == upb_value_getuint64(v));
- break;
- case UPB_TYPE(SFIXED32):
- case UPB_TYPE(SINT32):
- case UPB_TYPE(INT32):
- case UPB_TYPE(ENUM):
- ASSERT(r->GetRepeatedInt32(proto2_msg, proto2_f, i) == upb_value_getint32(v));
- break;
- case UPB_TYPE(FIXED32):
- case UPB_TYPE(UINT32):
- ASSERT(r->GetRepeatedUInt32(proto2_msg, proto2_f, i) == upb_value_getuint32(v));
- break;
- case UPB_TYPE(BOOL):
- ASSERT(r->GetRepeatedBool(proto2_msg, proto2_f, i) == upb_value_getbool(v));
- break;
- case UPB_TYPE(STRING):
- case UPB_TYPE(BYTES): {
- std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i);
- upb_stdarray *upbstr = (upb_stdarray*)upb_value_getptr(v);
- std::string str2(upbstr->ptr, upbstr->len);
- string_size += upbstr->len;
- ASSERT(str == str2);
- break;
- }
- case UPB_TYPE(GROUP):
- case UPB_TYPE(MESSAGE):
- ASSERT(upb_dyncast_msgdef(upb_f->def) != NULL);
- compare(r->GetRepeatedMessage(proto2_msg, proto2_f, i),
- upb_value_getptr(v), upb_downcast_msgdef(upb_f->def));
- }
- }
- ASSERT(upb_seq_done(iter));
-}
-
-void compare_values(const google::protobuf::Reflection *r,
- const google::protobuf::Message& proto2_msg,
- const google::protobuf::FieldDescriptor *proto2_f,
- void *upb_msg, upb_fielddef *upb_f)
-{
- upb_value v = upb_msg_get(upb_msg, upb_f);
- switch(upb_f->type) {
- default:
- ASSERT(false);
- case UPB_TYPE(DOUBLE):
- ASSERT(r->GetDouble(proto2_msg, proto2_f) == upb_value_getdouble(v));
- break;
- case UPB_TYPE(FLOAT):
- ASSERT(r->GetFloat(proto2_msg, proto2_f) == upb_value_getfloat(v));
- break;
- case UPB_TYPE(INT64):
- case UPB_TYPE(SINT64):
- case UPB_TYPE(SFIXED64):
- ASSERT(r->GetInt64(proto2_msg, proto2_f) == upb_value_getint64(v));
- break;
- case UPB_TYPE(UINT64):
- case UPB_TYPE(FIXED64):
- ASSERT(r->GetUInt64(proto2_msg, proto2_f) == upb_value_getuint64(v));
- break;
- case UPB_TYPE(SFIXED32):
- case UPB_TYPE(SINT32):
- case UPB_TYPE(INT32):
- case UPB_TYPE(ENUM):
- ASSERT(r->GetInt32(proto2_msg, proto2_f) == upb_value_getint32(v));
- break;
- case UPB_TYPE(FIXED32):
- case UPB_TYPE(UINT32):
- ASSERT(r->GetUInt32(proto2_msg, proto2_f) == upb_value_getuint32(v));
- break;
- case UPB_TYPE(BOOL):
- ASSERT(r->GetBool(proto2_msg, proto2_f) == upb_value_getbool(v));
- break;
- case UPB_TYPE(STRING):
- case UPB_TYPE(BYTES): {
- std::string str = r->GetString(proto2_msg, proto2_f);
- upb_stdarray *upbstr = (upb_stdarray*)upb_value_getptr(v);
- std::string str2(upbstr->ptr, upbstr->len);
- string_size += upbstr->len;
- ASSERT(str == str2);
- break;
- }
- case UPB_TYPE(GROUP):
- case UPB_TYPE(MESSAGE):
- // XXX: getstr
- compare(r->GetMessage(proto2_msg, proto2_f),
- upb_value_getptr(v), upb_downcast_msgdef(upb_f->def));
- }
-}
-
-void compare(const google::protobuf::Message& proto2_msg,
- void *upb_msg, const upb_msgdef *upb_md)
-{
- const google::protobuf::Reflection *r = proto2_msg.GetReflection();
- const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor();
-
- ASSERT(d->field_count() == upb_msgdef_numfields(upb_md));
- upb_msg_iter i;
- for(i = upb_msg_begin(upb_md); !upb_msg_done(i); i = upb_msg_next(upb_md, i)) {
- upb_fielddef *upb_f = upb_msg_iter_field(i);
+void compare_metadata(const google::protobuf::Descriptor* d,
+ const upb::MessageDef *upb_md) {
+ ASSERT(d->field_count() == upb_md->field_count());
+ for (upb::MessageDef::ConstIterator i(upb_md); !i.Done(); i.Next()) {
+ const upb::FieldDef* upb_f = i.field();
const google::protobuf::FieldDescriptor *proto2_f =
- d->FindFieldByNumber(upb_f->number);
- // Make sure the definitions are equal.
+ d->FindFieldByNumber(upb_f->number());
ASSERT(upb_f);
ASSERT(proto2_f);
- ASSERT(upb_f->number == proto2_f->number());
- ASSERT(std::string(upb_f->name) == proto2_f->name());
- ASSERT(upb_f->type == proto2_f->type());
- ASSERT(upb_isseq(upb_f) == proto2_f->is_repeated());
-
- if(!upb_msg_has(upb_msg, upb_f)) {
- if(upb_isseq(upb_f))
- ASSERT(r->FieldSize(proto2_msg, proto2_f) == 0);
- else
- ASSERT(r->HasField(proto2_msg, proto2_f) == false);
- } else {
- if(upb_isseq(upb_f)) {
- compare_arrays(r, proto2_msg, proto2_f, upb_msg, upb_f);
- } else {
- ASSERT(r->HasField(proto2_msg, proto2_f) == true);
- compare_values(r, proto2_msg, proto2_f, upb_msg, upb_f);
- }
- }
+ ASSERT(upb_f->number() == proto2_f->number());
+ ASSERT(std::string(upb_f->name()) == proto2_f->name());
+ ASSERT(upb_f->type() == static_cast<upb::FieldType>(proto2_f->type()));
+ ASSERT(upb_f->IsSequence() == proto2_f->is_repeated());
}
}
-void parse_and_compare(MESSAGE_CIDENT *proto2_msg,
- void *upb_msg, const upb_msgdef *upb_md,
- const char *str, size_t len, bool allow_jit)
-{
+void parse_and_compare(MESSAGE_CIDENT *msg1, MESSAGE_CIDENT *msg2,
+ const upb::MessageDef *upb_md,
+ const char *str, size_t len, bool allow_jit) {
// Parse to both proto2 and upb.
- ASSERT(proto2_msg->ParseFromArray(str, len));
- upb_status status = UPB_STATUS_INIT;
- upb_msg_clear(upb_msg, upb_md);
- upb_strtomsg(str, len, upb_msg, upb_md, allow_jit, &status);
- if (!upb_ok(&status)) {
- fprintf(stderr, "Error parsing protobuf: %s", upb_status_getstr(&status));
- exit(1);
- }
- string_size = 0;
- compare(*proto2_msg, upb_msg, upb_md);
- printf("Total size: %zd, string size: %zd (%0.2f%%)\n", len,
- string_size, (double)string_size / len * 100);
- upb_status_uninit(&status);
+ ASSERT(msg1->ParseFromArray(str, len));
+
+ upb::Handlers* handlers = upb::Handlers::New();
+ upb::RegisterWriteHandlers(handlers, upb_md);
+ upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers, allow_jit);
+ upb::StringSource src(str, len);
+ upb::Decoder decoder;
+ decoder.ResetPlan(plan, 0);
+ decoder.ResetInput(src.AllBytes(), msg2);
+ msg2->Clear();
+ ASSERT(decoder.Decode() == UPB_OK);
+ plan->Unref();
+ handlers->Unref();
+
+ // Would like to just compare the message objects themselves, but
+ // unfortunately MessageDifferencer is not part of the open-source release of
+ // proto2, so we compare their serialized strings, which we expect will be
+ // equivalent.
+ std::string str1;
+ std::string str2;
+ msg1->SerializeToString(&str1);
+ msg2->SerializeToString(&str2);
+ ASSERT(str1 == str2);
+ ASSERT(std::string(str, len) == str2);
}
-int main(int argc, char *argv[])
-{
- if (argc < 3) {
- fprintf(stderr, "Usage: test_vs_proto2 <descriptor file> <message file>\n");
- return 1;
+// Checks that upb's zig-zag encoders agree with proto2's over a geometrically
+// growing sequence of values starting at 5 (each about 1.5x the previous one).
+// The 32-bit encoder is only compared while the value is below UINT32_MAX.
+void test_zig_zag() {
+  // Grow with integer arithmetic (num + num/2 == floor(1.5 * num)) instead of
+  // multiplying by 1.5 in floating point: a double-based loop can only end by
+  // converting an out-of-range double back to uint64_t, which is undefined.
+  for (uint64_t num = 5; ; num += num / 2) {
+    ASSERT(upb_zzenc_64(num) ==
+           google::protobuf::internal::WireFormatLite::ZigZagEncode64(num));
+    if (num < UINT32_MAX) {
+      ASSERT(upb_zzenc_32(num) ==
+             google::protobuf::internal::WireFormatLite::ZigZagEncode32(num));
+    }
+    // Stop once the next step would no longer fit in 64 bits.
+    if (num > UINT64_MAX - num / 2) break;
}
- const char *descriptor_file = argv[1];
- const char *message_file = argv[2];
- // Initialize upb state, parse descriptor.
- upb_status status = UPB_STATUS_INIT;
- upb_symtab *symtab = upb_symtab_new();
- size_t fds_len;
- const char *fds = upb_readfile(descriptor_file, &fds_len);
- if(fds == NULL) {
- fprintf(stderr, "Couldn't read %s.\n", descriptor_file);
- return 1;
- }
- upb_load_descriptor_into_symtab(symtab, fds, fds_len, &status);
- if(!upb_ok(&status)) {
- fprintf(stderr, "Error importing %s: %s", descriptor_file,
- upb_status_getstr(&status));
- return 1;
- }
- free((void*)fds);
+}
- const upb_def *def = upb_symtab_lookup(symtab, MESSAGE_NAME);
- const upb_msgdef *msgdef;
- if(!def || !(msgdef = upb_dyncast_msgdef_const(def))) {
- fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
+int main(int argc, char *argv[])
+{
+ if (argc < 2) {
+ fprintf(stderr, "Usage: test_vs_proto2 <message file>\n");
return 1;
}
+ const char *message_file = argv[1];
// Read the message data itself.
size_t len;
@@ -242,32 +99,25 @@ int main(int argc, char *argv[])
return 1;
}
+ MESSAGE_CIDENT msg1;
+ MESSAGE_CIDENT msg2;
+
+ const upb::MessageDef* m = upb::proto2_bridge::NewFinalMessageDef(msg1, &m);
+
+ compare_metadata(msg1.GetDescriptor(), m);
+
// Run twice to test proper object reuse.
- MESSAGE_CIDENT proto2_msg;
- void *upb_msg = upb_stdmsg_new(msgdef);
- parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true);
- parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false);
- parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true);
- parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false);
+ parse_and_compare(&msg1, &msg2, m, str, len, true);
+ parse_and_compare(&msg1, &msg2, m, str, len, false);
+ parse_and_compare(&msg1, &msg2, m, str, len, true);
+ parse_and_compare(&msg1, &msg2, m, str, len, false);
printf("All tests passed, %d assertions.\n", num_assertions);
- upb_stdmsg_free(upb_msg, msgdef);
- upb_def_unref(UPB_UPCAST(msgdef));
+ m->Unref(&m);
free((void*)str);
- upb_symtab_unref(symtab);
- upb_status_uninit(&status);
- // Test Zig-Zag encoding/decoding.
- for (uint64_t num = 5; num * 1.5 > num; num *= 1.5) {
- ASSERT(upb_zzenc_64(num) ==
- google::protobuf::internal::WireFormatLite::ZigZagEncode64(num));
- if (num < UINT32_MAX) {
- ASSERT(upb_zzenc_32(num) ==
- google::protobuf::internal::WireFormatLite::ZigZagEncode32(num));
- }
- }
+ test_zig_zag();
google::protobuf::ShutdownProtobufLibrary();
-
return 0;
}
diff --git a/tests/tests.c b/tests/tests.c
deleted file mode 100644
index 12ff4bb..0000000
--- a/tests/tests.c
+++ /dev/null
@@ -1,121 +0,0 @@
-
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "upb/def.h"
-#include "upb/handlers.h"
-#include "upb/pb/decoder.h"
-#include "upb/pb/glue.h"
-#include "upb_test.h"
-
-const char *descriptor_file;
-
-static upb_symtab *load_test_proto() {
- upb_symtab *s = upb_symtab_new();
- ASSERT(s);
- upb_status status = UPB_STATUS_INIT;
- if (!upb_load_descriptor_file_into_symtab(s, descriptor_file, &status)) {
- fprintf(stderr, "Error loading descriptor file: %s\n",
- upb_status_getstr(&status));
- exit(1);
- }
- upb_status_uninit(&status);
- return s;
-}
-
-static upb_flow_t upb_test_onvalue(void *c, upb_value fval, upb_value val) {
- (void)c;
- (void)fval;
- (void)val;
- return UPB_CONTINUE;
-}
-
-static void test_upb_jit() {
- upb_symtab *s = load_test_proto();
- const upb_def *def = upb_symtab_lookup(s, "SimplePrimitives");
- ASSERT(def);
-
- upb_handlers *h = upb_handlers_new();
- upb_handlerset hset = {NULL, NULL, &upb_test_onvalue, NULL, NULL, NULL, NULL};
- upb_handlers_reghandlerset(h, upb_downcast_msgdef_const(def), &hset);
- upb_decoderplan *p = upb_decoderplan_new(h, true);
-#ifdef UPB_USE_JIT_X64
- ASSERT(upb_decoderplan_hasjitcode(p));
-#else
- ASSERT(!upb_decoderplan_hasjitcode(p));
-#endif
- upb_decoderplan_unref(p);
- upb_symtab_unref(s);
- upb_def_unref(def);
- upb_handlers_unref(h);
-}
-
-static void test_upb_symtab() {
- upb_symtab *s = load_test_proto();
-
- // Test cycle detection by making a cyclic def's main refcount go to zero
- // and then be incremented to one again.
- const upb_def *def = upb_symtab_lookup(s, "A");
- ASSERT(def);
- upb_symtab_unref(s);
- const upb_msgdef *m = upb_downcast_msgdef_const(def);
- upb_msg_iter i = upb_msg_begin(m);
- ASSERT(!upb_msg_done(i));
- upb_fielddef *f = upb_msg_iter_field(i);
- ASSERT(upb_hassubdef(f));
- upb_def *def2 = f->def;
-
- i = upb_msg_next(m, i);
- ASSERT(upb_msg_done(i)); // "A" should only have one field.
-
- ASSERT(upb_downcast_msgdef(def2));
- upb_def_ref(def2);
- upb_def_unref(def);
- upb_def_unref(def2);
-}
-
-static void test_upb_two_fielddefs() {
- upb_fielddef *f1 = upb_fielddef_new();
- upb_fielddef *f2 = upb_fielddef_new();
-
- ASSERT(upb_fielddef_ismutable(f1));
- upb_fielddef_setname(f1, "");
- upb_fielddef_setnumber(f1, 1937);
- upb_fielddef_settype(f1, UPB_TYPE(FIXED64));
- upb_fielddef_setlabel(f1, UPB_LABEL(REPEATED));
- upb_fielddef_settypename(f1, "");
- ASSERT(upb_fielddef_number(f1) == 1937);
-
- ASSERT(upb_fielddef_ismutable(f2));
- upb_fielddef_setname(f2, "");
- upb_fielddef_setnumber(f2, 1572);
- upb_fielddef_settype(f2, UPB_TYPE(BYTES));
- upb_fielddef_setlabel(f2, UPB_LABEL(REPEATED));
- upb_fielddef_settypename(f2, "");
- ASSERT(upb_fielddef_number(f2) == 1572);
-
- upb_fielddef_unref(f1);
- upb_fielddef_unref(f2);
-}
-
-int main(int argc, char *argv[])
-{
- if (argc < 2) {
- fprintf(stderr, "Usage: test_cpp <descriptor file>\n");
- return 1;
- }
- descriptor_file = argv[1];
-#define TEST(func) do { \
- int assertions_before = num_assertions; \
- printf("Running " #func "..."); fflush(stdout); \
- func(); \
- printf("ok (%d assertions).\n", num_assertions - assertions_before); \
- } while (0)
-
- TEST(test_upb_symtab);
- TEST(test_upb_jit);
- TEST(test_upb_two_fielddefs);
- printf("All tests passed (%d assertions).\n", num_assertions);
- return 0;
-}
diff --git a/tests/upb_test.h b/tests/upb_test.h
index 2bd340e..652977b 100644
--- a/tests/upb_test.h
+++ b/tests/upb_test.h
@@ -7,6 +7,7 @@
#ifndef UPB_TEST_H_
#define UPB_TEST_H_
+#include <stdio.h>
#include <stdlib.h>
#ifdef __cplusplus
@@ -18,9 +19,28 @@ int num_assertions = 0;
++num_assertions; \
if (!(expr)) { \
fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
+ fprintf(stderr, "expr: %s\n", #expr); \
abort(); \
} \
-} while(0)
+} while (0)
+
+/* Aborts if expr is false, after printing the file, line and the text of expr
+ * to stderr.  Does not touch num_assertions. */
+#define ASSERT_NOCOUNT(expr) do { \
+ if (!(expr)) { \
+ fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
+ fprintf(stderr, "expr: %s\n", #expr); \
+ abort(); \
+ } \
+} while (0)
+
+/* Increments num_assertions, then aborts if expr is false after printing the
+ * file, line, the text of expr, and the string returned by
+ * upb_status_getstr(status) to stderr.  status is handed to
+ * upb_status_getstr() unchanged, so callers pass whatever that function
+ * accepts. */
+#define ASSERT_STATUS(expr, status) do { \
+ ++num_assertions; \
+ if (!(expr)) { \
+ fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
+ fprintf(stderr, "expr: %s\n", #expr); \
+ fprintf(stderr, "failed status: %s\n", upb_status_getstr(status)); \
+ abort(); \
+ } \
+} while (0)
#ifdef __cplusplus
} /* extern "C" */
diff --git a/tools/upbc.c b/tools/upbc.c
index a5d8897..4b25f3e 100644
--- a/tools/upbc.c
+++ b/tools/upbc.c
@@ -55,7 +55,7 @@ static void write_const_h(const upb_def *defs[], int num_entries,
for(int i = 0; i < num_entries; i++) { /* Foreach enum */
if(defs[i]->type != UPB_DEF_ENUM) continue;
const upb_enumdef *enumdef = upb_downcast_enumdef_const(defs[i]);
- char *enum_name = strdup(upb_def_fqname(UPB_UPCAST(enumdef)));
+ char *enum_name = strdup(upb_def_fullname(UPB_UPCAST(enumdef)));
char *enum_val_prefix = strdup(enum_name);
to_cident(enum_name);
to_preproc(enum_val_prefix);
@@ -63,11 +63,12 @@ static void write_const_h(const upb_def *defs[], int num_entries,
fprintf(stream, "typedef enum %s {\n", enum_name);
bool first = true;
/* Foreach enum value. */
- for (upb_enum_iter iter = upb_enum_begin(enumdef);
- !upb_enum_done(iter);
- iter = upb_enum_next(enumdef, iter)) {
- char *value_name = strdup(upb_enum_iter_name(iter));
- uint32_t value = upb_enum_iter_number(iter);
+ upb_enum_iter iter;
+ for (upb_enum_begin(&iter, enumdef);
+ !upb_enum_done(&iter);
+ upb_enum_next(&iter)) {
+ char *value_name = strdup(upb_enum_iter_name(&iter));
+ uint32_t value = upb_enum_iter_number(&iter);
to_preproc(value_name);
/* " GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13," */
if (!first) fputs(",\n", stream);
@@ -85,20 +86,20 @@ static void write_const_h(const upb_def *defs[], int num_entries,
for(int i = 0; i < num_entries; i++) { /* Foreach enum */
const upb_msgdef *m = upb_dyncast_msgdef_const(defs[i]);
if(!m) continue;
- char *msg_name = strdup(upb_def_fqname(UPB_UPCAST(m)));
+ char *msg_name = strdup(upb_def_fullname(UPB_UPCAST(m)));
char *msg_val_prefix = strdup(msg_name);
to_preproc(msg_val_prefix);
upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- char *preproc_field_name = strdup(f->name);
+ for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+ upb_fielddef *f = upb_msg_iter_field(&i);
+ char *preproc_field_name = strdup(upb_fielddef_name(f));
to_preproc(preproc_field_name);
fprintf(stream, "#define %s_%s__FIELDNUM %d\n",
msg_val_prefix, preproc_field_name, upb_fielddef_number(f));
fprintf(stream, "#define %s_%s__FIELDNAME \"%s\"\n",
- msg_val_prefix, preproc_field_name, f->name);
+ msg_val_prefix, preproc_field_name, upb_fielddef_name(f));
fprintf(stream, "#define %s_%s__FIELDTYPE %d\n\n",
- msg_val_prefix, preproc_field_name, f->type);
+ msg_val_prefix, preproc_field_name, upb_fielddef_type(f));
free(preproc_field_name);
}
free(msg_val_prefix);
@@ -123,13 +124,13 @@ const char usage[] =
" of using the input file as a basename.\n"
;
-void usage_err(char *err) {
+void usage_err(const char *err) {
fprintf(stderr, "upbc: %s\n\n", err);
fputs(usage, stderr);
exit(1);
}
-void error(char *err, ...) {
+void error(const char *err, ...) {
va_list args;
va_start(args, err);
fprintf(stderr, "upbc: ");
@@ -175,8 +176,8 @@ int main(int argc, char *argv[]) {
upb_status_uninit(&status);
/* Emit output files. */
- const int maxsize = 256;
- char h_const_filename[maxsize];
+ char h_const_filename[256];
+ const int maxsize = sizeof(h_const_filename);
if(snprintf(h_const_filename, maxsize, "%s_const.h", outfile_base) >= maxsize)
error("File base too long.\n");
@@ -184,9 +185,9 @@ int main(int argc, char *argv[]) {
if(!h_const_file) error("Failed to open _const.h output file\n");
int symcount;
- const upb_def **defs = upb_symtab_getdefs(s, &symcount, UPB_DEF_ANY);
+ const upb_def **defs = upb_symtab_getdefs(s, &symcount, UPB_DEF_ANY, &defs);
write_const_h(defs, symcount, h_const_filename, h_const_file);
- for (int i = 0; i < symcount; i++) upb_def_unref(defs[i]);
+ for (int i = 0; i < symcount; i++) upb_def_unref(defs[i], &defs);
free(defs);
free(descriptor);
upb_symtab_unref(s);
diff --git a/upb/atomic.h b/upb/atomic.h
deleted file mode 100644
index 2478fe4..0000000
--- a/upb/atomic.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Google Inc. See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- * Only a very small part of upb is thread-safe. Notably, individual
- * messages, arrays, and strings are *not* thread safe for mutating.
- * However, we do make message *metadata* such as upb_msgdef and
- * upb_symtab thread-safe, and their ownership is tracked via atomic
- * refcounting. This header implements the small number of atomic
- * primitives required to support this. The primitives we implement
- * are:
- *
- * - a reader/writer lock (wrappers around platform-provided mutexes).
- * - an atomic refcount.
- *
- * TODO: This needs some revisiting/refinement, see:
- * http://code.google.com/p/upb/issues/detail?id=8
- */
-
-#ifndef UPB_ATOMIC_H_
-#define UPB_ATOMIC_H_
-
-#include <stdbool.h>
-#include <assert.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* inline if possible, emit standalone code if required. */
-#ifndef INLINE
-#define INLINE static inline
-#endif
-
-// Until this stuff is actually working, make thread-unsafe the default.
-#define UPB_THREAD_UNSAFE
-
-#ifdef UPB_THREAD_UNSAFE
-
-/* Non-thread-safe implementations. ******************************************/
-
-typedef struct {
- int v;
-} upb_atomic_t;
-
-#define UPB_ATOMIC_INIT(x) {x}
-
-INLINE void upb_atomic_init(upb_atomic_t *a, int val) { a->v = val; }
-INLINE bool upb_atomic_ref(upb_atomic_t *a) { return a->v++ == 0; }
-INLINE bool upb_atomic_unref(upb_atomic_t *a) { assert(a->v > 0); return --a->v == 0; }
-INLINE int upb_atomic_read(upb_atomic_t *a) { return a->v; }
-INLINE bool upb_atomic_add(upb_atomic_t *a, int val) {
- a->v += val;
- return a->v == 0;
-}
-
-#endif
-
-/* Atomic refcount ************************************************************/
-
-#ifdef UPB_THREAD_UNSAFE
-
-/* Already defined above. */
-
-#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4
-
-/* GCC includes atomic primitives. */
-
-typedef struct {
- volatile int v;
-} upb_atomic_t;
-
-INLINE void upb_atomic_init(upb_atomic_t *a, int val) {
- a->v = val;
- __sync_synchronize(); /* Ensure the initialized value is visible. */
-}
-
-INLINE bool upb_atomic_ref(upb_atomic_t *a) {
- return __sync_fetch_and_add(&a->v, 1) == 0;
-}
-
-INLINE bool upb_atomic_add(upb_atomic_t *a, int n) {
- return __sync_add_and_fetch(&a->v, n) == 0;
-}
-
-INLINE bool upb_atomic_unref(upb_atomic_t *a) {
- return __sync_sub_and_fetch(&a->v, 1) == 0;
-}
-
-INLINE bool upb_atomic_read(upb_atomic_t *a) {
- return __sync_fetch_and_add(&a->v, 0);
-}
-
-#elif defined(WIN32)
-
-/* Windows defines atomic increment/decrement. */
-#include <Windows.h>
-
-typedef struct {
- volatile LONG val;
-} upb_atomic_t;
-
-INLINE void upb_atomic_init(upb_atomic_t *a, int val) {
- InterlockedExchange(&a->val, val);
-}
-
-INLINE bool upb_atomic_ref(upb_atomic_t *a) {
- return InterlockedIncrement(&a->val) == 1;
-}
-
-INLINE bool upb_atomic_unref(upb_atomic_t *a) {
- return InterlockedDecrement(&a->val) == 0;
-}
-
-#else
-#error Atomic primitives not defined for your platform/CPU. \
- Implement them or compile with UPB_THREAD_UNSAFE.
-#endif
-
-INLINE bool upb_atomic_only(upb_atomic_t *a) {
- return upb_atomic_read(a) == 1;
-}
-
-/* Reader/Writer lock. ********************************************************/
-
-#ifdef UPB_THREAD_UNSAFE
-
-typedef struct {
-} upb_rwlock_t;
-
-INLINE void upb_rwlock_init(const upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_destroy(const upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_rdlock(const upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_wrlock(const upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_unlock(const upb_rwlock_t *l) { (void)l; }
-
-#elif defined(UPB_USE_PTHREADS)
-
-#include <pthread.h>
-
-typedef struct {
- pthread_rwlock_t lock;
-} upb_rwlock_t;
-
-INLINE void upb_rwlock_init(const upb_rwlock_t *l) {
- /* TODO: check return value. */
- pthread_rwlock_init(&l->lock, NULL);
-}
-
-INLINE void upb_rwlock_destroy(const upb_rwlock_t *l) {
- /* TODO: check return value. */
- pthread_rwlock_destroy(&l->lock);
-}
-
-INLINE void upb_rwlock_rdlock(const upb_rwlock_t *l) {
- /* TODO: check return value. */
- pthread_rwlock_rdlock(&l->lock);
-}
-
-INLINE void upb_rwlock_wrlock(const upb_rwlock_t *l) {
- /* TODO: check return value. */
- pthread_rwlock_wrlock(&l->lock);
-}
-
-INLINE void upb_rwlock_unlock(const upb_rwlock_t *l) {
- /* TODO: check return value. */
- pthread_rwlock_unlock(&l->lock);
-}
-
-#else
-#error Reader/writer lock is not defined for your platform/CPU. \
- Implement it or compile with UPB_THREAD_UNSAFE.
-#endif
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* UPB_ATOMIC_H_ */
diff --git a/upb/bytestream.c b/upb/bytestream.c
index 812e552..8feb678 100644
--- a/upb/bytestream.c
+++ b/upb/bytestream.c
@@ -32,8 +32,6 @@ upb_byteregion *upb_byteregion_newl(const void *str, size_t len) {
memcpy(ptr, str, len);
ptr[len] = '\0';
upb_stringsrc_reset(src, ptr, len);
- upb_byteregion_fetch(upb_stringsrc_allbytes(src));
- assert(len == upb_byteregion_available(upb_stringsrc_allbytes(src), 0));
return upb_stringsrc_allbytes(src);
}
@@ -93,10 +91,10 @@ static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) {
static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) {
upb_stdio_buf **reuse = NULL; // XXX
- uint32_t num_reused = 0, num_inuse = 0;
+ int num_reused = 0, num_inuse = 0;
// Could sweep only a subset of bufs if this was a hotspot.
- for (uint32_t i = 0; i < s->nbuf; i++) {
+ for (int i = 0; i < s->nbuf; i++) {
upb_stdio_buf *buf = s->bufs[i];
if (buf->refcount > 0) {
s->bufs[num_inuse++] = buf;
@@ -243,10 +241,9 @@ upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }
upb_bytesuccess_t upb_stringsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
upb_stringsrc *src = _src;
+ // ofs == src->len is a legal request: it is how a caller reaches EOF, and it
+ // is the only possible fetch on an empty string. Only ofs > len is a caller
+ // bug, so the bound stays inclusive; a strict "<" would make the EOF branch
+ // below unreachable in assert-enabled builds.
assert(ofs <= src->len);
if (ofs == src->len) {
upb_status_seteof(&src->bytesrc.status);
+ // Keep the out-parameter defined on the EOF path as well.
*read = 0;
return UPB_BYTE_EOF;
}
*read = src->len - ofs;
diff --git a/upb/bytestream.h b/upb/bytestream.h
index fe049d2..3217ee1 100644
--- a/upb/bytestream.h
+++ b/upb/bytestream.h
@@ -372,8 +372,7 @@ INLINE int upb_bytesink_putc(upb_bytesink *sink, char ch) {
}
INLINE int upb_bytesink_putrepeated(upb_bytesink *sink, char ch, int len) {
- int i;
- for (i = 0; i < len; i++)
+ for (int i = 0; i < len; i++)
if (upb_bytesink_write(sink, &ch, 1) < 0)
return -1;
return len;
@@ -436,7 +435,8 @@ typedef struct {
FILE *file;
bool should_close;
upb_stdio_buf **bufs;
- uint32_t nbuf, szbuf;
+ int nbuf;
+ uint32_t szbuf;
} upb_stdio;
void upb_stdio_init(upb_stdio *stdio);
diff --git a/upb/def.c b/upb/def.c
index 5ac3498..5a5b0f4 100644
--- a/upb/def.c
+++ b/upb/def.c
@@ -1,7 +1,7 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2008-2009 Google Inc. See LICENSE for details.
+ * Copyright (c) 2008-2012 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*/
@@ -11,168 +11,283 @@
#include "upb/bytestream.h"
#include "upb/def.h"
-#define alignof(t) offsetof(struct { char c; t x; }, x)
+// isalpha() etc. from <ctype.h> are locale-dependent, which we don't want.
+static bool upb_isbetween(char c, char low, char high) {
+ return c >= low && c <= high;
+}
-void upb_deflist_init(upb_deflist *l) {
- l->size = 8;
- l->defs = malloc(l->size * sizeof(void*));
- l->len = 0;
+static bool upb_isletter(char c) {
+ return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_';
}
-void upb_deflist_uninit(upb_deflist *l) {
- for(uint32_t i = 0; i < l->len; i++) upb_def_unref(l->defs[i]);
- free(l->defs);
+static bool upb_isalphanum(char c) {
+ return upb_isletter(c) || upb_isbetween(c, '0', '9');
}
-void upb_deflist_push(upb_deflist *l, upb_def *d) {
- if(l->len == l->size) {
- l->size *= 2;
- l->defs = realloc(l->defs, l->size * sizeof(void*));
+// Returns true iff the len bytes at str form a valid identifier: one or more
+// components, each a letter (or '_') followed by letters, digits or '_'.
+// Components are separated by '.', which is only accepted when "full" is set.
+// Empty input, a leading or trailing '.', and ".." are all rejected.
+static bool upb_isident(const char *str, size_t len, bool full) {
+ bool expect_first = true; // Next char must open a new component.
+ const char *end = str + len;
+ for (const char *p = str; p != end; p++) {
+ if (*p == '.') {
+ if (expect_first || !full) return false;
+ expect_first = true;
+ continue;
+ }
+ bool ok = expect_first ? upb_isletter(*p) : upb_isalphanum(*p);
+ if (!ok) return false;
+ expect_first = false;
}
- l->defs[l->len++] = d;
+ // Still waiting for a component's first char means empty or trailing '.'.
+ return !expect_first;
}
/* upb_def ********************************************************************/
static void upb_msgdef_free(upb_msgdef *m);
+static void upb_fielddef_free(upb_fielddef *f);
static void upb_enumdef_free(upb_enumdef *e);
-static void upb_unresolveddef_free(struct _upb_unresolveddef *u);
-bool upb_def_ismutable(const upb_def *def) { return def->symtab == NULL; }
+bool upb_def_ismutable(const upb_def *def) { return !def->is_finalized; }
+bool upb_def_isfinalized(const upb_def *def) { return def->is_finalized; }
-bool upb_def_setfqname(upb_def *def, const char *fqname) {
+// Replaces the def's full name with a private copy of "fullname".
+// The def must still be mutable. Returns false, leaving the current name
+// untouched, if "fullname" is not a valid dotted identifier or if the copy
+// cannot be allocated.
+bool upb_def_setfullname(upb_def *def, const char *fullname) {
assert(upb_def_ismutable(def));
- free(def->fqname);
- def->fqname = strdup(fqname);
- return true; // TODO: check for acceptable characters.
-}
-
-static void upb_def_free(upb_def *def) {
- switch (def->type) {
- case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break;
- case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break;
- case UPB_DEF_UNRESOLVED:
- upb_unresolveddef_free(upb_downcast_unresolveddef(def)); break;
- default:
- assert(false);
- }
+ if (!upb_isident(fullname, strlen(fullname), true)) return false;
+ // Copy before releasing the old name so that an allocation failure cannot
+ // report success while leaving the def with a NULL name.
+ char *copy = strdup(fullname);
+ if (!copy) return false;
+ free(def->fullname);
+ def->fullname = copy;
+ return true;
}
-upb_def *upb_def_dup(const upb_def *def) {
+upb_def *upb_def_dup(const upb_def *def, void *o) {
switch (def->type) {
case UPB_DEF_MSG:
- return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def)));
+ return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def), o));
+ case UPB_DEF_FIELD:
+ return UPB_UPCAST(upb_fielddef_dup(upb_downcast_fielddef_const(def), o));
case UPB_DEF_ENUM:
- return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def)));
+ return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def), o));
default: assert(false); return NULL;
}
}
-// Prior to being in a symtab, the def's refcount controls the lifetime of the
-// def itself. If the refcount falls to zero, the def is deleted. Once the
-// def belongs to a symtab, the def is owned by the symtab and its refcount
-// determines whether the def owns a ref on the symtab or not.
-void upb_def_ref(const upb_def *_def) {
- upb_def *def = (upb_def*)_def; // Need to modify refcount.
- if (upb_atomic_ref(&def->refcount) && def->symtab)
- upb_symtab_ref(def->symtab);
-}
-
-static void upb_def_movetosymtab(upb_def *d, upb_symtab *s) {
- assert(upb_atomic_read(&d->refcount) > 0);
- d->symtab = s;
- upb_symtab_ref(s);
- upb_msgdef *m = upb_dyncast_msgdef(d);
- if (m) upb_inttable_compact(&m->itof);
+void upb_def_ref(const upb_def *_def, void *owner) {
+ upb_def *def = (upb_def*)_def;
+ upb_refcount_ref(&def->refcount, owner);
}
-void upb_def_unref(const upb_def *_def) {
- upb_def *def = (upb_def*)_def; // Need to modify refcount.
+// Releases the ref that "owner" holds on the def; passing NULL is a no-op.
+// When upb_refcount_unref() returns true, this def and every def linked to it
+// through refcount.next are freed.
+void upb_def_unref(const upb_def *_def, void *owner) {
+ upb_def *def = (upb_def*)_def; // Cast away const: unref mutates the refcount.
if (!def) return;
- if (upb_atomic_unref(&def->refcount)) {
- if (def->symtab) {
- upb_symtab_unref(def->symtab);
- // Def might be deleted now.
- } else {
- upb_def_free(def);
+ if (!upb_refcount_unref(&def->refcount, owner)) return;
+ upb_def *base = def;
+ // Free all defs in the SCC.
+ // The walk follows refcount.next until it arrives back at the starting def.
+ do {
+ // Read the link first: the free call below releases def itself.
+ upb_def *next = (upb_def*)def->refcount.next;
+ switch (def->type) {
+ case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break;
+ case UPB_DEF_FIELD: upb_fielddef_free(upb_downcast_fielddef(def)); break;
+ case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break;
+ default:
+ assert(false);
}
- }
+ def = next;
+ } while(def != base);
}
-static void upb_def_init(upb_def *def, upb_deftype_t type) {
+static bool upb_def_init(upb_def *def, upb_deftype_t type, void *owner) {
def->type = type;
- def->fqname = NULL;
- def->symtab = NULL;
- upb_atomic_init(&def->refcount, 1);
+ def->is_finalized = false;
+ def->fullname = NULL;
+ return upb_refcount_init(&def->refcount, owner);
}
static void upb_def_uninit(upb_def *def) {
- free(def->fqname);
+ upb_refcount_uninit(&def->refcount);
+ free(def->fullname);
}
+void upb_def_donateref(const upb_def *_def, void *from, void *to) {
+ upb_def *def = (upb_def*)_def;
+ upb_refcount_donateref(&def->refcount, from, to);
+}
-/* upb_unresolveddef **********************************************************/
-
-// Unresolved defs are used as temporary placeholders for a def whose name has
-// not been resolved yet. During the name resolution step, all unresolved defs
-// are replaced with pointers to the actual def being referenced.
-typedef struct _upb_unresolveddef {
- upb_def base;
-} upb_unresolveddef;
+// Callback passed to upb_refcount_findscc() by upb_finalize(). Reports, via
+// upb_refcount_visit(), each refcount that the given def points at:
+//   - a msgdef points at every one of its fields;
+//   - a fielddef points at its containing msgdef and, if set, its subdef;
+//   - enum, service and "any" defs report nothing.
+// "closure" is passed through to upb_refcount_visit() unchanged.
+static void upb_def_getsuccessors(upb_refcount *refcount, void *closure) {
+ // The cast treats the refcount pointer as a pointer to the def itself.
+ upb_def *def = (upb_def*)refcount;
+ switch (def->type) {
+ case UPB_DEF_MSG: {
+ upb_msgdef *m = upb_downcast_msgdef(def);
+ upb_msg_iter i;
+ for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+ upb_fielddef *f = upb_msg_iter_field(&i);
+ upb_refcount_visit(refcount, &f->base.refcount, closure);
+ }
+ break;
+ }
+ case UPB_DEF_FIELD: {
+ upb_fielddef *f = upb_downcast_fielddef(def);
+ // A fielddef reaching this point is expected to belong to a msgdef.
+ assert(f->msgdef);
+ upb_refcount_visit(refcount, &f->msgdef->base.refcount, closure);
+ // NOTE(review): reads sub.def without checking subdef_is_symbolic;
+ // presumably safe because upb_finalize() validates fields first - confirm.
+ upb_def *subdef = f->sub.def;
+ if (subdef)
+ upb_refcount_visit(refcount, &subdef->refcount, closure);
+ break;
+ }
+ case UPB_DEF_ENUM:
+ case UPB_DEF_SERVICE:
+ case UPB_DEF_ANY:
+ break;
+ }
+}
-// Is passed a ref on the string.
-static upb_unresolveddef *upb_unresolveddef_new(const char *str) {
- upb_unresolveddef *def = malloc(sizeof(*def));
- upb_def_init(&def->base, UPB_DEF_UNRESOLVED);
- def->base.fqname = strdup(str);
- return def;
+// Checks that a field is complete enough to be finalized: it must have a name
+// and a number, and if its type takes a subdef, that subdef must be resolved
+// (not symbolic), present, and already flagged as finalized.
+// On failure, stores a message in "s" and returns false.
+static bool upb_validate_field(const upb_fielddef *f, upb_status *s) {
+ if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == -1) {
+ upb_status_seterrliteral(s, "fielddef must have name and number set");
+ return false;
+ }
+ if (upb_hassubdef(f)) {
+ if (f->subdef_is_symbolic) {
+ upb_status_seterrf(s,
+ "field %s has not been resolved", upb_fielddef_name(f));
+ return false;
+ } else if (upb_fielddef_subdef(f) == NULL) {
+ upb_status_seterrf(s,
+ "field %s is missing required subdef", upb_fielddef_name(f));
+ return false;
+ } else if (!upb_def_isfinalized(upb_fielddef_subdef(f))) {
+ // upb_finalize() flags every def in its batch before validating, so an
+ // unflagged subdef here is neither finalized nor part of the batch.
+ upb_status_seterrf(s,
+ "field %s subtype is not being finalized", upb_fielddef_name(f));
+ return false;
+ }
+ }
+ return true;
}
-static void upb_unresolveddef_free(struct _upb_unresolveddef *def) {
- upb_def_uninit(&def->base);
- free(def);
+// Finalizes the n defs in "defs" as one batch. Validates them, groups them
+// into strongly-connected components via upb_refcount_findscc(), and marks
+// their fields finalized. Returns true on success. On failure, stores a
+// message in "s", returns false, and clears only the is_finalized flags that
+// this call set.
+bool upb_finalize(upb_def *const*defs, int n, upb_status *s) {
+ if (n >= UINT16_MAX - 1) {
+ upb_status_seterrliteral(s, "too many defs (max is 64k at a time)");
+ return false;
+ }
+
+ // Number of leading entries of defs[] whose is_finalized flag was set by
+ // this call. The error path rolls back exactly these, so a def that was
+ // already finalized on entry is never reset.
+ int marked = 0;
+
+ // First perform validation, in two passes so we can check that we have a
+ // transitive closure without needing to search.
+ for (int i = 0; i < n; i++) {
+ upb_def *def = defs[i];
+ if (upb_def_isfinalized(def)) {
+ // Could relax this requirement if it's annoying.
+ upb_status_seterrliteral(s, "def is already finalized");
+ goto err;
+ } else if (def->type == UPB_DEF_FIELD) {
+ upb_status_seterrliteral(s, "standalone fielddefs can not be finalized");
+ goto err;
+ } else {
+ // Set now to detect transitive closure in the second pass.
+ def->is_finalized = true;
+ marked = i + 1;
+ }
+ }
+
+ for (int i = 0; i < n; i++) {
+ upb_msgdef *m = upb_dyncast_msgdef(defs[i]);
+ if (!m) continue;
+ upb_inttable_compact(&m->itof);
+ upb_msg_iter j;
+ for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) {
+ upb_fielddef *f = upb_msg_iter_field(&j);
+ assert(f->msgdef == m);
+ if (!upb_validate_field(f, s)) goto err;
+ }
+ }
+
+ // Validation all passed, now find strongly-connected components so that
+ // our refcounting works with cycles.
+ upb_refcount_findscc((upb_refcount**)defs, n, &upb_def_getsuccessors);
+
+ // Now that ref cycles have been removed it is safe to have each fielddef
+ // take a ref on its subdef (if any), but only if it's a member of another
+ // SCC.
+ for (int i = 0; i < n; i++) {
+ upb_msgdef *m = upb_dyncast_msgdef(defs[i]);
+ if (!m) continue;
+ upb_msg_iter j;
+ for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) {
+ upb_fielddef *f = upb_msg_iter_field(&j);
+ f->base.is_finalized = true;
+ // Release the ref taken in upb_msgdef_addfields().
+ upb_fielddef_unref(f, m);
+ if (!upb_hassubdef(f)) continue;
+ assert(upb_fielddef_subdef(f));
+ if (!upb_refcount_merged(&f->base.refcount, &f->sub.def->refcount)) {
+ // Subdef is part of a different strongly-connected component.
+ // The owner token &f->sub.def matches the unref in upb_fielddef_free().
+ upb_def_ref(f->sub.def, &f->sub.def);
+ f->subdef_is_owned = true;
+ }
+ }
+ }
+
+ return true;
+
+err:
+ // Undo only what the first pass of this call changed.
+ for (int i = 0; i < marked; i++) {
+ defs[i]->is_finalized = false;
+ }
+ return false;
}
/* upb_enumdef ****************************************************************/
-upb_enumdef *upb_enumdef_new() {
+// Allocates an empty enumdef whose initial ref belongs to "owner".
+// Returns NULL if any allocation or initialisation step fails; each failure
+// path undoes exactly the steps that had already succeeded.
+upb_enumdef *upb_enumdef_new(void *owner) {
upb_enumdef *e = malloc(sizeof(*e));
- upb_def_init(&e->base, UPB_DEF_ENUM);
- upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent));
- upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent));
+ if (!e) return NULL;
+ if (!upb_def_init(&e->base, UPB_DEF_ENUM, owner)) goto err3;
+ if (!upb_strtable_init(&e->ntoi)) goto err2;
+ if (!upb_inttable_init(&e->iton)) goto err1;
return e;
+
+err1:
+ upb_strtable_uninit(&e->ntoi);
+err2:
+ // The base def was initialised successfully, so release whatever
+ // upb_def_init() set up, mirroring upb_enumdef_free().
+ // NOTE(review): assumes upb_def_uninit() is valid right after
+ // upb_def_init() while "owner" still holds its ref - confirm.
+ upb_def_uninit(&e->base);
+err3:
+ free(e);
+ return NULL;
}
+// Destroys an enumdef: frees the name strings stored as values in the iton
+// table, tears down both lookup tables and the base def, then frees e itself.
static void upb_enumdef_free(upb_enumdef *e) {
- upb_enum_iter i;
- for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
- // Frees the ref taken when the string was parsed.
- free(upb_enum_iter_name(i));
- }
- upb_strtable_free(&e->ntoi);
- upb_inttable_free(&e->iton);
+ upb_inttable_iter i;
+ upb_inttable_begin(&i, &e->iton);
+ for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+ // To clean up the strdup() from upb_enumdef_addval().
+ free(upb_value_getptr(upb_inttable_iter_value(&i)));
+ }
+ // The strings must be freed above before the table holding them goes away.
+ upb_strtable_uninit(&e->ntoi);
+ upb_inttable_uninit(&e->iton);
upb_def_uninit(&e->base);
free(e);
}
-upb_enumdef *upb_enumdef_dup(const upb_enumdef *e) {
- upb_enumdef *new_e = upb_enumdef_new();
+// Returns a new enumdef, owned by "owner", holding every name/number pair of
+// e, or NULL if allocation or any insertion fails (the partial copy is
+// released first).
+// NOTE(review): e->defaultval is not copied here - confirm whether intended.
+upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, void *owner) {
+ upb_enumdef *new_e = upb_enumdef_new(owner);
+ if (!new_e) return NULL;
upb_enum_iter i;
- for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
- assert(upb_enumdef_addval(new_e, upb_enum_iter_name(i),
- upb_enum_iter_number(i)));
+ for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
+ bool success = upb_enumdef_addval(
+ new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i));
+ if (!success) {
+ // Drop the only ref on the partial copy.
+ upb_enumdef_unref(new_e, owner);
+ return NULL;
+ }
}
return new_e;
}
-bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num) {
- if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL))
+// Adds the value "name" = num to the enum. Returns false if the name is not
+// a valid single-component identifier, is already present, or if an
+// allocation or table insertion fails. Several names may share one number;
+// only the first name added for a number is recorded in the iton table.
+bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num) {
+ if (!upb_isident(name, strlen(name), false)) return false;
+ if (upb_enumdef_ntoi(e, name, NULL))
+ return false;
+ // Make the iton copy up front so that an allocation failure is reported
+ // before either table is modified, and NULL is never stored as a name.
+ bool need_iton = !upb_inttable_lookup(&e->iton, num);
+ char *copy = need_iton ? strdup(name) : NULL;
+ if (need_iton && !copy) return false;
+ if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
+ free(copy);
+ return false;
+ }
+ if (need_iton &&
+ !upb_inttable_insert(&e->iton, num, upb_value_ptr(copy))) {
+ // The table did not take the string, so it is still ours to free.
+ // The ntoi entry added above stays in place, as it did before.
+ free(copy);
return false;
- upb_iton_ent ent = {0, strdup(name)};
- upb_strtable_insert(&e->ntoi, name, &num);
- upb_inttable_insert(&e->iton, num, &ent);
+ }
return true;
}
@@ -181,42 +296,70 @@ void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) {
e->defaultval = val;
}
-upb_enum_iter upb_enum_begin(const upb_enumdef *e) {
- // We could iterate over either table here; the choice is arbitrary.
- return upb_inttable_begin(&e->iton);
+void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
+ // We iterate over the ntoi table, to account for duplicate numbers.
+ upb_strtable_begin(i, &e->ntoi);
}
-upb_enum_iter upb_enum_next(const upb_enumdef *e, upb_enum_iter iter) {
- return upb_inttable_next(&e->iton, iter);
-}
+void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
+bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
-const char *upb_enumdef_iton(upb_enumdef *def, int32_t num) {
- upb_iton_ent *e = upb_inttable_fastlookup(&def->iton, num, sizeof(*e));
- return e ? e->str : NULL;
-}
-
-bool upb_enumdef_ntoil(upb_enumdef *def, const char *name, size_t len, int32_t *num) {
- upb_ntoi_ent *e = upb_strtable_lookupl(&def->ntoi, name, len);
- if (!e) return false;
- if (num) *num = e->value;
+bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, int32_t *num) {
+ const upb_value *v = upb_strtable_lookup(&def->ntoi, name);
+ if (!v) return false;
+ if (num) *num = upb_value_getint32(*v);
return true;
}
-bool upb_enumdef_ntoi(upb_enumdef *e, const char *name, int32_t *num) {
- return upb_enumdef_ntoil(e, name, strlen(name), num);
+const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
+ const upb_value *v = upb_inttable_lookup32(&def->iton, num);
+ return v ? upb_value_getptr(*v) : NULL;
}
/* upb_fielddef ***************************************************************/
+#define alignof(t) offsetof(struct { char c; t x; }, x)
+#define TYPE_INFO(ctype, inmemory_type) \
+ {alignof(ctype), sizeof(ctype), UPB_CTYPE_ ## inmemory_type}
+
+const upb_typeinfo upb_types[UPB_NUM_TYPES] = {
+ // END_GROUP is not real, but used to signify the pseudo-field that
+ // ends a group from within the group.
+ TYPE_INFO(void*, PTR), // ENDGROUP
+ TYPE_INFO(double, DOUBLE), // DOUBLE
+ TYPE_INFO(float, FLOAT), // FLOAT
+ TYPE_INFO(int64_t, INT64), // INT64
+ TYPE_INFO(uint64_t, UINT64), // UINT64
+ TYPE_INFO(int32_t, INT32), // INT32
+ TYPE_INFO(uint64_t, UINT64), // FIXED64
+ TYPE_INFO(uint32_t, UINT32), // FIXED32
+ TYPE_INFO(bool, BOOL), // BOOL
+ TYPE_INFO(void*, BYTEREGION), // STRING
+ TYPE_INFO(void*, PTR), // GROUP
+ TYPE_INFO(void*, PTR), // MESSAGE
+ TYPE_INFO(void*, BYTEREGION), // BYTES
+ TYPE_INFO(uint32_t, UINT32), // UINT32
+ TYPE_INFO(uint32_t, INT32), // ENUM
+ TYPE_INFO(int32_t, INT32), // SFIXED32
+ TYPE_INFO(int64_t, INT64), // SFIXED64
+ TYPE_INFO(int32_t, INT32), // SINT32
+ TYPE_INFO(int64_t, INT64), // SINT64
+};
+
static void upb_fielddef_init_default(upb_fielddef *f);
-upb_fielddef *upb_fielddef_new() {
+upb_fielddef *upb_fielddef_new(void *owner) {
upb_fielddef *f = malloc(sizeof(*f));
+ if (!f) return NULL;
+ if (!upb_def_init(UPB_UPCAST(f), UPB_DEF_FIELD, owner)) {
+ free(f);
+ return NULL;
+ }
f->msgdef = NULL;
- f->def = NULL;
- upb_atomic_init(&f->refcount, 1);
- f->finalized = false;
+ f->sub.def = NULL;
+ f->subdef_is_symbolic = false;
+ f->subdef_is_owned = false;
f->label = UPB_LABEL(OPTIONAL);
f->hasbit = -1;
f->offset = 0;
@@ -226,14 +369,68 @@ upb_fielddef *upb_fielddef_new() {
// These are initialized to be invalid; the user must set them explicitly.
// Could relax this later if it's convenient and non-confusing to have a
// defaults for them.
- f->name = NULL;
- f->type = 0;
+ f->type = UPB_TYPE_NONE;
f->number = 0;
upb_fielddef_init_default(f);
return f;
}
+// Frees the byteregion behind the field's default when the default is
+// currently stored as a string; any other default needs no cleanup.
+static void upb_fielddef_uninit_default(upb_fielddef *f) {
+ if (!f->default_is_string) return;
+ upb_byteregion *region = upb_value_getbyteregion(f->defaultval);
+ upb_byteregion_free(region);
+}
+
+// Destroys a fielddef: drops the ref on its subdef if one is owned, frees a
+// string default if present, uninitialises the base def, and frees f.
+static void upb_fielddef_free(upb_fielddef *f) {
+ // subdef_is_owned is set by upb_finalize() when it takes a ref using the
+ // same &f->sub.def owner token that is released here.
+ if (f->subdef_is_owned)
+ upb_def_unref(f->sub.def, &f->sub.def);
+ upb_fielddef_uninit_default(f);
+ upb_def_uninit(UPB_UPCAST(f));
+ free(f);
+}
+
+// Returns a new fielddef, owned by "owner", that copies f's type, label,
+// number, name, hasbit, offset, accessor, fval and default value. It does not
+// copy f's msgdef. The subdef is carried over as a name to resolve later: a
+// still-symbolic name is passed on unchanged, and a resolved subdef is
+// referenced by its full name prefixed with ".". Returns NULL if an
+// allocation fails.
+upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, void *owner) {
+ upb_fielddef *newf = upb_fielddef_new(owner);
+ if (!newf) return NULL;
+ upb_fielddef_settype(newf, upb_fielddef_type(f));
+ upb_fielddef_setlabel(newf, upb_fielddef_label(f));
+ upb_fielddef_setnumber(newf, upb_fielddef_number(f));
+ upb_fielddef_setname(newf, upb_fielddef_name(f));
+ upb_fielddef_sethasbit(newf, upb_fielddef_hasbit(f));
+ upb_fielddef_setoffset(newf, upb_fielddef_offset(f));
+ upb_fielddef_setaccessor(newf, upb_fielddef_accessor(f));
+ upb_fielddef_setfval(newf, upb_fielddef_fval(f));
+ if (f->default_is_string) {
+ upb_byteregion *r = upb_value_getbyteregion(upb_fielddef_default(f));
+ size_t len;
+ const char *ptr = upb_byteregion_getptr(r, 0, &len);
+ assert(len == upb_byteregion_len(r));
+ upb_fielddef_setdefaultstr(newf, ptr, len);
+ } else if (!upb_issubmsg(newf)) {
+ // upb_fielddef_setdefault() asserts !upb_issubmsg(), so in that case the
+ // default already installed by upb_fielddef_settype() above is kept.
+ upb_fielddef_setdefault(newf, upb_fielddef_default(f));
+ }
+
+ if (f->subdef_is_symbolic) {
+ // "sub" holds a name here, not a def, so sub.def must not be touched.
+ // Presumably the stored name is already in the form that
+ // upb_fielddef_setsubtypename() accepts, so it is passed on as is.
+ if (f->sub.name) // Might be NULL.
+ upb_fielddef_setsubtypename(newf, f->sub.name);
+ } else if (f->sub.def) {
+ const char *srcname = upb_def_fullname(f->sub.def);
+ if (srcname) {
+ // One byte for the leading "." and one for the terminator.
+ char *newname = malloc(strlen(srcname) + 2);
+ if (!newname) {
+ upb_fielddef_unref(newf, owner);
+ return NULL;
+ }
+ strcpy(newname, ".");
+ strcat(newname, srcname);
+ upb_fielddef_setsubtypename(newf, newname);
+ free(newname);
+ }
+ }
+
+ return newf;
+}
+
static void upb_fielddef_init_default(upb_fielddef *f) {
f->default_is_string = false;
switch (upb_fielddef_type(f)) {
@@ -253,105 +450,62 @@ static void upb_fielddef_init_default(upb_fielddef *f) {
case UPB_TYPE(BOOL): upb_value_setbool(&f->defaultval, false); break;
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES):
- f->default_is_string = true;
- upb_value_setbyteregion(&f->defaultval, upb_byteregion_new(""));
- break;
+ upb_value_setbyteregion(&f->defaultval, upb_byteregion_new(""));
+ f->default_is_string = true;
+ break;
case UPB_TYPE(GROUP):
case UPB_TYPE(MESSAGE): upb_value_setptr(&f->defaultval, NULL); break;
+ case UPB_TYPE_ENDGROUP: assert(false);
+ case UPB_TYPE_NONE: break;
}
}
-static void upb_fielddef_uninit_default(upb_fielddef *f) {
- if (f->default_is_string) {
- upb_byteregion_free(upb_value_getbyteregion(f->defaultval));
- }
-}
-
-static void upb_fielddef_free(upb_fielddef *f) {
- upb_fielddef_uninit_default(f);
- if (f->def) {
- // We own a ref on the subdef iff we are not part of a msgdef.
- if (f->msgdef == NULL) {
- if (f->def) upb_downcast_unresolveddef(f->def); // assert() check.
- upb_def_unref(f->def);
- }
- }
- free(f->name);
- free(f);
-}
-
-void upb_fielddef_ref(upb_fielddef *f) {
- // TODO.
- (void)f;
-}
-
-void upb_fielddef_unref(upb_fielddef *f) {
- // TODO.
- (void)f;
- if (!f) return;
- if (upb_atomic_unref(&f->refcount)) {
- if (f->msgdef) {
- upb_msgdef_unref(f->msgdef);
- // fielddef might be deleted now.
- } else {
- upb_fielddef_free(f);
- }
+// Returns the def this field refers to, or NULL if there is none yet.
+// For a finalized field whose type takes a subdef, the subdef is asserted to
+// be present. Otherwise a subdef that is still only a symbolic name yields
+// NULL, and sub.def is returned as is (possibly NULL).
+const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
+ if (upb_hassubdef(f) && upb_fielddef_isfinalized(f)) {
+ assert(f->sub.def);
+ return f->sub.def;
+ } else {
+ // While symbolic, "sub" holds a name rather than a def pointer.
+ return f->subdef_is_symbolic ? NULL : f->sub.def;
}
}
-upb_fielddef *upb_fielddef_dup(upb_fielddef *f) {
- upb_fielddef *newf = upb_fielddef_new();
- newf->msgdef = f->msgdef;
- newf->type = f->type;
- newf->label = f->label;
- newf->number = f->number;
- newf->name = f->name;
- upb_fielddef_settypename(newf, f->def->fqname);
- return f;
+upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
+ return (upb_def*)upb_fielddef_subdef(f);
}
-bool upb_fielddef_ismutable(const upb_fielddef *f) {
- return !f->msgdef || upb_def_ismutable(UPB_UPCAST(f->msgdef));
+const char *upb_fielddef_subtypename(upb_fielddef *f) {
+ assert(upb_fielddef_ismutable(f));
+ return f->subdef_is_symbolic ? f->sub.name : NULL;
}
-upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
- if (upb_hassubdef(f) && !upb_fielddef_ismutable(f))
- return f->def;
- else
- return NULL;
-}
-
-static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
- assert(upb_dyncast_unresolveddef(f->def));
- upb_def_unref(f->def);
- f->def = def;
- if (f->type == UPB_TYPE(ENUM) && f->default_is_string) {
- // Resolve the enum's default from a string to an integer.
- upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval);
- assert(bytes); // Points to either a real default or the empty string.
- upb_enumdef *e = upb_downcast_enumdef(f->def);
- int32_t val = 0;
- // Could do a sanity check that the default value does not have embedded
- // NULLs.
- if (upb_byteregion_len(bytes) == 0) {
- upb_value_setint32(&f->defaultval, e->defaultval);
- } else {
- size_t len;
- // ptr is guaranteed to be NULL-terminated because the byteregion was
- // created with upb_byteregion_newl().
- const char *ptr = upb_byteregion_getptr(bytes, 0, &len);
- assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk.
- bool success = upb_enumdef_ntoi(e, ptr, &val);
- if (!success) {
- upb_status_seterrf(
- s, "Default enum value (%s) is not a member of the enum", ptr);
- return false;
- }
- upb_value_setint32(&f->defaultval, val);
+// Could expose this to clients if a client wants to call it independently
+// of upb_resolve() for whatever reason.
+//
+// Converts a default stored as a string into the int32 value of the matching
+// member of the field's enum subdef. An empty string selects the enum's own
+// default. Returns true if nothing needed converting or the conversion
+// succeeded. If the string names no member, stores an error in "s", leaves
+// the string default in place and returns false.
+// NOTE(review): assumes f is an enum field whose subdef is already set; the
+// downcast below is not guarded - confirm against callers.
+static bool upb_fielddef_resolvedefault(upb_fielddef *f, upb_status *s) {
+ if (!f->default_is_string) return true;
+ // Resolve the enum's default from a string to an integer.
+ upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval);
+ assert(bytes); // Points to either a real default or the empty string.
+ upb_enumdef *e = upb_downcast_enumdef(upb_fielddef_subdef_mutable(f));
+ int32_t val = 0;
+ if (upb_byteregion_len(bytes) == 0) {
+ upb_value_setint32(&f->defaultval, e->defaultval);
+ } else {
+ size_t len;
+ // ptr is guaranteed to be NULL-terminated because the byteregion was
+ // created with upb_byteregion_newl().
+ const char *ptr = upb_byteregion_getptr(
+ bytes, upb_byteregion_startofs(bytes), &len);
+ assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk.
+ bool success = upb_enumdef_ntoi(e, ptr, &val);
+ if (!success) {
+ upb_status_seterrf(
+ s, "Default enum value (%s) is not a member of the enum", ptr);
+ return false;
+ }
+ upb_value_setint32(&f->defaultval, val);
}
- f->default_is_string = false;
- upb_byteregion_free(bytes);
+ // defaultval now holds the integer; "bytes" is the saved pointer to the old
+ // string region, which is released last.
+ f->default_is_string = false;
+ upb_byteregion_free(bytes);
return true;
}
@@ -361,42 +515,50 @@ bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number) {
return true;
}
-bool upb_fielddef_setname(upb_fielddef *f, const char *name) {
- assert(f->msgdef == NULL);
- free(f->name);
- f->name = strdup(name);
- return true;
-}
-
-bool upb_fielddef_settype(upb_fielddef *f, uint8_t type) {
- assert(!f->finalized);
+bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
+ assert(upb_fielddef_ismutable(f));
upb_fielddef_uninit_default(f);
f->type = type;
upb_fielddef_init_default(f);
return true;
}
-bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label) {
- assert(!f->finalized);
+bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
+ assert(upb_fielddef_ismutable(f));
f->label = label;
return true;
}
+// Sets a non-string default value on a mutable field. Must not be used on
+// fields for which upb_isstring() or upb_issubmsg() holds. If the current
+// default is stored as a string, that byteregion is freed first.
void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) {
- assert(!f->finalized);
- assert(!upb_isstring(f));
+ assert(upb_fielddef_ismutable(f));
+ assert(!upb_isstring(f) && !upb_issubmsg(f));
+ // Reached with a string default only when the asserts above allow it,
+ // e.g. an enum default set through upb_fielddef_setdefaultstr().
+ if (f->default_is_string) {
+ upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval);
+ assert(bytes);
+ upb_byteregion_free(bytes);
+ }
f->defaultval = value;
+ f->default_is_string = false;
}
-void upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) {
+// Sets the field's default to a copy of the len bytes at str. Valid for
+// string fields and for enum fields, where the string is later resolved to a
+// number by name. For an enum field the bytes must form a valid
+// single-component identifier; if not, returns false and leaves the current
+// default untouched. Returns true once the new default is installed.
+bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) {
assert(upb_isstring(f) || f->type == UPB_TYPE(ENUM));
+ // Validate before releasing the old default: failing after the free would
+ // leave defaultval pointing at a freed byteregion with default_is_string
+ // still set, to be freed a second time later.
+ if (f->type == UPB_TYPE(ENUM) && !upb_isident(str, len, false)) return false;
if (f->default_is_string) {
upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval);
assert(bytes);
upb_byteregion_free(bytes);
- }
- upb_value_setbyteregion(&f->defaultval, upb_byteregion_newl(str, len));
+ } else {
+ assert(f->type == UPB_TYPE(ENUM));
+ }
+ upb_byteregion *r = upb_byteregion_newl(str, len);
+ upb_value_setbyteregion(&f->defaultval, r);
+ // Fetch now so the whole string is available to later readers; an empty
+ // string reports EOF straight away.
+ upb_bytesuccess_t ret = upb_byteregion_fetch(r);
+ (void)ret;
+ assert(ret == (len == 0 ? UPB_BYTE_EOF : UPB_BYTE_OK));
+ assert(upb_byteregion_available(r, 0) == upb_byteregion_len(r));
f->default_is_string = true;
+ return true;
}
void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) {
@@ -404,82 +566,106 @@ void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) {
}
+// Stores "fval" on a mutable field. The value is copied as-is; nothing it
+// may point to is owned or freed by the fielddef (see the TODO below).
void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) {
- assert(!f->finalized);
- // TODO: string ownership?
+ assert(upb_fielddef_ismutable(f));
+ // TODO: we need an ownership/freeing mechanism for dynamically-allocated
+ // fvals. One possibility is to let the user supply a free() function
+ // and call it when the fval is no longer referenced. Would have to
+ // ensure that no common use cases need cycles.
+ //
+ // For now the fval has no ownership; the caller must simply guarantee
+ // somehow that it outlives any handlers/plan.
f->fval = fval;
}
-void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl) {
- assert(!f->finalized);
- f->accessor = vtbl;
+// Records the hasbit index for a field, which must still be mutable. The
+// value is stored without validation.
+void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit) {
+ assert(upb_fielddef_ismutable(f));
+ f->hasbit = hasbit;
}
-bool upb_fielddef_settypename(upb_fielddef *f, const char *name) {
- upb_def_unref(f->def);
- f->def = UPB_UPCAST(upb_unresolveddef_new(name));
- return true;
+// Records the offset for a field, which must still be mutable. The value is
+// stored without validation.
+void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset) {
+ assert(upb_fielddef_ismutable(f));
+ f->offset = offset;
}
-// Returns an ordering of fields based on:
-// 1. value size (small to large).
-// 2. field number.
-static int upb_fielddef_cmpval(const void *_f1, const void *_f2) {
- upb_fielddef *f1 = *(void**)_f1;
- upb_fielddef *f2 = *(void**)_f2;
- size_t size1 = upb_types[f1->type].size;
- size_t size2 = upb_types[f2->type].size;
- if (size1 != size2) return size1 - size2;
- // Otherwise return in number order.
- return f1->number - f2->number;
+// Sets the accessor vtable pointer for a field, which must still be mutable.
+// Only the pointer is stored; the table itself is not copied.
+void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *tbl) {
+ assert(upb_fielddef_ismutable(f));
+ f->accessor = tbl;
}
-// Returns an ordering of all fields based on:
-// 1. required/optional (required fields first).
-// 2. field number
-static int upb_fielddef_cmphasbit(const void *_f1, const void *_f2) {
- upb_fielddef *f1 = *(void**)_f1;
- upb_fielddef *f2 = *(void**)_f2;
- size_t req1 = f1->label == UPB_LABEL(REQUIRED);
- size_t req2 = f2->label == UPB_LABEL(REQUIRED);
- if (req1 != req2) return req1 - req2;
- // Otherwise return in number order.
- return f1->number - f2->number;
+// Returns true if "subdef" is the kind of def that field "f" may refer to:
+// a msgdef for MESSAGE and GROUP fields, an enumdef for ENUM fields. Calling
+// this for any other field type is a programming error (asserts in debug
+// builds, returns false otherwise).
+static bool upb_subtype_typecheck(upb_fielddef *f, const upb_def *subdef) {
+ if (f->type == UPB_TYPE(MESSAGE) || f->type == UPB_TYPE(GROUP))
+ return upb_dyncast_msgdef_const(subdef) != NULL;
+ else if (f->type == UPB_TYPE(ENUM))
+ return upb_dyncast_enumdef_const(subdef) != NULL;
+ else {
+ assert(false);
+ return false;
+ }
+}
+
+// Points a mutable field directly at "subdef", replacing any symbolic
+// (by-name) reference it previously held. Returns false, leaving the field
+// unchanged, if "subdef" is the wrong kind of def for the field's type.
+//
+// NOTE(review): no ref is taken on "subdef" here; presumably that happens
+// when the defs are finalized -- confirm.
+bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef) {
+ assert(upb_fielddef_ismutable(f));
+ assert(upb_hassubdef(f));
+ assert(subdef);
+ if (!upb_subtype_typecheck(f, subdef)) return false;
+ // "sub" is a union, so the owned name string must go before it is
+ // overwritten by the def pointer.
+ if (f->subdef_is_symbolic) free(f->sub.name);
+ f->sub.def = subdef;
+ f->subdef_is_symbolic = false;
+ return true;
+}
+
+// Makes a mutable field refer to its subdef symbolically, by type name. The
+// name is copied, so the caller keeps ownership of "name".
+//
+// Returns false, leaving the field unchanged, if the copy cannot be
+// allocated. Returns true otherwise.
+bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name) {
+ assert(upb_fielddef_ismutable(f));
+ assert(upb_hassubdef(f));
+ // Copy before freeing: "name" may alias the string we currently own, and
+ // on allocation failure the previous reference must remain intact.
+ char *copy = strdup(name);
+ if (!copy) return false;
+ if (f->subdef_is_symbolic) free(f->sub.name);
+ f->sub.name = copy;
+ f->subdef_is_symbolic = true;
+ return true;
}
/* upb_msgdef *****************************************************************/
-upb_msgdef *upb_msgdef_new() {
+// Creates a new, empty, mutable msgdef whose initial ref belongs to "owner".
+// Returns NULL if any allocation fails; nothing is leaked in that case.
+upb_msgdef *upb_msgdef_new(void *owner) {
upb_msgdef *m = malloc(sizeof(*m));
- upb_def_init(&m->base, UPB_DEF_MSG);
- upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
- upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
+ if (!m) return NULL;
+ if (!upb_def_init(&m->base, UPB_DEF_MSG, owner)) goto err3;
+ if (!upb_inttable_init(&m->itof)) goto err2;
+ if (!upb_strtable_init(&m->ntof)) goto err1;
m->size = 0;
m->hasbit_bytes = 0;
m->extstart = 0;
m->extend = 0;
return m;
+
+ // Unwind in reverse order of initialization. The base def must be
+ // uninitialized too once upb_def_init() has succeeded, mirroring what
+ // upb_msgdef_free() does.
+err1:
+ upb_inttable_uninit(&m->itof);
+err2:
+ upb_def_uninit(&m->base);
+err3:
+ free(m);
+ return NULL;
}
+// Releases the storage of a msgdef: both lookup tables, the base def, and
+// the struct itself. Unlike the previous implementation this does not free
+// the fields.
+// NOTE(review): fields are presumably released through their own refcounts
+// (upb_msgdef_addfields() takes a ref on each field on behalf of the
+// msgdef) -- confirm where that ref is dropped.
static void upb_msgdef_free(upb_msgdef *m) {
- upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i))
- upb_fielddef_free(upb_msg_iter_field(i));
- upb_strtable_free(&m->ntof);
- upb_inttable_free(&m->itof);
+ upb_strtable_uninit(&m->ntof);
+ upb_inttable_uninit(&m->itof);
upb_def_uninit(&m->base);
free(m);
}
-upb_msgdef *upb_msgdef_dup(const upb_msgdef *m) {
- upb_msgdef *newm = upb_msgdef_new();
- newm->size = m->size;
- newm->hasbit_bytes = m->hasbit_bytes;
- newm->extstart = m->extstart;
- newm->extend = m->extend;
+// Returns a new mutable msgdef, owned by "owner", that copies the size,
+// hasbit byte count, extension range, full name and (dup'd) fields of "m".
+// Returns NULL if anything fails; the partially built copy and any field
+// that could not be added to it are released first.
+upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, void *owner) {
+ upb_msgdef *newm = upb_msgdef_new(owner);
+ if (!newm) return NULL;
+ upb_msgdef_setsize(newm, upb_msgdef_size(m));
+ upb_msgdef_sethasbit_bytes(newm, upb_msgdef_hasbit_bytes(m));
+ upb_msgdef_setextrange(newm, upb_msgdef_extstart(m), upb_msgdef_extend(m));
+ upb_def_setfullname(UPB_UPCAST(newm), upb_def_fullname(UPB_UPCAST(m)));
upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_msgdef_addfield(newm, upb_fielddef_dup(upb_msg_iter_field(i)));
+ for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+ // The address of the local serves as a temporary owner for the dup; the
+ // ref is donated to newm when the field is added.
+ upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f);
+ if (!f || !upb_msgdef_addfield(newm, f, &f)) {
+ // A rejected field was never donated (upb_msgdef_addfields() only
+ // releases the donor's ref after its checks pass), so drop it here.
+ if (f) upb_fielddef_unref(f, &f);
+ upb_msgdef_unref(newm, owner);
+ return NULL;
+ }
}
return newm;
}
@@ -506,160 +692,69 @@ bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end) {
return true;
}
-bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n) {
+// Adds the "n" fields in "fields" to msgdef "m" as a single all-or-nothing
+// operation. Returns false without changing anything if any field already
+// belongs to a msgdef, has no name, has number 0, or clashes by name or
+// number with a field already in "m". On success the msgdef takes its own
+// ref on every field and, if "ref_donor" is non-NULL, the donor's ref on
+// each field is released.
+//
+// NOTE(review): fields are checked against "m" but not against each other,
+// so two fields in the same batch with the same name or number are not
+// rejected here. The results of the two table inserts are also not checked.
+bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n,
+ void *ref_donor) {
// Check constraints for all fields before performing any action.
for (int i = 0; i < n; i++) {
upb_fielddef *f = fields[i];
- assert(upb_atomic_read(&f->refcount) > 0);
- if (f->name == NULL || f->number == 0 ||
- upb_msgdef_itof(m, f->number) || upb_msgdef_ntof(m, f->name))
+ if (f->msgdef != NULL ||
+ upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0 ||
+ upb_msgdef_itof(m, upb_fielddef_number(f)) ||
+ upb_msgdef_ntof(m, upb_fielddef_name(f)))
return false;
}
// Constraint checks ok, perform the action.
for (int i = 0; i < n; i++) {
upb_fielddef *f = fields[i];
- upb_msgdef_ref(m);
- assert(f->msgdef == NULL);
f->msgdef = m;
- upb_itof_ent itof_ent = {0, f};
- upb_inttable_insert(&m->itof, f->number, &itof_ent);
- upb_strtable_insert(&m->ntof, f->name, &f);
+ upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
+ upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));
+ // Take the msgdef's ref before dropping the donor's so the field's
+ // refcount never reaches zero in between.
+ upb_fielddef_ref(f, m);
+ if (ref_donor) upb_fielddef_unref(f, ref_donor);
}
return true;
}
-static int upb_div_round_up(int numerator, int denominator) {
- /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
- return numerator > 0 ? (numerator - 1) / denominator + 1 : 0;
-}
-
-void upb_msgdef_layout(upb_msgdef *m) {
- // Create an ordering over the fields, but only include fields with accessors.
- upb_fielddef **sorted_fields =
- malloc(sizeof(upb_fielddef*) * upb_msgdef_numfields(m));
- int n = 0;
- upb_msg_iter i;
- for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- if (f->accessor) sorted_fields[n++] = f;
- }
-
- m->hasbit_bytes = upb_div_round_up(n, 8);
- m->size = m->hasbit_bytes; // + header_size?
-
- // Assign hasbits.
- qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmphasbit);
- for (int i = 0; i < n; i++) {
- upb_fielddef *f = sorted_fields[i];
- f->hasbit = i;
- }
-
- // Assign value offsets.
- qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmpval);
- size_t max_align = 0;
- for (int i = 0; i < n; i++) {
- upb_fielddef *f = sorted_fields[i];
- const upb_type_info *type_info = &upb_types[f->type];
- size_t size = type_info->size;
- size_t align = type_info->align;
- if (upb_isseq(f)) {
- size = sizeof(void*);
- align = alignof(void*);
- }
-
- // General alignment rules are: each member must be at an address that is a
- // multiple of that type's alignment. Also, the size of the structure as a
- // whole must be a multiple of the greatest alignment of any member.
- f->offset = upb_align_up(m->size, align);
- m->size = f->offset + size;
- max_align = UPB_MAX(max_align, align);
- }
- if (max_align > 0) m->size = upb_align_up(m->size, max_align);
-
- free(sorted_fields);
-}
-
-upb_msg_iter upb_msg_begin(const upb_msgdef *m) {
- return upb_inttable_begin(&m->itof);
+// Starts an iteration over the fields of "m" by initializing "iter" over the
+// msgdef's number-to-field table.
+void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) {
+ upb_inttable_begin(iter, &m->itof);
}
-upb_msg_iter upb_msg_next(const upb_msgdef *m, upb_msg_iter iter) {
- return upb_inttable_next(&m->itof, iter);
-}
+// Advances a field iterator started with upb_msg_begin(); the iterator no
+// longer needs the msgdef to be passed in.
+void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); }
/* upb_symtab *****************************************************************/
-typedef struct {
- upb_def *def;
-} upb_symtab_ent;
-
-// Given a symbol and the base symbol inside which it is defined, find the
-// symbol's definition in t.
-static upb_symtab_ent *upb_resolve(const upb_strtable *t,
- const char *base, const char *sym) {
- if(strlen(sym) == 0) return NULL;
- if(sym[0] == UPB_SYMBOL_SEPARATOR) {
- // Symbols starting with '.' are absolute, so we do a single lookup.
- // Slice to omit the leading '.'
- return upb_strtable_lookup(t, sym + 1);
- } else {
- // Remove components from base until we find an entry or run out.
- // TODO: This branch is totally broken, but currently not used.
- (void)base;
- assert(false);
- return NULL;
- }
-}
-
-static void _upb_symtab_free(upb_strtable *t) {
- upb_strtable_iter i;
- upb_strtable_begin(&i, t);
- for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
- const upb_symtab_ent *e = upb_strtable_iter_value(&i);
- assert(upb_atomic_read(&e->def->refcount) == 0);
- upb_def_free(e->def);
- }
- upb_strtable_free(t);
-}
-
+// Destroys a symtab: drops the symtab's ref on every def in its table, then
+// releases the table and the symtab itself.
static void upb_symtab_free(upb_symtab *s) {
- _upb_symtab_free(&s->symtab);
- for (uint32_t i = 0; i < s->olddefs.len; i++) {
- upb_def *d = s->olddefs.defs[i];
- assert(upb_atomic_read(&d->refcount) == 0);
- upb_def_free(d);
- }
- upb_rwlock_destroy(&s->lock);
- upb_deflist_uninit(&s->olddefs);
+ upb_strtable_iter i;
+ upb_strtable_begin(&i, &s->symtab);
+ for (; !upb_strtable_done(&i); upb_strtable_next(&i))
+ upb_def_unref(upb_value_getptr(upb_strtable_iter_value(&i)), s);
+ upb_strtable_uninit(&s->symtab);
free(s);
}
+// Takes a ref on the symtab. The count is a plain (non-atomic) increment,
+// so concurrent callers need their own synchronization.
void upb_symtab_ref(const upb_symtab *_s) {
upb_symtab *s = (upb_symtab*)_s;
- upb_atomic_ref(&s->refcount);
+ s->refcount++;
}
+// Releases a ref on the symtab and frees it when the last ref is dropped.
+// Passing NULL is a no-op. The count is a plain (non-atomic) decrement.
void upb_symtab_unref(const upb_symtab *_s) {
upb_symtab *s = (upb_symtab*)_s;
- if(s && upb_atomic_unref(&s->refcount)) {
+ if(s && --s->refcount == 0) {
upb_symtab_free(s);
}
}
+// Creates a new, empty symtab with a refcount of 1. Returns NULL if memory
+// allocation fails.
upb_symtab *upb_symtab_new() {
upb_symtab *s = malloc(sizeof(*s));
- upb_atomic_init(&s->refcount, 1);
- upb_rwlock_init(&s->lock);
- upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent));
- upb_deflist_init(&s->olddefs);
+ if (!s) return NULL;
+ s->refcount = 1;
+ // upb_strtable_init() reports allocation failure through its return value.
+ if (!upb_strtable_init(&s->symtab)) {
+ free(s);
+ return NULL;
+ }
return s;
}
const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count,
- upb_deftype_t type) {
- upb_rwlock_rdlock(&s->lock);
+ upb_deftype_t type, void *owner) {
int total = upb_strtable_count(&s->symtab);
// We may only use part of this, depending on how many symbols are of the
// correct type.
@@ -668,177 +763,252 @@ const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count,
upb_strtable_begin(&iter, &s->symtab);
int i = 0;
for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
- const upb_symtab_ent *e = upb_strtable_iter_value(&iter);
- upb_def *def = e->def;
+ upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
assert(def);
if(type == UPB_DEF_ANY || def->type == type)
defs[i++] = def;
}
- upb_rwlock_unlock(&s->lock);
*count = i;
- for(i = 0; i < *count; i++) upb_def_ref(defs[i]);
+ if (owner)
+ for(i = 0; i < *count; i++) upb_def_ref(defs[i], owner);
return defs;
}
-const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
- upb_rwlock_rdlock(&s->lock);
- upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym);
- upb_def *ret = NULL;
- if(e) {
- ret = e->def;
- upb_def_ref(ret);
- }
- upb_rwlock_unlock(&s->lock);
+// Looks up the def stored under the exact name "sym". Returns NULL if there
+// is none; otherwise returns the def after taking a ref on it on behalf of
+// "owner". No locking is performed.
+const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym,
+ void *owner) {
+ const upb_value *v = upb_strtable_lookup(&s->symtab, sym);
+ upb_def *ret = v ? upb_value_getptr(*v) : NULL;
+ if (ret) upb_def_ref(ret, owner);
return ret;
}
-const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
- upb_rwlock_rdlock(&s->lock);
- upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym);
+// Like upb_symtab_lookup(), but only succeeds if the def stored under "sym"
+// is a msgdef. Returns NULL (taking no ref) if the name is absent or names
+// another kind of def; otherwise the returned msgdef carries a ref owned by
+// "owner". No locking is performed.
+const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym,
+ void *owner) {
+ const upb_value *v = upb_strtable_lookup(&s->symtab, sym);
+ upb_def *def = v ? upb_value_getptr(*v) : NULL;
upb_msgdef *ret = NULL;
- if(e && e->def->type == UPB_DEF_MSG) {
- ret = upb_downcast_msgdef(e->def);
- upb_def_ref(UPB_UPCAST(ret));
+ if(def && def->type == UPB_DEF_MSG) {
+ ret = upb_downcast_msgdef(def);
+ upb_def_ref(def, owner);
}
- upb_rwlock_unlock(&s->lock);
return ret;
}
+// Given a symbol and the base symbol inside which it is defined, find the
+// symbol's definition in t.
+//
+// Only absolute symbols (those starting with UPB_SYMBOL_SEPARATOR) are
+// supported: the separator is stripped and the remainder is looked up
+// directly. Relative symbols assert in debug builds and yield NULL
+// otherwise, as does the empty string. No ref is taken on the result.
+static upb_def *upb_resolvename(const upb_strtable *t,
+ const char *base, const char *sym) {
+ if(strlen(sym) == 0) return NULL;
+ if(sym[0] == UPB_SYMBOL_SEPARATOR) {
+ // Symbols starting with '.' are absolute, so we do a single lookup.
+ // Slice to omit the leading '.'
+ const upb_value *v = upb_strtable_lookup(t, sym + 1);
+ return v ? upb_value_getptr(*v) : NULL;
+ } else {
+ // Remove components from base until we find an entry or run out.
+ // TODO: This branch is totally broken, but currently not used.
+ (void)base;
+ assert(false);
+ return NULL;
+ }
+}
+
+// Resolves "sym" as it would be seen from within "base" (see
+// upb_resolvename() for the currently supported forms). Returns NULL if the
+// symbol cannot be resolved; otherwise returns the def after taking a ref on
+// it on behalf of "owner". No locking is performed.
const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
- const char *sym) {
- upb_rwlock_rdlock(&s->lock);
- upb_symtab_ent *e = upb_resolve(&s->symtab, base, sym);
- upb_def *ret = NULL;
- if(e) {
- ret = e->def;
- upb_def_ref(ret);
- }
- upb_rwlock_unlock(&s->lock);
+ const char *sym, void *owner) {
+ upb_def *ret = upb_resolvename(&s->symtab, base, sym);
+ if (ret) upb_def_ref(ret, owner);
return ret;
}
-bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n,
- upb_strtable *addtab) {
- // This linear search makes the DFS O(n^2) in the length of the paths.
- // Could make this O(n) with a hash table, but n is small.
- for (int i = 0; i < n; i++) {
- if (def == open_defs[i]) return false;
- }
-
- bool needcopy = false;
- upb_msgdef *m = upb_dyncast_msgdef(def);
- if (m) {
- upb_msg_iter i;
- open_defs[n++] = def;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- if (!upb_hassubdef(f)) continue;
- needcopy |= upb_symtab_dfs(f->def, open_defs, n, addtab);
+// Adds dups of any existing def that can reach a def with the same name as one
+// of "defs." This is to provide a consistent output graph as documented in
+// the header file. We use a modified depth-first traversal that traverses
+// each SCC (which we already computed) as if it were a single node. This
+// allows us to traverse the possibly-cyclic graph as if it were a DAG and to
+// easily dup the correct set of nodes with O(n) time.
+//
+// "deftab" maps names to the defs being added, "new_owner" owns any dups
+// created here, and "seen" memoizes the result per visited def. Errors are
+// reported through "s"; callers must check upb_ok(s) because a false return
+// is also a legitimate (non-error) answer.
+//
+// Returns true if defs that can reach "def" need to be duplicated into deftab.
+static bool upb_resolve_dfs(const upb_def *def, upb_strtable *deftab,
+ void *new_owner, upb_inttable *seen,
+ upb_status *s) {
+ // Memoize results of this function for efficiency (since we're traversing a
+ // DAG this is not needed to limit the depth of the search).
+ upb_value *v = upb_inttable_lookup(seen, (uintptr_t)def);
+ if (v) return upb_value_getbool(*v);
+
+ // Visit submessages for all messages in the SCC.
+ bool need_dup = false;
+ const upb_def *base = def;
+ do {
+ assert(upb_def_isfinalized(def));
+ // In a do/while, "continue" jumps to the loop condition, which is what
+ // advances "def" to the next member of the SCC.
+ if (def->type == UPB_DEF_FIELD) continue;
+ upb_value *v = upb_strtable_lookup(deftab, upb_def_fullname(def));
+ if (v) {
+ upb_def *add_def = upb_value_getptr(*v);
+ // A non-NULL next pointer marks a dup and records which def it was
+ // dup'd from; a dup of some other def under this name is a conflict.
+ if (add_def->refcount.next && add_def->refcount.next != &def->refcount) {
+ upb_status_seterrf(s, "conflicting existing defs for name: '%s'",
+ upb_def_fullname(def));
+ return false;
+ }
+ need_dup = true;
+ }
+ const upb_msgdef *m = upb_dyncast_msgdef_const(def);
+ if (m) {
+ upb_msg_iter i;
+ for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+ upb_fielddef *f = upb_msg_iter_field(&i);
+ if (!upb_hassubdef(f)) continue;
+ // |= to avoid short-circuit; we need its side-effects.
+ need_dup |= upb_resolve_dfs(
+ upb_fielddef_subdef_mutable(f), deftab, new_owner, seen, s);
+ if (!upb_ok(s)) return false;
+ }
}
+ } while ((def = (upb_def*)def->refcount.next) != base);
+
+ if (need_dup) {
+ // Dup any defs that don't already have entries in deftab.
+ def = base;
+ do {
+ if (def->type == UPB_DEF_FIELD) continue;
+ const char *name = upb_def_fullname(def);
+ if (upb_strtable_lookup(deftab, name) == NULL) {
+ upb_def *newdef = upb_def_dup(def, new_owner);
+ if (!newdef) goto oom;
+ if (!upb_strtable_insert(deftab, name, upb_value_ptr(newdef))) {
+ // The table never took the dup, so nobody else will release it.
+ // Its next pointer has not been borrowed yet, so it is still in a
+ // normal state for unref.
+ upb_def_unref(newdef, new_owner);
+ goto oom;
+ }
+ // We temporarily use this field to track who we were dup'd from.
+ newdef->refcount.next = (upb_refcount*)def;
+ }
+ } while ((def = (upb_def*)def->refcount.next) != base);
}
- bool replacing = (upb_strtable_lookup(addtab, m->base.fqname) != NULL);
- if (needcopy && !replacing) {
- upb_symtab_ent e = {upb_def_dup(def)};
- upb_strtable_insert(addtab, def->fqname, &e);
- replacing = true;
- }
- return replacing;
-}
+ // Both loops end with def == base, so the result is memoized under the def
+ // this call was entered with.
+ upb_inttable_insert(seen, (uintptr_t)def, upb_value_bool(need_dup));
+ return need_dup;
-bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status) {
- upb_rwlock_wrlock(&s->lock);
+oom:
+ upb_status_seterrliteral(s, "out of memory");
+ return false;
+}
- // Add all defs to a table for resolution.
+// Adds the "n" mutable defs in "defs" to the symtab as one transaction,
+// replacing any existing defs of the same name. Existing defs that can reach
+// a replaced def are dup'd so that they point at the new versions, symbolic
+// sub-type references are resolved, and the whole set is finalized before it
+// becomes visible in the symtab.
+//
+// On success returns true and the refs "ref_donor" held on the defs are
+// donated to the symtab. On failure returns false with the reason in
+// "status"; the caller's defs keep their refs, any defs dup'd along the way
+// are released, and the symtab's table is not modified.
+bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
+ upb_status *status) {
+ upb_def **add_defs = NULL;
+ // Index in add_defs of the first dup'd def (user defs are packed first).
+ int first_dup = 0;
upb_strtable addtab;
- upb_strtable_init(&addtab, n, sizeof(upb_symtab_ent));
+ if (!upb_strtable_init(&addtab)) {
+ upb_status_seterrliteral(status, "out of memory");
+ return false;
+ }
+
+ // Add new defs to table.
for (int i = 0; i < n; i++) {
upb_def *def = defs[i];
- if (upb_strtable_lookup(&addtab, def->fqname)) {
- upb_status_seterrf(status, "Conflicting defs named '%s'", def->fqname);
- upb_strtable_free(&addtab);
- return false;
+ assert(upb_def_ismutable(def));
+ const char *fullname = upb_def_fullname(def);
+ if (!fullname) {
+ upb_status_seterrliteral(
+ status, "Anonymous defs cannot be added to a symtab");
+ goto err;
}
- upb_strtable_insert(&addtab, def->fqname, &def);
+ if (upb_strtable_lookup(&addtab, fullname) != NULL) {
+ upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
+ goto err;
+ }
+ if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
+ goto oom_err;
+ // We temporarily use this field to indicate that we came from the user's
+ // list rather than being dup'd.
+ def->refcount.next = NULL;
}
- // All existing defs that can reach defs that are being replaced must
- // themselves be replaced with versions that will point to the new defs.
- // Do a DFS -- any path that finds a new def must replace all ancestors.
- upb_strtable *symtab = &s->symtab;
+ // Add dups of any existing def that can reach a def with the same name as
+ // one of "defs."
+ upb_inttable seen;
+ if (!upb_inttable_init(&seen)) goto oom_err;
upb_strtable_iter i;
- upb_strtable_begin(&i, symtab);
- for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
- upb_def *open_defs[UPB_MAX_TYPE_DEPTH];
- const upb_symtab_ent *e = upb_strtable_iter_value(&i);
- upb_symtab_dfs(e->def, open_defs, 0, &addtab);
+ upb_strtable_begin(&i, &s->symtab);
+ for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+ upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
+ upb_resolve_dfs(def, &addtab, ref_donor, &seen, status);
+ if (!upb_ok(status)) {
+ // The shared error path knows nothing about "seen".
+ upb_inttable_uninit(&seen);
+ goto err;
+ }
}
+ upb_inttable_uninit(&seen);
- // Resolve all refs.
+ // Now using the table, resolve symbolic references.
upb_strtable_begin(&i, &addtab);
- for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
- const upb_symtab_ent *e = upb_strtable_iter_value(&i);
- upb_msgdef *m = upb_dyncast_msgdef(e->def);
- if(!m) continue;
+ for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+ upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
+ upb_msgdef *m = upb_dyncast_msgdef(def);
+ if (!m) continue;
// Type names are resolved relative to the message in which they appear.
- const char *base = m->base.fqname;
+ const char *base = upb_def_fullname(UPB_UPCAST(m));
upb_msg_iter j;
- for(j = upb_msg_begin(m); !upb_msg_done(j); j = upb_msg_next(m, j)) {
- upb_fielddef *f = upb_msg_iter_field(j);
- if (f->type == 0) {
- upb_status_seterrf(status, "Field type was not set.");
- return false;
- }
-
- if (!upb_hassubdef(f)) continue; // No resolving necessary.
- upb_downcast_unresolveddef(f->def); // Type check.
- const char *name = f->def->fqname;
-
- // Resolve from either the addtab (pending adds) or symtab (existing
- // defs). If both exist, prefer the pending add, because it will be
- // overwriting the existing def.
- upb_symtab_ent *found;
- if(!(found = upb_resolve(&addtab, base, name)) &&
- !(found = upb_resolve(symtab, base, name))) {
- upb_status_seterrf(status, "could not resolve symbol '%s' "
- "in context '%s'", name, base);
- return false;
+ for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) {
+ upb_fielddef *f = upb_msg_iter_field(&j);
+ const char *name = upb_fielddef_subtypename(f);
+ if (name) {
+ upb_def *subdef = upb_resolvename(&addtab, base, name);
+ if (subdef == NULL) {
+ upb_status_seterrf(
+ status, "couldn't resolve name '%s' in message '%s'", name, base);
+ goto err;
+ } else if (!upb_fielddef_setsubdef(f, subdef)) {
+ upb_status_seterrf(
+ status, "def '%s' had the wrong type for field '%s'",
+ upb_def_fullname(subdef), upb_fielddef_name(f));
+ goto err;
+ }
}
- // Check the type of the found def.
- upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
- if(found->def->type != expected) {
- upb_status_seterrliteral(status, "Unexpected type");
- return false;
- }
- if (!upb_fielddef_resolve(f, found->def, status)) return false;
+ if (upb_fielddef_type(f) == UPB_TYPE(ENUM) && upb_fielddef_subdef(f) &&
+ !upb_fielddef_resolvedefault(f, status))
+ goto err;
}
}
- // The defs in the transaction have been vetted, and can be moved to the
- // symtab without causing errors.
+ // We need an array of the defs in addtab, for passing to upb_finalize.
+ // Defs the user passed in are packed at the front and dup'd defs at the
+ // back, so the two groups can still be told apart once the next pointers
+ // have been restored.
+ add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab));
+ if (add_defs == NULL) goto oom_err;
+ first_dup = upb_strtable_count(&addtab);
upb_strtable_begin(&i, &addtab);
- for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
- const upb_symtab_ent *tmptab_e = upb_strtable_iter_value(&i);
- upb_def_movetosymtab(tmptab_e->def, s);
- upb_symtab_ent *symtab_e =
- upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
- if(symtab_e) {
- upb_deflist_push(&s->olddefs, symtab_e->def);
- symtab_e->def = tmptab_e->def;
+ for (n = 0; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+ upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
+ if (def->refcount.next) add_defs[--first_dup] = def;
+ else add_defs[n++] = def;
+ }
+ n = upb_strtable_count(&addtab);
+
+ // Restore the next pointer that we stole.
+ for (int i = 0; i < n; i++)
+ add_defs[i]->refcount.next = &add_defs[i]->refcount;
+
+ if (!upb_finalize(add_defs, n, status)) {
+ // The markers are gone, so the shared error path below could no longer
+ // tell the caller's defs from our dups. Release only the dups, using the
+ // owner they were created with.
+ for (int i = first_dup; i < n; i++) upb_def_unref(add_defs[i], ref_donor);
+ upb_strtable_uninit(&addtab);
+ free(add_defs);
+ return false;
+ }
+ upb_strtable_uninit(&addtab);
+
+ for (int i = 0; i < n; i++) {
+ upb_def *def = add_defs[i];
+ const char *name = upb_def_fullname(def);
+ upb_def_donateref(def, ref_donor, s);
+ upb_value *v = upb_strtable_lookup(&s->symtab, name);
+ if(v) {
+ // Replace in place: drop the symtab's ref on the def being replaced.
+ upb_def_unref(upb_value_getptr(*v), s);
+ upb_value_setptr(v, def);
} else {
- upb_strtable_insert(&s->symtab, tmptab_e->def->fqname, tmptab_e);
+ // NOTE(review): the result of this insert is not checked.
+ upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
}
}
-
- upb_strtable_free(&addtab);
- upb_rwlock_unlock(&s->lock);
- upb_symtab_gc(s);
+ free(add_defs);
return true;
-}
-void upb_symtab_gc(upb_symtab *s) {
- (void)s;
- // TODO.
+oom_err:
+ upb_status_seterrliteral(status, "out of memory");
+err: {
+ // Only reached while refcount.next still carries our markers: NULL for a
+ // def the user passed in (which the caller still owns), anything else for
+ // a def we dup'd. Put every pointer back before releasing anything so no
+ // def is left, or unref'd, with a borrowed next pointer. Dups are owned
+ // by ref_donor (see the upb_resolve_dfs() call above).
+ upb_strtable_iter i;
+ upb_strtable_begin(&i, &addtab);
+ for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+ upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
+ bool dupd = (def->refcount.next != NULL);
+ def->refcount.next = &def->refcount;
+ if (dupd) upb_def_unref(def, ref_donor);
+ }
+ }
+ upb_strtable_uninit(&addtab);
+ free(add_defs);
+ return false;
}
diff --git a/upb/def.h b/upb/def.h
index 462655a..452b809 100644
--- a/upb/def.h
+++ b/upb/def.h
@@ -1,17 +1,17 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2009-2011 Google Inc. See LICENSE for details.
+ * Copyright (c) 2009-2012 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
- * Provides a mechanism for creating and linking proto definitions.
- * These form the protobuf schema, and are used extensively throughout upb:
+ * Defs are upb's internal representation of the constructs that can appear
+ * in a .proto file:
+ *
* - upb_msgdef: describes a "message" construct.
* - upb_fielddef: describes a message field.
* - upb_enumdef: describes an enum.
* (TODO: definitions of services).
*
- *
* Defs go through two distinct phases of life:
*
* 1. MUTABLE: when first created, the properties of the def can be set freely
@@ -20,16 +20,15 @@
* not be used for any purpose except to set its properties (it can't be
* used to parse anything, create any messages in memory, etc).
*
- * 2. FINALIZED: after being added to a symtab (which links the defs together)
- * the defs become finalized (thread-safe and immutable). Programs may only
- * access defs through a CONST POINTER during this stage -- upb_symtab will
- * help you out with this requirement by only vending const pointers, but
- * you need to make sure not to use any non-const pointers you still have
- * sitting around. In practice this means that you may not call any setters
- * on the defs (or functions that themselves call the setters). If you want
- * to modify an existing immutable def, copy it with upb_*_dup(), modify the
- * copy, and add the modified def to the symtab (replacing the existing
- * def).
+ * 2. FINALIZED: the upb_finalize() operation finalizes a set of defs,
+ * which makes them thread-safe and immutable. Finalized defs may only be
+ * accessed through a CONST POINTER. If you want to modify an existing
+ * immutable def, copy it with upb_*_dup() and modify and finalize the copy.
+ *
+ * The refcounting of defs works properly no matter what state the def is in.
+ * Once the def is finalized it is guaranteed that any def reachable from a
+ * live def is also live (so a ref on the base of a message tree keeps the
+ * whole tree alive).
*
* You can test for which stage of life a def is in by calling
* upb_def_ismutable(). This is particularly useful for dynamic language
@@ -46,181 +45,306 @@
#ifndef UPB_DEF_H_
#define UPB_DEF_H_
-#include "upb/atomic.h"
+#include "upb/refcount.h"
#include "upb/table.h"
#ifdef __cplusplus
extern "C" {
#endif
-struct _upb_symtab;
-typedef struct _upb_symtab upb_symtab;
+/* upb_def: base class for defs **********************************************/
// All the different kind of defs we support. These correspond 1:1 with
// declarations in a .proto file.
typedef enum {
- UPB_DEF_MSG = 1,
+ UPB_DEF_MSG,
+ UPB_DEF_FIELD,
UPB_DEF_ENUM,
UPB_DEF_SERVICE, // Not yet implemented.
UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*()
- UPB_DEF_UNRESOLVED = 99, // Internal-only.
} upb_deftype_t;
-
-/* upb_def: base class for defs **********************************************/
-
-typedef struct {
- char *fqname; // Fully qualified.
- upb_symtab *symtab; // Def is mutable iff symtab == NULL.
- upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0).
+// Common header shared by every kind of def; concrete defs embed it as their
+// "base" member (see UPB_UPCAST).
+typedef struct _upb_def {
+ upb_refcount refcount; // Keep first: def.c casts upb_def* <-> upb_refcount*.
+ char *fullname; // Fully-qualified name, or NULL for an anonymous def.
upb_deftype_t type;
+ bool is_finalized; // Presumably what upb_def_isfinalized() reports -- confirm.
} upb_def;
+#define UPB_UPCAST(ptr) (&(ptr)->base)
+
// Call to ref/unref a def. Can be used at any time, but is not thread-safe
-// until the def is in a symtab. While a def is in a symtab, everything
-// reachable from that def (the symtab and all defs in the symtab) are
-// guaranteed to be alive.
-void upb_def_ref(const upb_def *def);
-void upb_def_unref(const upb_def *def);
-upb_def *upb_def_dup(const upb_def *def);
-
-// A def is mutable until it has been added to a symtab.
+// until the def is finalized. While a def is finalized, everything reachable
+// from that def is guaranteed to be alive.
+void upb_def_ref(const upb_def *def, void *owner);
+void upb_def_unref(const upb_def *def, void *owner);
+void upb_def_donateref(const upb_def *def, void *from, void *to);
+upb_def *upb_def_dup(const upb_def *def, void *owner);
+
+// A def is mutable until it has been finalized.
bool upb_def_ismutable(const upb_def *def);
-INLINE const char *upb_def_fqname(const upb_def *def) { return def->fqname; }
-bool upb_def_setfqname(upb_def *def, const char *fqname); // Only if mutable.
+bool upb_def_isfinalized(const upb_def *def);
-#define UPB_UPCAST(ptr) (&(ptr)->base)
+// "fullname" is the def's fully-qualified name (eg. foo.bar.Message).
+INLINE const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
+
+// The def must be mutable. Caller retains ownership of fullname. Defs are
+// not required to have a name; if a def has no name when it is finalized, it
+// will remain an anonymous def.
+bool upb_def_setfullname(upb_def *def, const char *fullname);
+
+// Finalizes the given defs; this validates all constraints and marks the defs
+// as finalized (read-only). This will also cause fielddefs to take refs on
+// their subdefs so that any reachable def will be kept alive (but this is
+// done in a way that correctly handles circular references).
+//
+// Returns true on success, in which case every def in "defs" has been
+// finalized. Returns false on failure, with the reason reported in "status."
+// In both cases the caller retains ownership of the "defs" array itself.
+//
+// Symbolic references to sub-types or enum defaults must have already been
+// resolved. "defs" must contain the transitive closure of any mutable defs
+// reachable from any def in the list. In other words, there may not be a
+// mutable def which is reachable from one of "defs" that does not appear
+// elsewhere in "defs." "defs" may not contain fielddefs, but any fielddefs
+// reachable from the given msgdefs will be finalized.
+//
+// n is currently limited to 64k defs, if more are required break them into
+// batches of 64k (or we could raise this limit, at the cost of a bigger
+// upb_def structure or complexity in upb_finalize()).
+bool upb_finalize(upb_def *const*defs, int n, upb_status *status);
/* upb_fielddef ***************************************************************/
-// A upb_fielddef describes a single field in a message. It isn't a full def
-// in the sense that it derives from upb_def. It cannot stand on its own; it
-// must be part of a upb_msgdef. It is also reference-counted.
+// We choose these to match descriptor.proto. Clients may use UPB_TYPE() and
+// UPB_LABEL() instead of referencing these directly.
+typedef enum {
+ UPB_TYPE_NONE = -1, // Internal-only, may be removed.
+ UPB_TYPE_ENDGROUP = 0, // Internal-only, may be removed.
+ UPB_TYPE_DOUBLE = 1,
+ UPB_TYPE_FLOAT = 2,
+ UPB_TYPE_INT64 = 3,
+ UPB_TYPE_UINT64 = 4,
+ UPB_TYPE_INT32 = 5,
+ UPB_TYPE_FIXED64 = 6,
+ UPB_TYPE_FIXED32 = 7,
+ UPB_TYPE_BOOL = 8,
+ UPB_TYPE_STRING = 9,
+ UPB_TYPE_GROUP = 10,
+ UPB_TYPE_MESSAGE = 11,
+ UPB_TYPE_BYTES = 12,
+ UPB_TYPE_UINT32 = 13,
+ UPB_TYPE_ENUM = 14,
+ UPB_TYPE_SFIXED32 = 15,
+ UPB_TYPE_SFIXED64 = 16,
+ UPB_TYPE_SINT32 = 17,
+ UPB_TYPE_SINT64 = 18,
+} upb_fieldtype_t;
+
+#define UPB_NUM_TYPES 19
+
+typedef enum {
+ UPB_LABEL_OPTIONAL = 1,
+ UPB_LABEL_REQUIRED = 2,
+ UPB_LABEL_REPEATED = 3,
+} upb_label_t;
+
+// These macros are provided for legacy reasons.
+#define UPB_TYPE(type) UPB_TYPE_ ## type
+#define UPB_LABEL(type) UPB_LABEL_ ## type
+
+// Info for a given field type.
+typedef struct {
+ uint8_t align;
+ uint8_t size;
+ uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32
+} upb_typeinfo;
+
+extern const upb_typeinfo upb_types[UPB_NUM_TYPES];
+
+// A upb_fielddef describes a single field in a message. It is most often
+// found as a part of a upb_msgdef, but can also stand alone to represent
+// an extension.
typedef struct _upb_fielddef {
+ upb_def base;
struct _upb_msgdef *msgdef;
- upb_def *def; // if upb_hasdef(f)
- upb_atomic_t refcount;
- bool finalized;
-
- // The following fields may be modified until the def is finalized.
- uint8_t type; // Use UPB_TYPE() constants.
- uint8_t label; // Use UPB_LABEL() constants.
+ union {
+ char *name; // If subdef_is_symbolic.
+ upb_def *def; // If !subdef_is_symbolic.
+ } sub; // The msgdef or enumdef for this field, if upb_hassubdef(f).
+ bool subdef_is_symbolic;
+ bool default_is_string;
+ bool subdef_is_owned;
+ upb_fieldtype_t type;
+ upb_label_t label;
int16_t hasbit;
uint16_t offset;
- bool default_is_string;
- bool active;
int32_t number;
- char *name;
- upb_value defaultval; // Only meaningful for non-repeated scalars and strings.
+ upb_value defaultval; // Only for non-repeated scalars and strings.
upb_value fval;
struct _upb_accessor_vtbl *accessor;
- const void *default_ptr;
const void *prototype;
} upb_fielddef;
-upb_fielddef *upb_fielddef_new(void);
-void upb_fielddef_ref(upb_fielddef *f);
-void upb_fielddef_unref(upb_fielddef *f);
-upb_fielddef *upb_fielddef_dup(upb_fielddef *f);
+// Returns NULL if memory allocation failed.
+upb_fielddef *upb_fielddef_new(void *owner);
+
+INLINE void upb_fielddef_ref(upb_fielddef *f, void *owner) {
+ upb_def_ref(UPB_UPCAST(f), owner);
+}
+INLINE void upb_fielddef_unref(upb_fielddef *f, void *owner) {
+ upb_def_unref(UPB_UPCAST(f), owner);
+}
+
+// Duplicates the given field, returning NULL if memory allocation failed.
+// When a fielddef is duplicated, the subdef (if any) is made symbolic if it
+// wasn't already. If the subdef is set but has no name (which is possible
+// since msgdefs are not required to have a name) the new fielddef's subdef
+// will be unset.
+upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, void *owner);
+
+INLINE bool upb_fielddef_ismutable(const upb_fielddef *f) {
+ return upb_def_ismutable(UPB_UPCAST(f));
+}
+INLINE bool upb_fielddef_isfinalized(const upb_fielddef *f) {
+ return !upb_fielddef_ismutable(f);
+}
-// A fielddef is mutable until its msgdef has been added to a symtab.
-bool upb_fielddef_ismutable(const upb_fielddef *f);
+// Simple accessors. ///////////////////////////////////////////////////////////
-// Read accessors. May be called any time.
-INLINE uint8_t upb_fielddef_type(const upb_fielddef *f) { return f->type; }
-INLINE uint8_t upb_fielddef_label(const upb_fielddef *f) { return f->label; }
+INLINE upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
+ return f->type;
+}
+INLINE upb_label_t upb_fielddef_label(const upb_fielddef *f) {
+ return f->label;
+}
INLINE int32_t upb_fielddef_number(const upb_fielddef *f) { return f->number; }
-INLINE char *upb_fielddef_name(const upb_fielddef *f) { return f->name; }
+INLINE uint16_t upb_fielddef_offset(const upb_fielddef *f) { return f->offset; }
+INLINE int16_t upb_fielddef_hasbit(const upb_fielddef *f) { return f->hasbit; }
+INLINE const char *upb_fielddef_name(const upb_fielddef *f) {
+ return upb_def_fullname(UPB_UPCAST(f));
+}
INLINE upb_value upb_fielddef_fval(const upb_fielddef *f) { return f->fval; }
-INLINE bool upb_fielddef_finalized(const upb_fielddef *f) { return f->finalized; }
INLINE struct _upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f) {
return f->msgdef;
}
INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(const upb_fielddef *f) {
return f->accessor;
}
-INLINE const char *upb_fielddef_typename(const upb_fielddef *f) {
- return f->def ? f->def->fqname : NULL;
-}
-// Returns the default value for this fielddef, which may either be something
-// the client set explicitly or the "default default" (0 for numbers, empty for
-// strings). The field's type indicates the type of the returned value, except
-// for enums. For enums the default can be set either numerically or
-// symbolically -- the upb_fielddef_default_is_symbolic() function below will
-// indicate which it is. For string defaults, the value will be a upb_strref
-// which is invalidated by any other call on this object.
-INLINE upb_value upb_fielddef_default(const upb_fielddef *f) {
- return f->defaultval;
-}
+bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type);
+bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label);
+void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit);
+void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset);
+// TODO(haberman): need a way of keeping the fval alive even if some handlers
+// outlast the fielddef.
+void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
+void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
-// The results of this function are only meaningful for enum fields, which can
-// have a default specified either as an integer or as a string. If this
-// returns true, the default returned from upb_fielddef_default() is a string,
-// otherwise it is an integer.
-INLINE bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) {
- return f->default_is_string;
+// "Number" and "fullname" must be set before the fielddef is added to a msgdef.
+// For the moment we do not allow these to be set once the fielddef is added to
+// a msgdef -- this could be relaxed in the future.
+bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
+INLINE bool upb_fielddef_setname(upb_fielddef *f, const char *name) {
+ return upb_def_setfullname(UPB_UPCAST(f), name);
}
-// The enum or submessage def for this field, if any. Only meaningful for
-// submessage, group, and enum fields (ie. when upb_hassubdef(f) is true).
-// Since defs are not linked together until they are in a symtab, this
-// will return NULL until the msgdef is in a symtab.
-upb_def *upb_fielddef_subdef(const upb_fielddef *f);
+// Field type tests. ///////////////////////////////////////////////////////////
-// Write accessors. "Number" and "name" must be set before the fielddef is
-// added to a msgdef. For the moment we do not allow these to be set once
-// the fielddef is added to a msgdef -- this could be relaxed in the future.
-bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
-bool upb_fielddef_setname(upb_fielddef *f, const char *name);
+INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
+ return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
+}
+INLINE bool upb_isstringtype(upb_fieldtype_t type) {
+ return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
+}
+INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
+ return !upb_issubmsgtype(type) && !upb_isstringtype(type);
+}
+INLINE bool upb_issubmsg(const upb_fielddef *f) {
+ return upb_issubmsgtype(f->type);
+}
+INLINE bool upb_isstring(const upb_fielddef *f) {
+ return upb_isstringtype(f->type);
+}
+INLINE bool upb_isseq(const upb_fielddef *f) {
+ return f->label == UPB_LABEL(REPEATED);
+}
-// These writers may be called at any time prior to being put in a symtab.
-bool upb_fielddef_settype(upb_fielddef *f, uint8_t type);
-bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label);
-void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
-void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
+// Default value. //////////////////////////////////////////////////////////////
-// The name of the message or enum this field is referring to. Must be found
-// at name resolution time (when upb_symtab_add() is called).
+// Returns the default value for this fielddef, which may either be something
+// the client set explicitly or the "default default" (0 for numbers, empty for
+// strings). The field's type indicates the type of the returned value, except
+// for enum fields that are still mutable.
//
-// NOTE: May only be called for fields whose type has already been set to
-// be a submessage, group, or enum! Also, will be reset to empty if the
-// field's type is set again.
-bool upb_fielddef_settypename(upb_fielddef *f, const char *name);
-
-// The default value for the field. For numeric types, use
+// For enums the default can be set either numerically or symbolically -- the
+// upb_fielddef_default_is_symbolic() function below will indicate which it is.
+// For string defaults, the value will be a upb_byteregion which is invalidated
+// by any other non-const call on this object. Once the fielddef is finalized,
+// symbolic enum defaults are resolved, so finalized enum fielddefs always have
+// a default of type int32.
+INLINE upb_value upb_fielddef_default(const upb_fielddef *f) {
+ return f->defaultval;
+}
+// Sets default value for the field. For numeric types, use
// upb_fielddef_setdefault(), and "value" must match the type of the field.
-// For string/bytes types, use upb_fielddef_setdefaultstr().
-// Enum types may use either, since the default may be set either numerically
-// or symbolically.
+// For string/bytes types, use upb_fielddef_setdefaultstr(). Enum types may
+// use either, since the default may be set either numerically or symbolically.
//
// NOTE: May only be called for fields whose type has already been set.
// Also, will be reset to default if the field's type is set again.
void upb_fielddef_setdefault(upb_fielddef *f, upb_value value);
-void upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len);
+bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len);
void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str);
-// A variety of tests about the type of a field.
-INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
- return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
-}
-INLINE bool upb_isstringtype(upb_fieldtype_t type) {
- return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
-}
-INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
- return !upb_issubmsgtype(type) && !upb_isstringtype(type);
+// The results of this function are only meaningful for mutable enum fields,
+// which can have a default specified either as an integer or as a string. If
+// this returns true, the default returned from upb_fielddef_default() is a
+// string, otherwise it is an integer.
+INLINE bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) {
+ assert(f->type == UPB_TYPE(ENUM));
+ return f->default_is_string;
}
-INLINE bool upb_issubmsg(const upb_fielddef *f) { return upb_issubmsgtype(f->type); }
-INLINE bool upb_isstring(const upb_fielddef *f) { return upb_isstringtype(f->type); }
-INLINE bool upb_isseq(const upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); }
-// Does the type of this field imply that it should contain an associated def?
+// Subdef. /////////////////////////////////////////////////////////////////////
+
+// Submessage and enum fields must reference a "subdef", which is the
+// upb_msgdef or upb_enumdef that defines their type. Note that when the
+// fielddef is mutable it may not have a subdef *yet*, but this function still
+// returns true to indicate that the field's type requires a subdef.
INLINE bool upb_hassubdef(const upb_fielddef *f) {
return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);
}
+// Before a fielddef is finalized, its subdef may be set either directly (with
+// a upb_def*) or symbolically. Symbolic refs must be resolved before the
+// containing msgdef can be finalized (see upb_resolve() above). The client is
+// responsible for making sure that "subdef" lives until this fielddef is
+// finalized or deleted.
+//
+// Both methods require that upb_hassubdef(f) (so the type must be set prior
+// to calling these methods). Returns false if this is not the case, or if
+// the given subdef is not of the correct type. The subtype is reset if the
+// field's type is changed.
+bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef);
+bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name);
+
+// Returns the enum or submessage def or symbolic name for this field, if any.
+// Requires that upb_hassubdef(f). Returns NULL if the subdef has not been set
+// or if you ask for a subdef when the subtype is currently set
+// symbolically (or vice-versa). To access the subtype's name for a linked
+// fielddef, use upb_def_fullname(upb_fielddef_subdef(f)).
+//
+// Caller does *not* own a ref on the returned def or string.
+// upb_fielddef_subtypename() is non-const because finalized defs will never
+// have a symbolic reference (they must be resolved before the msgdef can be
+// finalized).
+upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f);
+const upb_def *upb_fielddef_subdef(const upb_fielddef *f);
+const char *upb_fielddef_subtypename(upb_fielddef *f);
+
/* upb_msgdef *****************************************************************/
@@ -232,31 +356,31 @@ typedef struct _upb_msgdef {
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
- // The following fields may be modified until finalized.
+ // The following fields may be modified while mutable.
uint16_t size;
uint8_t hasbit_bytes;
// The range of tag numbers used to store extensions.
uint32_t extstart, extend;
+ // Used for proto2 integration.
+ const void *prototype;
} upb_msgdef;
-// Hash table entries for looking up fields by name or number.
-typedef struct {
- bool junk;
- upb_fielddef *f;
-} upb_itof_ent;
-typedef struct {
- upb_fielddef *f;
-} upb_ntof_ent;
+// Returns NULL if memory allocation failed.
+upb_msgdef *upb_msgdef_new(void *owner);
-upb_msgdef *upb_msgdef_new(void);
-INLINE void upb_msgdef_unref(const upb_msgdef *md) { upb_def_unref(UPB_UPCAST(md)); }
-INLINE void upb_msgdef_ref(const upb_msgdef *md) { upb_def_ref(UPB_UPCAST(md)); }
+INLINE void upb_msgdef_unref(const upb_msgdef *md, void *owner) {
+ upb_def_unref(UPB_UPCAST(md), owner);
+}
+INLINE void upb_msgdef_ref(const upb_msgdef *md, void *owner) {
+ upb_def_ref(UPB_UPCAST(md), owner);
+}
// Returns a new msgdef that is a copy of the given msgdef (and a copy of all
// the fields) but with any references to submessages broken and replaced with
-// just the name of the submessage. This can be put back into another symtab
-// and the names will be re-resolved in the new context.
-upb_msgdef *upb_msgdef_dup(const upb_msgdef *m);
+// just the name of the submessage. Returns NULL if memory allocation failed.
+// This can be put back into another symtab and the names will be re-resolved
+// in the new context.
+upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, void *owner);
// Read accessors. May be called at any time.
INLINE size_t upb_msgdef_size(const upb_msgdef *m) { return m->size; }
@@ -271,38 +395,35 @@ void upb_msgdef_setsize(upb_msgdef *m, uint16_t size);
void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes);
bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end);
-// Adds a set of fields (upb_fielddef objects) to a msgdef. Caller retains its
-// ref on the fielddef. May only be done before the msgdef is in a symtab
-// (requires upb_def_ismutable(m) for the msgdef). The fielddef's name and
-// number must be set, and the message may not already contain any field with
-// this name or number, and this fielddef may not be part of another message,
-// otherwise false is returned and no action is performed.
-bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *f, int n);
-INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
- return upb_msgdef_addfields(m, &f, 1);
-}
-
-// Sets the layout of all fields according to default rules:
-// 1. Hasbits for required fields come first, then optional fields.
-// 2. Values are laid out in a way that respects alignment rules.
-// 3. The order is chosen to minimize memory usage.
-// This should only be called once all fielddefs have been added.
-// TODO: will likely want the ability to exclude strings/submessages/arrays.
-// TODO: will likely want the ability to define a header size.
-void upb_msgdef_layout(upb_msgdef *m);
+// Adds a set of fields (upb_fielddef objects) to a msgdef. Requires that the
+// msgdef and all the fielddefs are mutable. The fielddef's name and number
+// must be set, and the message may not already contain any field with this
+// name or number, and this fielddef may not be part of another message. In
+// error cases false is returned and the msgdef is unchanged.
+//
+// On success, the msgdef takes a ref on the fielddef so the caller needn't
+// worry about continuing to keep it alive (however the reverse is not true;
+// refs on the fielddef will *not* keep the msgdef alive). If ref_donor is
+// non-NULL, caller passes a ref on the fielddef from ref_donor to the msgdef,
+// otherwise caller retains its reference(s) on the defs in f.
+bool upb_msgdef_addfields(
+ upb_msgdef *m, upb_fielddef *const *f, int n, void *ref_donor);
+INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f,
+ void *ref_donor) {
+ return upb_msgdef_addfields(m, &f, 1, ref_donor);
+}
// Looks up a field by name or number. While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible. These return NULL if no such field is found.
INLINE upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
- upb_itof_ent *e = (upb_itof_ent*)
- upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent));
- return e ? e->f : NULL;
+ const upb_value *val = upb_inttable_lookup32(&m->itof, i);
+ return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL;
}
INLINE upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name) {
- upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
- return e ? e->f : NULL;
+ const upb_value *val = upb_strtable_lookup(&m->ntof, name);
+ return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL;
}
INLINE int upb_msgdef_numfields(const upb_msgdef *m) {
@@ -313,20 +434,19 @@ INLINE int upb_msgdef_numfields(const upb_msgdef *m) {
// TODO: the iteration should be in field order.
// Iterators are invalidated when a field is added or removed.
// upb_msg_iter i;
-// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
-// upb_fielddef *f = upb_msg_iter_field(i);
+// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+// upb_fielddef *f = upb_msg_iter_field(&i);
// // ...
// }
typedef upb_inttable_iter upb_msg_iter;
-upb_msg_iter upb_msg_begin(const upb_msgdef *m);
-upb_msg_iter upb_msg_next(const upb_msgdef *m, upb_msg_iter iter);
-INLINE bool upb_msg_done(upb_msg_iter iter) { return upb_inttable_done(iter); }
+void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m);
+void upb_msg_next(upb_msg_iter *iter);
+INLINE bool upb_msg_done(upb_msg_iter *iter) { return upb_inttable_done(iter); }
// Iterator accessor.
-INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
- upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_iter_value(iter);
- return ent->f;
+INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter *iter) {
+ return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
}
@@ -339,84 +459,75 @@ typedef struct _upb_enumdef {
int32_t defaultval;
} upb_enumdef;
-typedef struct {
- uint32_t value;
-} upb_ntoi_ent;
-
-typedef struct {
- bool junk;
- char *str;
-} upb_iton_ent;
-
-upb_enumdef *upb_enumdef_new(void);
-INLINE void upb_enumdef_ref(const upb_enumdef *e) { upb_def_ref(UPB_UPCAST(e)); }
-INLINE void upb_enumdef_unref(const upb_enumdef *e) { upb_def_unref(UPB_UPCAST(e)); }
-upb_enumdef *upb_enumdef_dup(const upb_enumdef *e);
+// Returns NULL if memory allocation failed.
+upb_enumdef *upb_enumdef_new(void *owner);
+INLINE void upb_enumdef_ref(const upb_enumdef *e, void *owner) {
+ upb_def_ref(&e->base, owner);
+}
+INLINE void upb_enumdef_unref(const upb_enumdef *e, void *owner) {
+ upb_def_unref(&e->base, owner);
+}
+upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, void *owner);
-INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; }
+INLINE int32_t upb_enumdef_default(const upb_enumdef *e) {
+ return e->defaultval;
+}
// May only be set if upb_def_ismutable(e).
void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);
-// Adds a value to the enumdef. Requires that no existing val has this
-// name or number (returns false and does not add if there is). May only
-// be called before the enumdef is in a symtab.
-bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num);
+// Returns the number of values currently defined in the enum. Note that
+// multiple names can refer to the same number, so this may be greater than the
+// total number of unique numbers.
+INLINE int upb_enumdef_numvals(const upb_enumdef *e) {
+ return upb_strtable_count(&e->ntoi);
+}
+
+// Adds a value to the enumdef. Requires that no existing val has this name,
+// but duplicate numbers are allowed. May only be called if the enumdef is
+// mutable. Returns false if the name is already used, or if "name" is not a
+// valid label, or on memory allocation failure (we may want to distinguish
+// these failure cases in the future).
+bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num);
-// Lookups from name to integer and vice-versa.
-bool upb_enumdef_ntoil(upb_enumdef *e, const char *name, size_t len, int32_t *num);
-bool upb_enumdef_ntoi(upb_enumdef *e, const char *name, int32_t *num);
-// Caller does not own the returned string.
-const char *upb_enumdef_iton(upb_enumdef *e, int32_t num);
+// Lookups from name to integer, returning true if found.
+bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, int32_t *num);
+
+// Finds the name corresponding to the given number, or NULL if none was found.
+// If more than one name corresponds to this number, returns the first one that
+// was added.
+const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num);
// Iteration over name/value pairs. The order is undefined.
// Adding an enum val invalidates any iterators.
// upb_enum_iter i;
-// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
+// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
// // ...
// }
-typedef upb_inttable_iter upb_enum_iter;
+typedef upb_strtable_iter upb_enum_iter;
-upb_enum_iter upb_enum_begin(const upb_enumdef *e);
-upb_enum_iter upb_enum_next(const upb_enumdef *e, upb_enum_iter iter);
-INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); }
+void upb_enum_begin(upb_enum_iter *iter, const upb_enumdef *e);
+void upb_enum_next(upb_enum_iter *iter);
+bool upb_enum_done(upb_enum_iter *iter);
// Iterator accessors.
-INLINE char *upb_enum_iter_name(upb_enum_iter iter) {
- upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter);
- return e->str;
+INLINE const char *upb_enum_iter_name(upb_enum_iter *iter) {
+ return upb_strtable_iter_key(iter);
}
-INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
- return upb_inttable_iter_key(iter);
+INLINE int32_t upb_enum_iter_number(upb_enum_iter *iter) {
+ return upb_value_getint32(upb_strtable_iter_value(iter));
}
-/* upb_deflist ****************************************************************/
-
-// upb_deflist is an internal-only dynamic array for storing a growing list of
-// upb_defs.
-typedef struct {
- upb_def **defs;
- uint32_t len;
- uint32_t size;
-} upb_deflist;
-
-void upb_deflist_init(upb_deflist *l);
-void upb_deflist_uninit(upb_deflist *l);
-void upb_deflist_push(upb_deflist *l, upb_def *d);
-
-
/* upb_symtab *****************************************************************/
-// A symtab (symbol table) is where upb_defs live. It is empty when first
-// constructed. Clients add definitions to the symtab (or replace existing
-// definitions) by calling upb_symtab_add().
-struct _upb_symtab {
- upb_atomic_t refcount;
- upb_rwlock_t lock; // Protects all members except the refcount.
- upb_strtable symtab; // The symbol table.
- upb_deflist olddefs;
-};
+// A symtab (symbol table) stores a name->def map of upb_defs. Clients could
+// always create such tables themselves, but upb_symtab has logic for resolving
+// symbolic references, which is nontrivial.
+typedef struct {
+ uint32_t refcount;
+ upb_strtable symtab;
+} upb_symtab;
upb_symtab *upb_symtab_new(void);
void upb_symtab_ref(const upb_symtab *s);
@@ -430,33 +541,47 @@ void upb_symtab_unref(const upb_symtab *s);
// within this message are searched, then within the parent, on up to the
// root namespace).
//
-// If a def is found, the caller owns one ref on the returned def. Otherwise
-// returns NULL.
+// If a def is found, the caller owns one ref on the returned def, owned by
+// owner. Otherwise returns NULL.
const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
- const char *sym);
+ const char *sym, void *owner);
-// Find an entry in the symbol table with this exact name. If a def is found,
-// the caller owns one ref on the returned def. Otherwise returns NULL.
-const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym);
-const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym);
+// Finds an entry in the symbol table with this exact name. If a def is found,
+// the caller owns one ref on the returned def, owned by owner. Otherwise
+// returns NULL.
+const upb_def *upb_symtab_lookup(
+ const upb_symtab *s, const char *sym, void *owner);
+const upb_msgdef *upb_symtab_lookupmsg(
+ const upb_symtab *s, const char *sym, void *owner);
// Gets an array of pointers to all currently active defs in this symtab. The
// caller owns the returned array (which is of length *n) as well as a ref
-// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are
-// returned, otherwise only defs of the required type are returned.
-const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *n, upb_deftype_t type);
-
-// Adds the given defs to the symtab, resolving all symbols. Only one def per
-// name may be in the list, but defs can replace existing defs in the symtab.
+// to each symbol inside (owned by owner). If type is UPB_DEF_ANY then defs of
+// all types are returned, otherwise only defs of the required type are
+// returned.
+const upb_def **upb_symtab_getdefs(
+ const upb_symtab *s, int *n, upb_deftype_t type, void *owner);
+
+// Adds the given defs to the symtab, resolving all symbols (including enum
+// default values) and finalizing the defs. Only one def per name may be in
+// the list, but defs can replace existing defs in the symtab. All defs must
+// have a name -- anonymous defs are not allowed. Anonymous defs can still be
+// finalized by calling upb_finalize() directly.
+//
+// Any existing defs that can reach defs that are being replaced will
+// themselves be replaced also, so that the resulting set of defs is fully
+// consistent.
+//
+// The logic implemented in this method is a convenience; ultimately it calls
+// some combination of upb_fielddef_setsubdef(), upb_def_dup(), and
+// upb_finalize(), any of which the client could call themselves. However,
+// since the logic for doing so is nontrivial, we provide it here.
+//
// The entire operation either succeeds or fails. If the operation fails, the
// symtab is unchanged, false is returned, and status indicates the error. The
-// caller retains its ref on all defs in all cases.
-bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status);
-
-// Frees defs that are no longer active in the symtab and are no longer
-// reachable. Such defs are not freed when they are replaced in the symtab
-// if they are still reachable from defs that are still referenced.
-void upb_symtab_gc(upb_symtab *s);
+// caller passes a ref on all defs to the symtab (even if the operation fails).
+bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
+ upb_status *status);
/* upb_def casts **************************************************************/
@@ -483,9 +608,9 @@ void upb_symtab_gc(upb_symtab *s);
return (const struct _upb_ ## lower*)def; \
}
UPB_DEF_CASTS(msgdef, MSG);
+UPB_DEF_CASTS(fielddef, FIELD);
UPB_DEF_CASTS(enumdef, ENUM);
UPB_DEF_CASTS(svcdef, SERVICE);
-UPB_DEF_CASTS(unresolveddef, UNRESOLVED);
#undef UPB_DEF_CASTS
#ifdef __cplusplus
diff --git a/upb/descriptor_const.h b/upb/descriptor/descriptor_const.h
index 20058e4..52ca803 100644
--- a/upb/descriptor_const.h
+++ b/upb/descriptor/descriptor_const.h
@@ -9,79 +9,47 @@ extern "C" {
/* Enums. */
-typedef enum google_protobuf_FieldOptions_CType {
- GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0,
- GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1,
- GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2
-} google_protobuf_FieldOptions_CType;
-
typedef enum google_protobuf_FieldDescriptorProto_Type {
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FLOAT = 2,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT32 = 5,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED32 = 7,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_MESSAGE = 11,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ENUM = 14,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED64 = 16,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17
} google_protobuf_FieldDescriptorProto_Type;
typedef enum google_protobuf_FieldDescriptorProto_Label {
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REQUIRED = 2,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1
} google_protobuf_FieldDescriptorProto_Label;
+typedef enum google_protobuf_FieldOptions_CType {
+ GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1,
+ GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0,
+ GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2
+} google_protobuf_FieldOptions_CType;
+
typedef enum google_protobuf_FileOptions_OptimizeMode {
- GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1,
GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_CODE_SIZE = 2,
+ GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1,
GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_LITE_RUNTIME = 3
} google_protobuf_FileOptions_OptimizeMode;
/* Constants for field names and numbers. */
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNUM 1
#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNAME "path"
#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDTYPE 5
@@ -106,6 +74,10 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNAME "negative_int_value"
#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDTYPE 3
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9
+
#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNUM 6
#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNAME "double_value"
#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDTYPE 1
@@ -114,10 +86,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNAME "string_value"
#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDTYPE 12
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9
-
#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNUM 1
#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNAME "name"
#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDTYPE 9
@@ -138,14 +106,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
-
#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
@@ -154,6 +114,14 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNAME "source_code_info"
#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDTYPE 11
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
+
#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNUM 1
#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNAME "name"
#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDTYPE 9
@@ -170,53 +138,13 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value"
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number"
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5
-
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method"
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11
#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNUM 1
#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNAME "location"
@@ -230,6 +158,18 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNAME "end"
#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDTYPE 5
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNUM 1
#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNAME "ctype"
#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDTYPE 14
@@ -254,18 +194,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNAME "java_package"
#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDTYPE 9
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9
-#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8
-
#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNUM 16
#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNAME "cc_generic_services"
#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDTYPE 8
@@ -286,17 +214,69 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format"
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor"
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
@@ -322,6 +302,10 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNAME "type"
#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE 14
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM 6
#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNAME "type_name"
#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE 9
@@ -330,18 +314,34 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNAME "default_value"
#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE 9
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/upb/descriptor.c b/upb/descriptor/reader.c
index 0c589f2..8177560 100644
--- a/upb/descriptor.c
+++ b/upb/descriptor/reader.c
@@ -8,13 +8,14 @@
#include <stdlib.h>
#include <errno.h>
#include "upb/def.h"
-#include "upb/descriptor.h"
+#include "upb/descriptor/descriptor_const.h"
+#include "upb/descriptor/reader.h"
// Returns a newly allocated string that joins input strings together, for example:
// join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
// join("", "Baz") -> "Baz"
// Caller owns a ref on the returned string. */
-static char *upb_join(char *base, char *name) {
+static char *upb_join(const char *base, const char *name) {
if (!base || strlen(base) == 0) {
return strdup(name);
} else {
@@ -27,6 +28,36 @@ static char *upb_join(char *base, char *name) {
}
}
+void upb_deflist_init(upb_deflist *l) {  // Sets up an empty list with room for 8 def pointers.
+ l->size = 8;  // Initial capacity; upb_deflist_push() doubles it when the list is full.
+ l->defs = malloc(l->size * sizeof(void*));  // NOTE(review): the result is not checked for NULL.
+ l->len = 0;
+ l->owned = true;  // upb_deflist_uninit() unrefs the entries only while this is set.
+}
+
+void upb_deflist_uninit(upb_deflist *l) {  // Frees the array, unref'ing each entry first if the list is still owned.
+ if (l->owned)
+ for(size_t i = 0; i < l->len; i++)
+ upb_def_unref(l->defs[i], &l->defs);  // &l->defs is passed as the owner; presumably it must match the owner used when the ref was taken -- confirm.
+ free(l->defs);  // Runs unconditionally; it is not part of the if/for above.
+}
+
+void upb_deflist_push(upb_deflist *l, upb_def *d) {  // Appends d, doubling the array first if it is full.
+ if(l->len == l->size) {
+ l->size *= 2;  // Capacity starts at 8 in upb_deflist_init(), so doubling always grows it.
+ l->defs = realloc(l->defs, l->size * sizeof(void*));  // NOTE(review): if realloc fails, the old array is leaked and the store below writes through NULL.
+ }
+ l->defs[l->len++] = d;  // Stores the pointer only; no ref call is made here.
+}
+
+void upb_deflist_donaterefs(upb_deflist *l, void *owner) {  // Calls upb_def_donateref() on every entry, from &l->defs to `owner`.
+ assert(l->owned);  // Only valid while the list is owned; this function clears the flag, so a repeat call trips the assert.
+ for (size_t i = 0; i < l->len; i++)
+ upb_def_donateref(l->defs[i], &l->defs, owner);  // &l->defs is the same owner value that upb_deflist_uninit() passes to upb_def_unref().
+ l->owned = false;  // Stops upb_deflist_uninit() from unref'ing the donated entries.
+}
+
+
/* upb_descreader ************************************************************/
static upb_def *upb_deflist_last(upb_deflist *l) {
@@ -37,8 +68,8 @@ static upb_def *upb_deflist_last(upb_deflist *l) {
static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
for(uint32_t i = start; i < l->len; i++) {
upb_def *def = l->defs[i];
- char *name = def->fqname;
- def->fqname = upb_join(str, name);
+ char *name = upb_join(str, upb_def_fullname(def));
+ upb_def_setfullname(def, name);
free(name);
}
}
@@ -66,9 +97,9 @@ void upb_descreader_uninit(upb_descreader *r) {
}
}
-upb_def **upb_descreader_getdefs(upb_descreader *r, int *n) {
+upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {  // Stores the def count in *n, donates the refs to `owner`, and returns the reader's array.
*n = r->defs.len;
- r->defs.len = 0;
+ upb_deflist_donaterefs(&r->defs, owner);  // NOTE(review): len is no longer reset here and donaterefs asserts l->owned, so a second call on the same reader would assert -- confirm callers call this once.
return r->defs.defs;
}
@@ -204,7 +235,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
return;
}
upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
- if (upb_inttable_count(&e->iton) == 0) {
+ if (upb_enumdef_numvals(e) == 0) {
// The default value of an enum (in the absence of an explicit default) is
// its first listed value.
upb_enumdef_setdefault(e, r->number);
@@ -236,18 +267,18 @@ static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto(
// google.protobuf.EnumDescriptorProto.
static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) {
upb_descreader *r = _r;
- upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new()));
+ upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new(&r->defs)));
return UPB_CONTINUE;
}
static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) {
upb_descreader *r = _r;
upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
- if (upb_descreader_last((upb_descreader*)_r)->fqname == NULL) {
+ if (upb_def_fullname(upb_descreader_last((upb_descreader*)_r)) == NULL) {
upb_status_seterrliteral(status, "Enum had no name.");
return;
}
- if (upb_inttable_count(&e->iton) == 0) {
+ if (upb_enumdef_numvals(e) == 0) {
upb_status_seterrliteral(status, "Enum had no values.");
return;
}
@@ -258,9 +289,9 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r,
upb_value val) {
(void)fval;
upb_descreader *r = _r;
- upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
- free(e->base.fqname);
- e->base.fqname = upb_byteregion_strdup(upb_value_getbyteregion(val));
+ char *fullname = upb_byteregion_strdup(upb_value_getbyteregion(val));
+ upb_def_setfullname(upb_descreader_last(r), fullname);
+ free(fullname);
return UPB_CONTINUE;
}
@@ -284,7 +315,7 @@ static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h)
static upb_flow_t upb_fielddef_startmsg(void *_r) {
upb_descreader *r = _r;
- r->f = upb_fielddef_new();
+ r->f = upb_fielddef_new(&r->defs);
free(r->default_string);
r->default_string = NULL;
return UPB_CONTINUE;
@@ -370,13 +401,12 @@ static void upb_fielddef_endmsg(void *_r, upb_status *status) {
upb_descreader *r = _r;
upb_fielddef *f = r->f;
// TODO: verify that all required fields were present.
- assert(f->number != -1 && f->name != NULL);
- assert((f->def != NULL) == upb_hassubdef(f));
+ assert(f->number != -1 && upb_fielddef_name(f) != NULL);
+ assert((upb_fielddef_subtypename(f) != NULL) == upb_hassubdef(f));
// Field was successfully read, add it as a field of the msgdef.
upb_msgdef *m = upb_descreader_top(r);
- upb_msgdef_addfield(m, f);
- upb_fielddef_unref(f);
+ upb_msgdef_addfield(m, f, &r->defs);
r->f = NULL;
if (r->default_string) {
@@ -435,7 +465,7 @@ static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval,
(void)fval;
upb_descreader *r = _r;
char *name = upb_byteregion_strdup(upb_value_getbyteregion(val));
- upb_fielddef_settypename(r->f, name);
+ upb_fielddef_setsubtypename(r->f, name);
free(name);
return UPB_CONTINUE;
}
@@ -479,7 +509,7 @@ static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto(
// google.protobuf.DescriptorProto.
static upb_flow_t upb_msgdef_startmsg(void *_r) {
upb_descreader *r = _r;
- upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new()));
+ upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new(&r->defs)));
upb_descreader_startcontainer(r);
return UPB_CONTINUE;
}
@@ -487,7 +517,7 @@ static upb_flow_t upb_msgdef_startmsg(void *_r) {
static void upb_msgdef_endmsg(void *_r, upb_status *status) {
upb_descreader *r = _r;
upb_msgdef *m = upb_descreader_top(r);
- if(!m->base.fqname) {
+ if(!upb_def_fullname(UPB_UPCAST(m))) {
upb_status_seterrliteral(status, "Encountered message with no name.");
return;
}
@@ -497,11 +527,10 @@ static void upb_msgdef_endmsg(void *_r, upb_status *status) {
static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) {
(void)fval;
upb_descreader *r = _r;
- assert(val.type == UPB_TYPE(STRING));
upb_msgdef *m = upb_descreader_top(r);
- free(m->base.fqname);
- m->base.fqname = upb_byteregion_strdup(upb_value_getbyteregion(val));
- upb_descreader_setscopename(r, strdup(m->base.fqname));
+ char *name = upb_byteregion_strdup(upb_value_getbyteregion(val));
+ upb_def_setfullname(UPB_UPCAST(m), name);
+ upb_descreader_setscopename(r, name); // Passes ownership of name.
return UPB_CONTINUE;
}
@@ -530,4 +559,3 @@ static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) {
}
#undef FNUM
#undef FTYPE
-
diff --git a/upb/descriptor.h b/upb/descriptor/reader.h
index 21099b3..0e1bfa0 100644
--- a/upb/descriptor.h
+++ b/upb/descriptor/reader.h
@@ -4,9 +4,9 @@
* Copyright (c) 2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
- * Routines for building defs by parsing descriptors in descriptor.proto format.
- * This only needs to use the public API of upb_symtab. Later we may also
- * add routines for dumping a symtab to a descriptor.
+ * upb_descreader provides a set of sink handlers that will build defs from a
+ * data source that uses the descriptor.proto schema (like a protobuf binary
+ * descriptor).
*/
#ifndef UPB_DESCRIPTOR_H
@@ -18,6 +18,20 @@
extern "C" {
#endif
+/* upb_deflist ****************************************************************/
+
+// upb_deflist is an internal-only dynamic array for storing a growing list of
+// upb_defs.
+typedef struct {  // NOTE(review): reader.c passes both &r->defs (this struct) and &l->defs (the member below) as the ref owner; they are equal only while `defs` is the first member -- confirm.
+ upb_def **defs;  // Heap array of def pointers, grown with realloc() by upb_deflist_push().
+ size_t len;  // Number of entries currently stored.
+ size_t size;  // Allocated capacity of `defs`, in entries.
+ bool owned;  // When true, upb_deflist_uninit() unrefs every entry; cleared by upb_deflist_donaterefs().
+} upb_deflist;
+
+void upb_deflist_init(upb_deflist *l);
+void upb_deflist_uninit(upb_deflist *l);
+void upb_deflist_push(upb_deflist *l, upb_def *d);
/* upb_descreader ************************************************************/
@@ -56,11 +70,11 @@ void upb_descreader_uninit(upb_descreader *r);
upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h);
// Gets the array of defs that have been parsed and removes them from the
-// descreader. Ownership of the defs is passed to the caller, but the
-// ownership of the returned array is retained and is invalidated by any other
-// call into the descreader. The defs will not have been resolved, and are
-// ready to be added to a symtab.
-upb_def **upb_descreader_getdefs(upb_descreader *r, int *n);
+// descreader. Ownership of the defs is passed to the caller (using the given
+// owner), but the ownership of the returned array is retained and is
+// invalidated by any other call into the descreader. The defs will not have
+// been resolved, and are ready to be added to a symtab.
+upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/upb/handlers.c b/upb/handlers.c
index 1ccaf8d..ea5a054 100644
--- a/upb/handlers.c
+++ b/upb/handlers.c
@@ -13,7 +13,7 @@
static upb_mhandlers *upb_mhandlers_new() {
upb_mhandlers *m = malloc(sizeof(*m));
- upb_inttable_init(&m->fieldtab, 8, sizeof(upb_itofhandlers_ent));
+ upb_inttable_init(&m->fieldtab);
m->startmsg = NULL;
m->endmsg = NULL;
m->is_group = false;
@@ -26,20 +26,19 @@ static upb_mhandlers *upb_mhandlers_new() {
static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
upb_fieldtype_t type,
bool repeated) {
- upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, n);
+ const upb_value *v = upb_inttable_lookup(&m->fieldtab, n);
// TODO: design/refine the API for changing the set of fields or modifying
// existing handlers.
- if (e) return NULL;
- upb_fhandlers new_f = {type, repeated, UPB_ATOMIC_INIT(0),
+ if (v) return NULL;
+ upb_fhandlers new_f = {type, repeated, 0,
n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL,
#ifdef UPB_USE_JIT_X64
0, 0, 0,
#endif
- NULL};
+ };
upb_fhandlers *ptr = malloc(sizeof(*ptr));
memcpy(ptr, &new_f, sizeof(upb_fhandlers));
- upb_itofhandlers_ent ent = {false, ptr};
- upb_inttable_insert(&m->fieldtab, n, &ent);
+ upb_inttable_insert(&m->fieldtab, n, upb_value_ptr(ptr));
return ptr;
}
@@ -64,12 +63,17 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
return f;
}
+upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n) {  // Returns the fhandlers stored in m->fieldtab for field number n, or NULL if the lookup finds nothing.
+ const upb_value *v = upb_inttable_lookup(&m->fieldtab, n);  // Entries are inserted by _upb_mhandlers_newfhandlers() as upb_value_ptr(fhandlers).
+ return v ? upb_value_getptr(*v) : NULL;  // Unwrap the stored pointer; a NULL lookup result becomes a NULL return.
+}
+
/* upb_handlers ***************************************************************/
upb_handlers *upb_handlers_new() {
upb_handlers *h = malloc(sizeof(*h));
- upb_atomic_init(&h->refcount, 1);
+ h->refcount = 1;
h->msgs_len = 0;
h->msgs_size = 4;
h->msgs = malloc(h->msgs_size * sizeof(*h->msgs));
@@ -77,19 +81,18 @@ upb_handlers *upb_handlers_new() {
return h;
}
-void upb_handlers_ref(upb_handlers *h) { upb_atomic_ref(&h->refcount); }
+void upb_handlers_ref(upb_handlers *h) { h->refcount++; }
void upb_handlers_unref(upb_handlers *h) {
- if (upb_atomic_unref(&h->refcount)) {
+ if (--h->refcount == 0) {
for (int i = 0; i < h->msgs_len; i++) {
upb_mhandlers *mh = h->msgs[i];
- for(upb_inttable_iter j = upb_inttable_begin(&mh->fieldtab);
- !upb_inttable_done(j);
- j = upb_inttable_next(&mh->fieldtab, j)) {
- upb_itofhandlers_ent *e = upb_inttable_iter_value(j);
- free(e->f);
+ upb_inttable_iter j;
+ upb_inttable_begin(&j, &mh->fieldtab);
+ for(; !upb_inttable_done(&j); upb_inttable_next(&j)) {
+ free(upb_value_getptr(upb_inttable_iter_value(&j)));
}
- upb_inttable_free(&mh->fieldtab);
+ upb_inttable_uninit(&mh->fieldtab);
#ifdef UPB_USE_JIT_X64
free(mh->tablearray);
#endif
@@ -110,31 +113,28 @@ upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) {
return mh;
}
-typedef struct {
- upb_mhandlers *mh;
-} upb_mtab_ent;
-
static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, const upb_msgdef *m,
upb_onmsgreg *msgreg_cb,
upb_onfieldreg *fieldreg_cb,
void *closure, upb_strtable *mtab) {
upb_mhandlers *mh = upb_handlers_newmhandlers(h);
- upb_mtab_ent e = {mh};
- upb_strtable_insert(mtab, m->base.fqname, &e);
+ upb_strtable_insert(mtab, upb_def_fullname(UPB_UPCAST(m)), upb_value_ptr(mh));
if (msgreg_cb) msgreg_cb(closure, mh, m);
upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
+ for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+ upb_fielddef *f = upb_msg_iter_field(&i);
upb_fhandlers *fh;
if (upb_issubmsg(f)) {
upb_mhandlers *sub_mh;
- upb_mtab_ent *subm_ent;
+ const upb_value *subm_ent;
// The table lookup is necessary to break the DFS for type cycles.
- if ((subm_ent = upb_strtable_lookup(mtab, f->def->fqname)) != NULL) {
- sub_mh = subm_ent->mh;
+ const char *subname = upb_def_fullname(upb_fielddef_subdef(f));
+ if ((subm_ent = upb_strtable_lookup(mtab, subname)) != NULL) {
+ sub_mh = upb_value_getptr(*subm_ent);
} else {
- sub_mh = upb_regmsg_dfs(h, upb_downcast_msgdef(f->def), msgreg_cb,
- fieldreg_cb, closure, mtab);
+ sub_mh = upb_regmsg_dfs(
+ h, upb_downcast_msgdef_const(upb_fielddef_subdef(f)),
+ msgreg_cb, fieldreg_cb, closure, mtab);
}
fh = upb_mhandlers_newfhandlers_subm(
mh, f->number, f->type, upb_isseq(f), sub_mh);
@@ -151,10 +151,10 @@ upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m,
upb_onfieldreg *fieldreg_cb,
void *closure) {
upb_strtable mtab;
- upb_strtable_init(&mtab, 8, sizeof(upb_mtab_ent));
+ upb_strtable_init(&mtab);
upb_mhandlers *ret =
upb_regmsg_dfs(h, m, msgreg_cb, fieldreg_cb, closure, &mtab);
- upb_strtable_free(&mtab);
+ upb_strtable_uninit(&mtab);
return ret;
}
@@ -212,6 +212,7 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
if (f->startseq) sflow = f->startseq(d->top->closure, f->fval);
+ _upb_dispatcher_sethas(d->top->closure, f->hasbit);
if (sflow.flow != UPB_CONTINUE) {
_upb_dispatcher_abortjmp(d);
}
@@ -247,6 +248,7 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
if (f->startsubmsg) sflow = f->startsubmsg(d->top->closure, f->fval);
+ _upb_dispatcher_sethas(d->top->closure, f->hasbit);
if (sflow.flow != UPB_CONTINUE) {
_upb_dispatcher_abortjmp(d);
}
diff --git a/upb/handlers.h b/upb/handlers.h
index 9ed02c1..9083a2e 100644
--- a/upb/handlers.h
+++ b/upb/handlers.h
@@ -9,6 +9,10 @@
* for each message and/or field as the data is being parsed or iterated over,
* without having to know the source format that we are parsing from. This
* decouples the parsing logic from the processing logic.
+ *
+ * TODO: should we allow handlers to longjmp()? Would be necessary to eg. let
+ * a Lua handler "yield" from the current coroutine. I *think* everything
+ * would "just work" with our current decoder.
*/
#ifndef UPB_HANDLERS_H
@@ -141,9 +145,9 @@ struct _upb_mhandlers;
typedef struct _upb_fieldent {
upb_fieldtype_t type;
bool repeated;
- upb_atomic_t refcount;
+ uint32_t refcount;
uint32_t number;
- int32_t valuehasbit;
+ int32_t hasbit;
struct _upb_mhandlers *msg;
struct _upb_mhandlers *submsg; // Set iff upb_issubmsgtype(type) == true.
upb_value fval;
@@ -157,14 +161,8 @@ typedef struct _upb_fieldent {
uint32_t jit_pclabel_notypecheck;
uint32_t jit_submsg_done_pclabel;
#endif
- void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
} upb_fhandlers;
-typedef struct {
- bool junk; // Stolen by table impl; see table.h for details.
- upb_fhandlers *f;
-} upb_itofhandlers_ent;
-
// fhandlers are created as part of a upb_handlers instance, but can be ref'd
// and unref'd to prolong the life of the handlers.
void upb_fhandlers_ref(upb_fhandlers *m);
@@ -174,6 +172,8 @@ void upb_fhandlers_unref(upb_fhandlers *m);
#define UPB_FHANDLERS_ACCESSORS(name, type) \
INLINE void upb_fhandlers_set ## name(upb_fhandlers *f, type v){f->name = v;} \
INLINE type upb_fhandlers_get ## name(const upb_fhandlers *f) { return f->name; }
+// TODO(haberman): need a way of keeping the fval alive even if a plan outlasts
+// the handlers.
UPB_FHANDLERS_ACCESSORS(fval, upb_value)
UPB_FHANDLERS_ACCESSORS(value, upb_value_handler*)
UPB_FHANDLERS_ACCESSORS(startsubmsg, upb_startfield_handler*)
@@ -182,11 +182,13 @@ UPB_FHANDLERS_ACCESSORS(startseq, upb_startfield_handler*)
UPB_FHANDLERS_ACCESSORS(endseq, upb_endfield_handler*)
UPB_FHANDLERS_ACCESSORS(msg, struct _upb_mhandlers*)
UPB_FHANDLERS_ACCESSORS(submsg, struct _upb_mhandlers*)
-// If set to >= 0, the hasbit will automatically be set after the corresponding
-// callback is called (when a JIT is enabled, this can be significantly more
-// efficient than setting the hasbit yourself inside the callback). Could add
-// this for seq and submsg also, but doesn't look like a win at the moment.
-UPB_FHANDLERS_ACCESSORS(valuehasbit, int32_t)
+// If set to >= 0, the hasbit will automatically be set when the corresponding
+// field is parsed (when a JIT is enabled, this can be significantly more
+// efficient than setting the hasbit yourself inside the callback). For values
+// it is undefined whether the hasbit is set before or after the callback is
+// called. For seq and submsg, the hasbit is set *after* the start handler is
+// called, but before any of the handlers for the submsg or sequence.
+UPB_FHANDLERS_ACCESSORS(hasbit, int32_t)
/* upb_mhandlers **************************************************************/
@@ -195,7 +197,7 @@ UPB_FHANDLERS_ACCESSORS(valuehasbit, int32_t)
// message in the graph of messages.
typedef struct _upb_mhandlers {
- upb_atomic_t refcount;
+ uint32_t refcount;
upb_startmsg_handler *startmsg;
upb_endmsg_handler *endmsg;
upb_inttable fieldtab; // Maps field number -> upb_fhandlers.
@@ -203,6 +205,7 @@ typedef struct _upb_mhandlers {
#ifdef UPB_USE_JIT_X64
// Used inside the JIT to track labels (jmp targets) in the generated code.
uint32_t jit_startmsg_pclabel; // Starting a parse of this (sub-)message.
+ uint32_t jit_afterstartmsg_pclabel; // After calling the startmsg handler.
uint32_t jit_endofbuf_pclabel; // ptr hitend, but delim_end or jit_end?
uint32_t jit_endofmsg_pclabel; // Done parsing this (sub-)message.
uint32_t jit_dyndispatch_pclabel; // Dispatch by table lookup.
@@ -240,11 +243,14 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
UPB_MHANDLERS_ACCESSORS(startmsg, upb_startmsg_handler*);
UPB_MHANDLERS_ACCESSORS(endmsg, upb_endmsg_handler*);
+// Returns fhandlers for the given field, or NULL if none.
+upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n);
+
/* upb_handlers ***************************************************************/
struct _upb_handlers {
- upb_atomic_t refcount;
+ uint32_t refcount;
upb_mhandlers **msgs; // Array of msgdefs, [0]=toplevel.
int msgs_len, msgs_size;
bool should_jit;
@@ -267,8 +273,10 @@ upb_mhandlers *upb_handlers_getmhandlers(upb_handlers *h, int index);
// with "fieldreg_cb"
//
// See upb_handlers_reghandlerset() below for an example.
-typedef void upb_onmsgreg(void *closure, upb_mhandlers *mh, const upb_msgdef *m);
-typedef void upb_onfieldreg(void *closure, upb_fhandlers *mh, const upb_fielddef *m);
+typedef void upb_onmsgreg(
+ void *closure, upb_mhandlers *mh, const upb_msgdef *m);
+typedef void upb_onfieldreg(
+ void *closure, upb_fhandlers *fh, const upb_fielddef *f);
upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m,
upb_onmsgreg *msgreg_cb,
upb_onfieldreg *fieldreg_cb,
@@ -305,8 +313,8 @@ INLINE void upb_onfreg_hset(void *c, upb_fhandlers *fh, const upb_fielddef *f) {
upb_value_setfielddef(&val, f);
upb_fhandlers_setfval(fh, val);
}
-INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, const upb_msgdef *m,
- upb_handlerset *hs) {
+INLINE upb_mhandlers *upb_handlers_reghandlerset(
+ upb_handlers *h, const upb_msgdef *m, upb_handlerset *hs) {
return upb_handlers_regmsgdef(h, m, &upb_onmreg_hset, &upb_onfreg_hset, hs);
}
@@ -373,7 +381,7 @@ INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f,
upb_value val) {
upb_flow_t flow = UPB_CONTINUE;
if (f->value) flow = f->value(d->top->closure, f->fval, val);
- _upb_dispatcher_sethas(d->top->closure, f->valuehasbit);
+ _upb_dispatcher_sethas(d->top->closure, f->hasbit);
if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
}
void upb_dispatch_startmsg(upb_dispatcher *d);
@@ -381,7 +389,8 @@ void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_fhandlers *f);
upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d);
-upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f);
+upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
+ upb_fhandlers *f);
upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d);
#ifdef __cplusplus
diff --git a/upb/msg.c b/upb/msg.c
index 77521e5..c671b7b 100644
--- a/upb/msg.c
+++ b/upb/msg.c
@@ -4,101 +4,12 @@
* Copyright (c) 2010 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
- * Data structure for storing a message of protobuf data.
*/
#include "upb/upb.h"
#include "upb/msg.h"
-void upb_msg_clear(void *msg, const upb_msgdef *md) {
- assert(msg != NULL);
- memset(msg, 0, md->hasbit_bytes);
- // TODO: set primitive fields to defaults?
-}
-
-void *upb_stdarray_append(upb_stdarray *a, size_t type_size) {
- assert(a != NULL);
- assert(a->len <= a->size);
- if (a->len == a->size) {
- size_t old_size = a->size;
- a->size = old_size == 0 ? 8 : (old_size * 2);
- a->ptr = realloc(a->ptr, a->size * type_size);
- memset(&a->ptr[old_size * type_size], 0, (a->size - old_size) * type_size);
- }
- return &a->ptr[a->len++ * type_size];
-}
-
-#if 0
-static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
- upb_dispatcher *d);
-
-static upb_flow_t upb_msg_pushval(upb_value val, upb_fielddef *f,
- upb_dispatcher *d, upb_fhandlers *hf) {
- if (upb_issubmsg(f)) {
- upb_msg *msg = upb_value_getmsg(val);
- upb_dispatch_startsubmsg(d, hf);
- upb_msg_dispatch(msg, upb_downcast_msgdef(f->def), d);
- upb_dispatch_endsubmsg(d);
- } else {
- upb_dispatch_value(d, hf, val);
- }
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
- upb_dispatcher *d) {
- upb_msg_iter i;
- for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- if (!upb_msg_has(msg, f)) continue;
- upb_fhandlers *hf = upb_dispatcher_lookup(d, f->number);
- if (!hf) continue;
- upb_value val = upb_msg_get(msg, f);
- if (upb_isarray(f)) {
- upb_array *arr = upb_value_getarr(val);
- for (uint32_t j = 0; j < upb_array_len(arr); ++j) {
- upb_msg_pushval(upb_array_get(arr, f, j), f, d, hf);
- }
- } else {
- upb_msg_pushval(val, f, d, hf);
- }
- }
- return UPB_CONTINUE;
-}
-
-void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h,
- void *closure, upb_status *status) {
- upb_dispatcher d;
- upb_dispatcher_init(&d, h, NULL, NULL, NULL);
- upb_dispatcher_reset(&d, closure);
-
- upb_dispatch_startmsg(&d);
- upb_msg_dispatch(msg, md, &d);
- upb_dispatch_endmsg(&d, status);
-
- upb_dispatcher_uninit(&d);
-}
-#endif
-
-/* Standard writers. **********************************************************/
-
-void upb_stdmsg_sethas(void *_m, upb_value fval) {
- assert(_m != NULL);
- char *m = _m;
- const upb_fielddef *f = upb_value_getfielddef(fval);
- if (f->hasbit >= 0)
- m[(uint32_t)f->hasbit / 8] |= (1 << ((uint32_t)f->hasbit % 8));
-}
-
-bool upb_stdmsg_has(const void *_m, upb_value fval) {
- assert(_m != NULL);
- const char *m = _m;
- const upb_fielddef *f = upb_value_getfielddef(fval);
- return f->hasbit < 0 ||
- (m[(uint32_t)f->hasbit / 8] & (1 << ((uint32_t)f->hasbit % 8)));
-}
-
-#define UPB_ACCESSORS(type, ctype) \
+#define UPB_ACCESSOR(type, ctype) \
upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval, \
upb_value val) { \
assert(_m != NULL); \
@@ -108,230 +19,17 @@ bool upb_stdmsg_has(const void *_m, upb_value fval) {
*(ctype*)&m[f->offset] = upb_value_get ## type(val); \
return UPB_CONTINUE; \
} \
- \
- upb_flow_t upb_stdmsg_set ## type ## _r(void *a, upb_value _fval, \
- upb_value val) { \
- (void)_fval; \
- assert(a != NULL); \
- ctype *p = upb_stdarray_append((upb_stdarray*)a, sizeof(ctype)); \
- *p = upb_value_get ## type(val); \
- return UPB_CONTINUE; \
- } \
- \
- upb_value upb_stdmsg_get ## type(const void *_m, upb_value fval) { \
- assert(_m != NULL); \
- const uint8_t *m = _m; \
- const upb_fielddef *f = upb_value_getfielddef(fval); \
- upb_value ret; \
- upb_value_set ## type(&ret, *(ctype*)&m[f->offset]); \
- return ret; \
- } \
- upb_value upb_stdmsg_seqget ## type(const void *i) { \
- assert(i != NULL); \
- upb_value val; \
- upb_value_set ## type(&val, *(ctype*)i); \
- return val; \
- }
-UPB_ACCESSORS(double, double)
-UPB_ACCESSORS(float, float)
-UPB_ACCESSORS(int32, int32_t)
-UPB_ACCESSORS(int64, int64_t)
-UPB_ACCESSORS(uint32, uint32_t)
-UPB_ACCESSORS(uint64, uint64_t)
-UPB_ACCESSORS(bool, bool)
-UPB_ACCESSORS(ptr, void*)
+UPB_ACCESSOR(double, double)
+UPB_ACCESSOR(float, float)
+UPB_ACCESSOR(int32, int32_t)
+UPB_ACCESSOR(int64, int64_t)
+UPB_ACCESSOR(uint32, uint32_t)
+UPB_ACCESSOR(uint64, uint64_t)
+UPB_ACCESSOR(bool, bool)
+UPB_ACCESSOR(ptr, void*)
-#undef UPB_ACCESSORS
+#undef UPB_ACCESSOR
-static void _upb_stdmsg_setstr(void *_dst, upb_value src) {
- upb_stdarray **dstp = _dst;
- upb_stdarray *dst = *dstp;
- if (!dst) {
- dst = malloc(sizeof(*dst));
- dst->size = 0;
- dst->ptr = NULL;
- *dstp = dst;
- }
- dst->len = 0;
- const upb_byteregion *bytes = upb_value_getbyteregion(src);
- uint32_t len = upb_byteregion_len(bytes);
- if (len > dst->size) {
- dst->size = len;
- dst->ptr = realloc(dst->ptr, dst->size);
- }
- dst->len = len;
- upb_byteregion_copyall(bytes, dst->ptr);
-}
-
-upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) {
- assert(_m != NULL);
- char *m = _m;
- const upb_fielddef *f = upb_value_getfielddef(fval);
- // Hasbit automatically set by the handlers.
- _upb_stdmsg_setstr(&m[f->offset], val);
- return UPB_CONTINUE;
-}
-
-upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) {
- assert(a != NULL);
- (void)fval;
- _upb_stdmsg_setstr(upb_stdarray_append((upb_stdarray*)a, sizeof(void*)), val);
- return UPB_CONTINUE;
-}
-
-upb_value upb_stdmsg_getstr(const void *m, upb_value fval) {
- assert(m != NULL);
- return upb_stdmsg_getptr(m, fval);
-}
-
-upb_value upb_stdmsg_seqgetstr(const void *i) {
- assert(i != NULL);
- return upb_stdmsg_seqgetptr(i);
-}
-
-void *upb_stdmsg_new(const upb_msgdef *md) {
- void *m = malloc(md->size);
- memset(m, 0, md->size);
- upb_msg_clear(m, md);
- return m;
-}
-
-void upb_stdseq_free(void *s, upb_fielddef *f) {
- upb_stdarray *a = s;
- if (upb_issubmsg(f) || upb_isstring(f)) {
- void **p = (void**)a->ptr;
- for (uint32_t i = 0; i < a->size; i++) {
- if (upb_issubmsg(f)) {
- upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def));
- } else {
- upb_stdarray *str = p[i];
- free(str->ptr);
- free(str);
- }
- }
- }
- free(a->ptr);
- free(a);
-}
-
-void upb_stdmsg_free(void *m, const upb_msgdef *md) {
- if (m == NULL) return;
- upb_msg_iter i;
- for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- if (!upb_isseq(f) && !upb_issubmsg(f) && !upb_isstring(f)) continue;
- void *subp = upb_value_getptr(upb_stdmsg_getptr(m, f->fval));
- if (subp == NULL) continue;
- if (upb_isseq(f)) {
- upb_stdseq_free(subp, f);
- } else if (upb_issubmsg(f)) {
- upb_stdmsg_free(subp, upb_downcast_msgdef(f->def));
- } else {
- upb_stdarray *str = subp;
- free(str->ptr);
- free(str);
- }
- }
- free(m);
-}
-
-upb_sflow_t upb_stdmsg_startseq(void *_m, upb_value fval) {
- char *m = _m;
- const upb_fielddef *f = upb_value_getfielddef(fval);
- upb_stdarray **arr = (void*)&m[f->offset];
- if (!upb_stdmsg_has(_m, fval)) {
- if (!*arr) {
- *arr = malloc(sizeof(**arr));
- (*arr)->size = 0;
- (*arr)->ptr = NULL;
- }
- (*arr)->len = 0;
- upb_stdmsg_sethas(m, fval);
- }
- return UPB_CONTINUE_WITH(*arr);
-}
-
-void upb_stdmsg_recycle(void **m, const upb_msgdef *md) {
- if (*m)
- upb_msg_clear(*m, md);
- else
- *m = upb_stdmsg_new(md);
-}
-
-upb_sflow_t upb_stdmsg_startsubmsg(void *_m, upb_value fval) {
- assert(_m != NULL);
- char *m = _m;
- const upb_fielddef *f = upb_value_getfielddef(fval);
- void **subm = (void*)&m[f->offset];
- if (!upb_stdmsg_has(m, fval)) {
- upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def));
- upb_stdmsg_sethas(m, fval);
- }
- return UPB_CONTINUE_WITH(*subm);
-}
-
-upb_sflow_t upb_stdmsg_startsubmsg_r(void *a, upb_value fval) {
- assert(a != NULL);
- const upb_fielddef *f = upb_value_getfielddef(fval);
- void **subm = upb_stdarray_append((upb_stdarray*)a, sizeof(void*));
- upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def));
- return UPB_CONTINUE_WITH(*subm);
-}
-
-const void *upb_stdmsg_seqbegin(const void *_a) {
- const upb_stdarray *a = _a;
- return a->len > 0 ? a->ptr : NULL;
-}
-
-#define NEXTFUNC(size) \
- const void *upb_stdmsg_ ## size ## byte_seqnext(const void *_a, const void *iter) {\
- const upb_stdarray *a = _a; \
- const void *next = (char*)iter + size; \
- return (char*)next < (char*)a->ptr + (a->len * size) ? next : NULL; \
- }
-
-NEXTFUNC(8)
-NEXTFUNC(4)
-NEXTFUNC(1)
-
-#define STDMSG(type, size) { static upb_accessor_vtbl vtbl = { \
- &upb_stdmsg_startsubmsg, \
- &upb_stdmsg_set ## type, \
- &upb_stdmsg_startseq, \
- &upb_stdmsg_startsubmsg_r, \
- &upb_stdmsg_set ## type ## _r, \
- &upb_stdmsg_has, \
- &upb_stdmsg_getptr, \
- &upb_stdmsg_get ## type, \
- &upb_stdmsg_seqbegin, \
- &upb_stdmsg_ ## size ## byte_seqnext, \
- &upb_stdmsg_seqget ## type}; \
- return &vtbl; }
-
-upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f) {
- switch (f->type) {
- case UPB_TYPE(DOUBLE): STDMSG(double, 8)
- case UPB_TYPE(FLOAT): STDMSG(float, 4)
- case UPB_TYPE(UINT64):
- case UPB_TYPE(FIXED64): STDMSG(uint64, 8)
- case UPB_TYPE(INT64):
- case UPB_TYPE(SFIXED64):
- case UPB_TYPE(SINT64): STDMSG(int64, 8)
- case UPB_TYPE(INT32):
- case UPB_TYPE(SINT32):
- case UPB_TYPE(ENUM):
- case UPB_TYPE(SFIXED32): STDMSG(int32, 4)
- case UPB_TYPE(UINT32):
- case UPB_TYPE(FIXED32): STDMSG(uint32, 4)
- case UPB_TYPE(BOOL): STDMSG(bool, 1)
- case UPB_TYPE(STRING):
- case UPB_TYPE(BYTES):
- case UPB_TYPE(GROUP):
- case UPB_TYPE(MESSAGE): STDMSG(str, 8) // TODO: 32-bit
- }
- return NULL;
-}
-
static void upb_accessors_onfreg(void *c, upb_fhandlers *fh,
const upb_fielddef *f) {
(void)c;
@@ -344,7 +42,7 @@ static void upb_accessors_onfreg(void *c, upb_fhandlers *fh,
} else {
upb_fhandlers_setvalue(fh, f->accessor->set);
upb_fhandlers_setstartsubmsg(fh, f->accessor->startsubmsg);
- upb_fhandlers_setvaluehasbit(fh, f->hasbit);
+ upb_fhandlers_sethasbit(fh, f->hasbit);
}
}
}
diff --git a/upb/msg.h b/upb/msg.h
index 67903d0..7aaaf2a 100644
--- a/upb/msg.h
+++ b/upb/msg.h
@@ -68,34 +68,18 @@ typedef struct _upb_accessor_vtbl {
upb_seqget_handler *seqget;
} upb_accessor_vtbl;
-// Registers handlers for writing into a message of the given type.
+// Registers handlers for writing into a message of the given type using
+// whatever accessors it has defined.
upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, const upb_msgdef *m);
-// Returns an stdmsg accessor for the given fielddef.
-upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f);
-
-
-/* upb_msg/upb_seq ************************************************************/
-
-// upb_msg and upb_seq allow for generic access to a message through its
-// accessor vtable. Note that these do *not* allow you to create, destroy, or
-// take references on the objects -- these operations are specifically outside
-// the scope of what the accessors define.
-
-// Clears all hasbits.
-// TODO: Add a separate function for setting primitive values back to their
-// defaults (but not strings, submessages, or arrays).
-void upb_msg_clear(void *msg, const upb_msgdef *md);
-
INLINE void upb_msg_clearbit(void *msg, const upb_fielddef *f) {
+ // Clears f's hasbit in msg. A negative hasbit means the field has no hasbit,
+ // so there is nothing to clear; without this guard we would index before the
+ // start of the message and shift by a negative amount (undefined behavior).
+ if (f->hasbit < 0) return;
((char*)msg)[f->hasbit / 8] &= ~(1 << (f->hasbit % 8));
}
-// Could add a method that recursively clears submessages, strings, and
-// arrays if desired. This could be a win if you wanted to merge without
-// needing hasbits, because during parsing you would never clear submessages
-// or arrays. Also this could be desired to provide proto2 operations on
-// generated messages.
+/* upb_msg/upb_seq ************************************************************/
+
+// These accessor functions are simply convenience methods for reading or
+// writing to a message through its accessors.
INLINE bool upb_msg_has(const void *m, const upb_fielddef *f) {
return f->accessor && f->accessor->has(m, f->fval);
@@ -148,65 +132,11 @@ INLINE bool upb_msg_get_named(const void *m, const upb_msgdef *md,
return true;
}
-
-/* upb_msgvisitor *************************************************************/
-
-// A upb_msgvisitor reads data from an in-memory structure using its accessors,
-// pushing the results to a given set of upb_handlers.
-// TODO: not yet implemented.
-
-typedef struct {
- upb_fhandlers *fh;
- upb_fielddef *f;
- uint16_t msgindex; // Only when upb_issubmsg(f).
-} upb_msgvisitor_field;
-
-typedef struct {
- upb_msgvisitor_field *fields;
- int fields_len;
-} upb_msgvisitor_msg;
-
-typedef struct {
- uint16_t msgindex;
- uint16_t fieldindex;
- uint32_t arrayindex; // UINT32_MAX if not an array frame.
-} upb_msgvisitor_frame;
-
-typedef struct {
- upb_msgvisitor_msg *messages;
- int messages_len;
- upb_dispatcher dispatcher;
-} upb_msgvisitor;
-
-// Initializes a msgvisitor that will push data from messages of the given
-// msgdef to the given set of handlers.
-void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h);
-void upb_msgvisitor_uninit(upb_msgvisitor *v);
-
-void upb_msgvisitor_reset(upb_msgvisitor *v, void *m);
-void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status);
-
-
-/* Standard writers. **********************************************************/
-
-// Allocates a new stdmsg.
-void *upb_stdmsg_new(const upb_msgdef *md);
-
-// Recursively frees any strings or submessages that the message refers to.
-void upb_stdmsg_free(void *m, const upb_msgdef *md);
-
-void upb_stdmsg_sethas(void *_m, upb_value fval);
-
-// "hasbit" must be <= UPB_MAX_FIELDS. If it is <0, this field has no hasbit.
-upb_value upb_stdmsg_packfval(int16_t hasbit, uint16_t value_offset);
-upb_value upb_stdmsg_packfval_subm(int16_t hasbit, uint16_t value_offset,
- uint16_t subm_size, uint8_t subm_setbytes);
-
// Value writers for every in-memory type: write the data to a known offset
-// from the closure "c" and set the hasbit (if any).
-// TODO: can we get away with having only one for int64, uint64, double, etc?
-// The main thing in the way atm is that the upb_value is strongly typed.
-// in debug mode.
+// from the closure "c."
+//
+// TODO(haberman): instead of having standard writer functions, should we have
+// a bool in the accessor that says "write raw value to the field's offset"?
upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val);
@@ -216,94 +146,6 @@ upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setptr(void *c, upb_value fval, upb_value val);
-// Value writers for repeated fields: the closure points to a standard array
-// struct, appends the value to the end of the array, resizing with realloc()
-// if necessary.
-typedef struct {
- char *ptr;
- uint32_t len; // Number of elements present.
- uint32_t size; // Number of elements allocated.
-} upb_stdarray;
-
-void *upb_stdarray_append(upb_stdarray *a, size_t type_size);
-
-upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setint32_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setuint64_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setuint32_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setdouble_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setfloat_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setbool_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setptr_r(void *c, upb_value fval, upb_value val);
-
-// Writers for C strings (NULL-terminated): we can find a char* at a known
-// offset from the closure "c". Calls realloc() on the pointer to allocate
-// the memory (TODO: investigate whether checking malloc_usable_size() would
-// be cheaper than realloc()). Also sets the hasbit, if any.
-//
-// Since the string is NULL terminated and does not store an explicit length,
-// these are not suitable for binary data that can contain NULLs.
-upb_flow_t upb_stdmsg_setcstr(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setcstr_r(void *c, upb_value fval, upb_value val);
-
-// Writers for length-delimited strings: we explicitly store the length, so
-// the data can contain NULLs. Stores the data using upb_stdarray
-// which is located at a known offset from the closure "c" (note that it
-// is included inline rather than pointed to). Also sets the hasbit, if any.
-upb_flow_t upb_stdmsg_setstr(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setstr_r(void *c, upb_value fval, upb_value val);
-
-// Writers for startseq and startmsg which allocate (or reuse, if possible)
-// a sub data structure (upb_stdarray or a submessage, respectively),
-// setting the hasbit. If the hasbit is already set, the existing data
-// structure is used verbatim. If the hasbit is not already set, the pointer
-// is checked for NULL. If it is NULL, a new substructure is allocated,
-// cleared, and used. If it is not NULL, the existing substructure is
-// cleared and reused.
-//
-// If there is no hasbit, we always behave as if the hasbit was not set,
-// so any existing data for this array or submessage is cleared. In most
-// cases this will be fine since each array or non-repeated submessage should
-// occur at most once in the stream. But if the client is using "concatenation
-// as merging", it will want to make sure hasbits are allocated so merges can
-// happen appropriately.
-//
-// If there was a demand for the behavior that absence of a hasbit acts as if
-// the bit was always set, we could provide that also. But Clear() would need
-// to act recursively, which is less efficient since it requires an extra pass
-// over the tree.
-upb_sflow_t upb_stdmsg_startseq(void *c, upb_value fval);
-upb_sflow_t upb_stdmsg_startsubmsg(void *c, upb_value fval);
-upb_sflow_t upb_stdmsg_startsubmsg_r(void *c, upb_value fval);
-
-
-/* Standard readers. **********************************************************/
-
-bool upb_stdmsg_has(const void *c, upb_value fval);
-const void *upb_stdmsg_seqbegin(const void *c);
-
-upb_value upb_stdmsg_getint64(const void *c, upb_value fval);
-upb_value upb_stdmsg_getint32(const void *c, upb_value fval);
-upb_value upb_stdmsg_getuint64(const void *c, upb_value fval);
-upb_value upb_stdmsg_getuint32(const void *c, upb_value fval);
-upb_value upb_stdmsg_getdouble(const void *c, upb_value fval);
-upb_value upb_stdmsg_getfloat(const void *c, upb_value fval);
-upb_value upb_stdmsg_getbool(const void *c, upb_value fval);
-upb_value upb_stdmsg_getptr(const void *c, upb_value fval);
-
-const void *upb_stdmsg_8byte_seqnext(const void *c, const void *iter);
-const void *upb_stdmsg_4byte_seqnext(const void *c, const void *iter);
-const void *upb_stdmsg_1byte_seqnext(const void *c, const void *iter);
-
-upb_value upb_stdmsg_seqgetint64(const void *c);
-upb_value upb_stdmsg_seqgetint32(const void *c);
-upb_value upb_stdmsg_seqgetuint64(const void *c);
-upb_value upb_stdmsg_seqgetuint32(const void *c);
-upb_value upb_stdmsg_seqgetdouble(const void *c);
-upb_value upb_stdmsg_seqgetfloat(const void *c);
-upb_value upb_stdmsg_seqgetbool(const void *c);
-upb_value upb_stdmsg_seqgetptr(const void *c);
-
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 06125dd..b0e2392 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -13,6 +13,33 @@
#include "upb/pb/decoder.h"
#include "upb/pb/varint.h"
+// Per-field-type information the decoder needs to validate an incoming tag.
+typedef struct {
+ // Wire type that a single (non-packed) value of this field type uses.
+ uint8_t native_wire_type;
+ // True for types that are also accepted with UPB_WIRE_TYPE_DELIMITED, in
+ // which case the decoder treats the field as packed.
+ bool is_numeric;
+} upb_decoder_typeinfo;
+
+// Indexed by field type (the decoder looks entries up with f->type), so the
+// order of the rows below must match the numeric values of the field types.
+// Entry 0 is the ENDGROUP pseudo-type.
+static const upb_decoder_typeinfo upb_decoder_types[] = {
+ {UPB_WIRE_TYPE_END_GROUP, false}, // ENDGROUP
+ {UPB_WIRE_TYPE_64BIT, true}, // DOUBLE
+ {UPB_WIRE_TYPE_32BIT, true}, // FLOAT
+ {UPB_WIRE_TYPE_VARINT, true}, // INT64
+ {UPB_WIRE_TYPE_VARINT, true}, // UINT64
+ {UPB_WIRE_TYPE_VARINT, true}, // INT32
+ {UPB_WIRE_TYPE_64BIT, true}, // FIXED64
+ {UPB_WIRE_TYPE_32BIT, true}, // FIXED32
+ {UPB_WIRE_TYPE_VARINT, true}, // BOOL
+ {UPB_WIRE_TYPE_DELIMITED, false}, // STRING
+ {UPB_WIRE_TYPE_START_GROUP, false}, // GROUP
+ {UPB_WIRE_TYPE_DELIMITED, false}, // MESSAGE
+ {UPB_WIRE_TYPE_DELIMITED, false}, // BYTES
+ {UPB_WIRE_TYPE_VARINT, true}, // UINT32
+ {UPB_WIRE_TYPE_VARINT, true}, // ENUM
+ {UPB_WIRE_TYPE_32BIT, true}, // SFIXED32
+ {UPB_WIRE_TYPE_64BIT, true}, // SFIXED64
+ {UPB_WIRE_TYPE_VARINT, true}, // SINT32
+ {UPB_WIRE_TYPE_VARINT, true}, // SINT64
+};
+
/* upb_decoderplan ************************************************************/
#ifdef UPB_USE_JIT_X64
@@ -32,37 +59,6 @@
#include "upb/pb/decoder_x64.h"
#endif
-typedef struct {
- upb_fhandlers base;
- void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
-#ifdef UPB_USE_JIT_X64
- uint32_t jit_pclabel;
- uint32_t jit_pclabel_notypecheck;
-#endif
-} upb_dplanfield;
-
-typedef struct {
- upb_mhandlers base;
-#ifdef UPB_USE_JIT_X64
- uint32_t jit_startmsg_pclabel;
- uint32_t jit_endofbuf_pclabel;
- uint32_t jit_endofmsg_pclabel;
- uint32_t jit_dyndispatch_pclabel;
- uint32_t jit_unknownfield_pclabel;
- int32_t jit_parent_field_done_pclabel;
- uint32_t max_field_number;
- // Currently keyed on field number. Could also try keying it
- // on encoded or decoded tag, or on encoded field number.
- void **tablearray;
-#endif
-} upb_dplanmsg;
-
-static void *upb_decoderplan_fptrs[];
-
-void upb_decoderplan_initfhandlers(upb_fhandlers *f) {
- f->decode = upb_decoderplan_fptrs[f->type];
-}
-
upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) {
upb_decoderplan *p = malloc(sizeof(*p));
p->handlers = h;
@@ -72,17 +68,6 @@ upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) {
p->jit_code = NULL;
if (allowjit) upb_decoderplan_makejit(p);
#endif
- // Set function pointers for each field's decode function.
- for (int i = 0; i < h->msgs_len; i++) {
- upb_mhandlers *m = h->msgs[i];
- for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
- !upb_inttable_done(i);
- i = upb_inttable_next(&m->fieldtab, i)) {
- upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
- upb_fhandlers *f = e->f;
- upb_decoderplan_initfhandlers(f);
- }
- }
return p;
}
@@ -396,14 +381,6 @@ static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
upb_push_msg(d, f, upb_decoder_offset(d) + len);
}
-#define F(type) &upb_decode_ ## type
-static void *upb_decoderplan_fptrs[] = {
- &upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64),
- F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING),
- F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32),
- F(SFIXED64), F(SINT32), F(SINT64)};
-#undef F
-
/* The main decoding loop *****************************************************/
@@ -431,16 +408,18 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
if (!upb_trydecode_varint32(d, &tag)) return NULL;
uint8_t wire_type = tag & 0x7;
uint32_t fieldnum = tag >> 3;
- upb_itofhandlers_ent *e = upb_inttable_fastlookup(
- d->dispatch_table, fieldnum, sizeof(upb_itofhandlers_ent));
- upb_fhandlers *f = e ? e->f : NULL;
+ const upb_value *val = upb_inttable_lookup32(d->dispatch_table, fieldnum);
+ upb_fhandlers *f = val ? upb_value_getptr(*val) : NULL;
+ bool is_packed = false;
if (f) {
// Wire type check.
- if (wire_type == upb_types[f->type].native_wire_type ||
- (wire_type == UPB_WIRE_TYPE_DELIMITED &&
- upb_types[f->type].is_numeric)) {
+ if (wire_type == upb_decoder_types[f->type].native_wire_type) {
// Wire type is ok.
+ } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED &&
+ upb_decoder_types[f->type].is_numeric)) {
+ // Wire type is ok (and packed).
+ is_packed = true;
} else {
f = NULL;
}
@@ -453,19 +432,18 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
if (fr->is_sequence && fr->f != f) {
upb_dispatch_endseq(&d->dispatcher);
upb_decoder_setmsgend(d);
+ fr = d->dispatcher.top;
}
- if (f && f->repeated && (!fr->is_sequence || fr->f != f)) {
- uint64_t old_end = d->dispatcher.top->end_ofs;
- upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f);
- if (wire_type != UPB_WIRE_TYPE_DELIMITED ||
- upb_issubmsgtype(f->type) || upb_isstringtype(f->type)) {
- // Non-packed field -- this tag pertains to only a single message.
- fr->end_ofs = old_end;
- } else {
+ if (f && f->repeated && !fr->is_sequence) {
+ upb_dispatcher_frame *fr2 = upb_dispatch_startseq(&d->dispatcher, f);
+ if (is_packed) {
// Packed primitive field.
uint32_t len = upb_decode_varint32(d);
- fr->end_ofs = upb_decoder_offset(d) + len;
- fr->is_packed = true;
+ fr2->end_ofs = upb_decoder_offset(d) + len;
+ fr2->is_packed = true;
+ } else {
+ // Non-packed field -- this tag pertains to only a single message.
+ fr2->end_ofs = fr->end_ofs;
}
upb_decoder_setmsgend(d);
}
@@ -513,13 +491,37 @@ upb_success_t upb_decoder_decode(upb_decoder *d) {
if (!d->top_is_packed) f = upb_decode_tag(d);
if (!f) {
// Successful EOF. We may need to dispatch a top-level implicit frame.
- if (d->dispatcher.top == d->dispatcher.stack + 1) {
- assert(d->dispatcher.top->is_sequence);
+ if (d->dispatcher.top->is_sequence) {
+ assert(d->dispatcher.top == d->dispatcher.stack + 1);
upb_dispatch_endseq(&d->dispatcher);
}
+ assert(d->dispatcher.top == d->dispatcher.stack);
+ upb_dispatch_endmsg(&d->dispatcher, &d->status);
return UPB_OK;
}
- f->decode(d, f);
+
+ switch (f->type) {
+ case UPB_TYPE_ENDGROUP: upb_endgroup(d, f); break;
+ case UPB_TYPE(DOUBLE): upb_decode_DOUBLE(d, f); break;
+ case UPB_TYPE(FLOAT): upb_decode_FLOAT(d, f); break;
+ case UPB_TYPE(INT64): upb_decode_INT64(d, f); break;
+ case UPB_TYPE(UINT64): upb_decode_UINT64(d, f); break;
+ case UPB_TYPE(INT32): upb_decode_INT32(d, f); break;
+ case UPB_TYPE(FIXED64): upb_decode_FIXED64(d, f); break;
+ case UPB_TYPE(FIXED32): upb_decode_FIXED32(d, f); break;
+ case UPB_TYPE(BOOL): upb_decode_BOOL(d, f); break;
+ case UPB_TYPE(STRING):
+ case UPB_TYPE(BYTES): upb_decode_STRING(d, f); break;
+ case UPB_TYPE(GROUP): upb_decode_GROUP(d, f); break;
+ case UPB_TYPE(MESSAGE): upb_decode_MESSAGE(d, f); break;
+ case UPB_TYPE(UINT32): upb_decode_UINT32(d, f); break;
+ case UPB_TYPE(ENUM): upb_decode_ENUM(d, f); break;
+ case UPB_TYPE(SFIXED32): upb_decode_SFIXED32(d, f); break;
+ case UPB_TYPE(SFIXED64): upb_decode_SFIXED64(d, f); break;
+ case UPB_TYPE(SINT32): upb_decode_SINT32(d, f); break;
+ case UPB_TYPE(SINT64): upb_decode_SINT64(d, f); break;
+ case UPB_TYPE_NONE: assert(false); break;
+ }
upb_decoder_checkpoint(d);
}
}
@@ -542,7 +544,6 @@ void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset) {
void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input,
void *closure) {
assert(d->plan);
- assert(upb_byteregion_discardofs(input) == upb_byteregion_startofs(input));
upb_dispatcher_frame *f =
upb_dispatcher_reset(&d->dispatcher, closure, d->plan->handlers->msgs[0]);
upb_status_clear(&d->status);
diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc
index fa984ef..f58e403 100644
--- a/upb/pb/decoder_x64.dasc
+++ b/upb/pb/decoder_x64.dasc
@@ -9,8 +9,8 @@
|// parsing the specific message and calling specific handlers.
|//
|// Since the JIT can call other functions (the JIT'ted code is not a leaf
-|// function) we must respect alignment rules. On OS X, this means aligning
-|// the stack to 16 bytes.
+|// function) we must respect alignment rules. All x86-64 systems require
+|// 16-byte stack alignment.
#include <sys/mman.h>
#include "dynasm/dasm_x86.h"
@@ -103,7 +103,7 @@ void upb_reg_jit_gdb(upb_decoderplan *plan) {
// Has to be a separate function, otherwise GCC will complain about
// expressions like (&foo != NULL) because they will never evaluate
// to false.
-static void upb_assert_notnull(void *addr) { assert(addr != NULL); }
+static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
|.arch x64
|.actionlist upb_jit_actionlist
@@ -401,45 +401,10 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
}
}
-#if 0
-// These appear not to speed things up, but keeping around for
-// further experimentation.
-static void upb_decoderplan_jit_doappend(upb_decoderplan *plan, uint8_t size,
- upb_fhandlers *f) {
- | mov eax, STDARRAY:ARG1_64->len
- | cmp eax, STDARRAY:ARG1_64->size
- | jne >2
- // If array is full, fall back to actual function.
- | loadfval f
- | callp f->value
- | jmp >3
- |2:
- | mov rcx, STDARRAY:ARG1_64->ptr
- | mov esi, eax
- | add eax, 1
-
- switch (size) {
- case 8:
- | mov [rcx + rsi * 8], ARG3_64
- break;
-
- case 4:
- | mov [rcx + rsi * 4], ARG3_32
- break;
-
- case 1:
- | mov [rcx + rsi * 4], ARG3_8
- break;
- }
-
- | mov STDARRAY:ARG1_64->len, eax
- |3:
-}
-#endif
-
static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
upb_fhandlers *f) {
- // Call callbacks.
+ // Call callbacks. Specializing the append accessors didn't yield a speed
+ // increase in benchmarks.
if (upb_issubmsgtype(f->type)) {
if (f->type == UPB_TYPE(MESSAGE)) {
| mov rsi, PTR
@@ -457,7 +422,10 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
| mov ARG1_64, CLOSURE
| loadfval f
| callp f->startsubmsg
+ | sethas CLOSURE, f->hasbit
| mov CLOSURE, rdx
+ } else {
+ | sethas CLOSURE, f->hasbit
}
| mov qword FRAME->closure, CLOSURE
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
@@ -465,6 +433,7 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
| call =>sub_m->jit_startmsg_pclabel;
+ | popframe upb_fhandlers_getmsg(f)
// Call endsubmsg handler (if any).
if (f->endsubmsg) {
@@ -473,7 +442,6 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
| loadfval f
| callp f->endsubmsg
}
- | popframe upb_fhandlers_getmsg(f)
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
| mov DECODER->ptr, PTR
} else {
@@ -494,21 +462,6 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
} else if (f->value == &upb_stdmsg_setbool) {
const upb_fielddef *fd = upb_value_getfielddef(f->fval);
| mov [ARG1_64 + fd->offset], ARG3_8
-#if 0
- // These appear not to speed things up, but keeping around for
- // further experimentation.
- } else if (f->value == &upb_stdmsg_setint64_r ||
- f->value == &upb_stdmsg_setuint64_r ||
- f->value == &upb_stdmsg_setptr_r ||
- f->value == &upb_stdmsg_setdouble_r) {
- upb_decoderplan_jit_doappend(plan, 8, f);
- } else if (f->value == &upb_stdmsg_setint32_r ||
- f->value == &upb_stdmsg_setuint32_r ||
- f->value == &upb_stdmsg_setfloat_r) {
- upb_decoderplan_jit_doappend(plan, 4, f);
- } else if (f->value == &upb_stdmsg_setbool_r) {
- upb_decoderplan_jit_doappend(plan, 1, f);
-#endif
} else if (f->value) {
// Load closure and fval into arg registers.
||#ifndef NDEBUG
@@ -520,16 +473,26 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
| loadfval f
| callp f->value
}
- | sethas CLOSURE, f->valuehasbit
+ | sethas CLOSURE, f->hasbit
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
| mov DECODER->ptr, PTR
}
}
+static uint64_t upb_get_encoded_tag(upb_fhandlers *f) {
+  uint32_t wire_type = upb_decoder_types[f->type].native_wire_type;
+  uint32_t raw_tag = (f->number << 3) | wire_type;  // Field number above, wire type in the low 3 bits.
+  uint64_t varint = upb_vencode32(raw_tag);  // The tag as it is compared against the input by the JIT.
+  assert(varint <= 0xffffffffff);  // No tag should be greater than 5 bytes.
+  return varint;
+}
+
// PTR should point to the beginning of the tag.
-static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag,
- uint64_t next_tag, upb_mhandlers *m,
+static void upb_decoderplan_jit_field(upb_decoderplan *plan, upb_mhandlers *m,
upb_fhandlers *f, upb_fhandlers *next_f) {
+ uint64_t tag = upb_get_encoded_tag(f);
+ uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0;
+
// PC-label for the dispatch table.
// We check the wire type (which must be loaded in edx) because the
// table is keyed on field number, not type.
@@ -541,10 +504,13 @@ static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag,
| mov rsi, FRAME->end_ofs
| pushframe f, rsi, true
if (f->startseq) {
- | mov ARG1_64, CLOSURE
+ | mov ARG1_64, CLOSURE
| loadfval f
- | callp f->startseq
- | mov CLOSURE, rdx
+ | callp f->startseq
+ | sethas CLOSURE, f->hasbit
+ | mov CLOSURE, rdx
+ } else {
+ | sethas CLOSURE, f->hasbit
}
| mov qword FRAME->closure, CLOSURE
}
@@ -590,6 +556,11 @@ static int upb_compare_uint32(const void *a, const void *b) {
}
static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
+ |=>m->jit_afterstartmsg_pclabel:
+ // There was a call to get here, so we need to align the stack.
+ | sub rsp, 8
+ | jmp >1
+
|=>m->jit_startmsg_pclabel:
// There was a call to get here, so we need to align the stack.
| sub rsp, 8
@@ -602,6 +573,7 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
}
+ |1:
| setmsgend m
| check_eob m
| mov ecx, dword [PTR]
@@ -616,30 +588,19 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
int num_keys = upb_inttable_count(&m->fieldtab);
uint32_t *keys = malloc(num_keys * sizeof(*keys));
int idx = 0;
- for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
- !upb_inttable_done(i);
- i = upb_inttable_next(&m->fieldtab, i)) {
- keys[idx++] = upb_inttable_iter_key(i);
+ upb_inttable_iter i;
+ upb_inttable_begin(&i, &m->fieldtab);
+ for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+ keys[idx++] = upb_inttable_iter_key(&i);
}
qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
- upb_fhandlers *last_f = NULL;
- uint64_t last_encoded_tag = 0;
for(int i = 0; i < num_keys; i++) {
- uint32_t fieldnum = keys[i];
- upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, fieldnum);
- upb_fhandlers *f = e->f;
- assert(f->number == fieldnum);
- uint32_t tag = (f->number << 3) | upb_types[f->type].native_wire_type;
- uint64_t encoded_tag = upb_vencode32(tag);
- // No tag should be greater than 5 bytes.
- assert(encoded_tag <= 0xffffffffff);
- if (last_f) upb_decoderplan_jit_field(
- plan, last_encoded_tag, encoded_tag, m, last_f, f);
- last_encoded_tag = encoded_tag;
- last_f = f;
+ upb_fhandlers *f = upb_mhandlers_lookup(m, keys[i]);
+ upb_fhandlers *next_f =
+ (i + 1 < num_keys) ? upb_mhandlers_lookup(m, keys[i + 1]) : NULL;
+ upb_decoderplan_jit_field(plan, m, f, next_f);
}
- upb_decoderplan_jit_field(plan, last_encoded_tag, 0, m, last_f, NULL);
free(keys);
@@ -733,18 +694,19 @@ static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f,
static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m,
uint32_t *pclabel_count) {
m->jit_startmsg_pclabel = (*pclabel_count)++;
+ m->jit_afterstartmsg_pclabel = (*pclabel_count)++;
m->jit_endofbuf_pclabel = (*pclabel_count)++;
m->jit_endofmsg_pclabel = (*pclabel_count)++;
m->jit_dyndispatch_pclabel = (*pclabel_count)++;
m->jit_unknownfield_pclabel = (*pclabel_count)++;
m->max_field_number = 0;
upb_inttable_iter i;
- for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
- i = upb_inttable_next(&m->fieldtab, i)) {
- uint32_t key = upb_inttable_iter_key(i);
+ upb_inttable_begin(&i, &m->fieldtab);
+ for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+ uint32_t key = upb_inttable_iter_key(&i);
m->max_field_number = UPB_MAX(m->max_field_number, key);
- upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
- upb_decoderplan_jit_assignfieldlabs(e->f, pclabel_count);
+ upb_fhandlers *f = upb_value_getptr(upb_inttable_iter_value(&i));
+ upb_decoderplan_jit_assignfieldlabs(f, pclabel_count);
}
// TODO: support large field numbers by either using a hash table or
// generating code for a binary search. For now large field numbers
@@ -784,11 +746,12 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) {
// Create dispatch tables.
for (int i = 0; i < h->msgs_len; i++) {
upb_mhandlers *m = h->msgs[i];
+ // We jump to after the startmsg handler since it is called before entering
+ // the JIT (either by upb_decoder or by a previous call to the JIT).
m->jit_func =
- plan->jit_code + dasm_getpclabel(plan, m->jit_startmsg_pclabel);
+ plan->jit_code + dasm_getpclabel(plan, m->jit_afterstartmsg_pclabel);
for (uint32_t j = 0; j <= m->max_field_number; j++) {
- upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, j);
- upb_fhandlers *f = e ? e->f : NULL;
+ upb_fhandlers *f = upb_mhandlers_lookup(m, j);
if (f) {
m->tablearray[j] =
plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel);
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
index 4949fe3..40b901d 100644
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@@ -1,84 +1,17 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2010 Google Inc. See LICENSE for details.
+ * Copyright (c) 2010-2012 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*/
#include "upb/bytestream.h"
-#include "upb/descriptor.h"
-#include "upb/msg.h"
+#include "upb/descriptor/reader.h"
#include "upb/pb/decoder.h"
#include "upb/pb/glue.h"
-#include "upb/pb/textprinter.h"
-
-bool upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
- bool allow_jit, upb_status *status) {
- upb_stringsrc strsrc;
- upb_stringsrc_init(&strsrc);
- upb_stringsrc_reset(&strsrc, str, len);
-
- upb_decoder d;
- upb_handlers *h = upb_handlers_new();
- upb_accessors_reghandlers(h, md);
- upb_decoderplan *p = upb_decoderplan_new(h, allow_jit);
- upb_decoder_init(&d);
- upb_handlers_unref(h);
- upb_decoder_resetplan(&d, p, 0);
- upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg);
- upb_success_t ret = upb_decoder_decode(&d);
- // stringsrc and the handlers registered by upb_accessors_reghandlers()
- // should not suspend.
- assert((ret == UPB_OK) == upb_ok(upb_decoder_status(&d)));
- if (status) upb_status_copy(status, upb_decoder_status(&d));
-
- upb_stringsrc_uninit(&strsrc);
- upb_decoder_uninit(&d);
- upb_decoderplan_unref(p);
- return ret == UPB_OK;
-}
-
-void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) {
- void *msg = upb_stdmsg_new(md);
- size_t len;
- char *data = upb_readfile(fname, &len);
- if (!data) goto err;
- upb_strtomsg(data, len, msg, md, false, s);
- if (!upb_ok(s)) goto err;
- return msg;
-
-err:
- upb_stdmsg_free(msg, md);
- return NULL;
-}
-
-#if 0
-void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
- bool single_line) {
- upb_stringsink strsink;
- upb_stringsink_init(&strsink);
- upb_stringsink_reset(&strsink, str);
-
- upb_textprinter *p = upb_textprinter_new();
- upb_handlers *h = upb_handlers_new();
- upb_textprinter_reghandlers(h, md);
- upb_textprinter_reset(p, upb_stringsink_bytesink(&strsink), single_line);
-
- upb_status status = UPB_STATUS_INIT;
- upb_msg_runhandlers(msg, md, h, p, &status);
- // None of {upb_msg_runhandlers, upb_textprinter, upb_stringsink} should be
- // capable of returning an error.
- assert(upb_ok(&status));
- upb_status_uninit(&status);
-
- upb_stringsink_uninit(&strsink);
- upb_textprinter_free(p);
- upb_handlers_unref(h);
-}
-#endif
upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
- upb_status *status) {
+ void *owner, upb_status *status) {
upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str, len);
@@ -104,35 +37,20 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
upb_descreader_uninit(&r);
return NULL;
}
- upb_def **defs = upb_descreader_getdefs(&r, n);
+ upb_def **defs = upb_descreader_getdefs(&r, owner, n);
upb_def **defscopy = malloc(sizeof(upb_def*) * (*n));
memcpy(defscopy, defs, sizeof(upb_def*) * (*n));
upb_descreader_uninit(&r);
- // Set default accessors and layouts on all messages.
- for(int i = 0; i < *n; i++) {
- upb_def *def = defscopy[i];
- upb_msgdef *md = upb_dyncast_msgdef(def);
- if (!md) continue;
- // For field in msgdef:
- upb_msg_iter i;
- for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- upb_fielddef_setaccessor(f, upb_stdmsg_accessor(f));
- }
- upb_msgdef_layout(md);
- }
-
return defscopy;
}
bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
upb_status *status) {
int n;
- upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, status);
+ upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, &defs, status);
if (!defs) return false;
- bool success = upb_symtab_add(s, defs, n, status);
- for(int i = 0; i < n; i++) upb_def_unref(defs[i]);
+ bool success = upb_symtab_add(s, defs, n, &defs, status);
free(defs);
return success;
}
diff --git a/upb/pb/glue.h b/upb/pb/glue.h
index ff8c85e..6179d8d 100644
--- a/upb/pb/glue.h
+++ b/upb/pb/glue.h
@@ -1,7 +1,7 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2011 Google Inc. See LICENSE for details.
+ * Copyright (c) 2011-2012 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* upb's core components like upb_decoder and upb_msg are carefully designed to
@@ -34,25 +34,12 @@
extern "C" {
#endif
-// Decodes the given string, which must be in protobuf binary format, to the
-// given upb_msg with msgdef "md", storing the status of the operation in "s".
-bool upb_strtomsg(const char *str, size_t len, void *msg,
- const upb_msgdef *md, bool allow_jit, upb_status *s);
-
-// Parses the given file into a new message of the given type. Caller owns
-// the returned message (or NULL if an error occurred).
-void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s);
-
-//void upb_msgtotext(struct _upb_string *str, void *msg,
-// struct _upb_msgdef *md, bool single_line);
-
-
// Loads all defs from the given protobuf binary descriptor, setting default
// accessors and a default layout on all messages. The caller owns the
// returned array of defs, which will be of length *n. On error NULL is
// returned and status is set (if non-NULL).
upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
- upb_status *status);
+ void *owner, upb_status *status);
// Like the previous but also adds the loaded defs to the given symtab.
bool upb_load_descriptor_into_symtab(upb_symtab *symtab, const char *str,
diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c
index 3f68f90..0d9c967 100644
--- a/upb/pb/textprinter.c
+++ b/upb/pb/textprinter.c
@@ -96,7 +96,7 @@ err:
const upb_fielddef *f = upb_value_getfielddef(fval); \
uint64_t start_ofs = upb_bytesink_getoffset(p->sink); \
CHECK(upb_textprinter_indent(p)); \
- CHECK(upb_bytesink_writestr(p->sink, f->name)); \
+ CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f))); \
CHECK(upb_bytesink_writestr(p->sink, ": ")); \
CHECK(upb_bytesink_printf(p->sink, fmt, upb_value_get ## member(val))); \
CHECK(upb_textprinter_endfield(p)); \
@@ -124,7 +124,8 @@ static upb_flow_t upb_textprinter_putenum(void *_p, upb_value fval,
upb_textprinter *p = _p;
uint64_t start_ofs = upb_bytesink_getoffset(p->sink);
const upb_fielddef *f = upb_value_getfielddef(fval);
- upb_enumdef *enum_def = upb_downcast_enumdef(f->def);
+ const upb_enumdef *enum_def =
+ upb_downcast_enumdef_const(upb_fielddef_subdef(f));
const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val));
if (label) {
CHECK(upb_bytesink_writestr(p->sink, label));
@@ -157,7 +158,7 @@ static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) {
uint64_t start_ofs = upb_bytesink_getoffset(p->sink);
const upb_fielddef *f = upb_value_getfielddef(fval);
CHECK(upb_textprinter_indent(p));
- CHECK(upb_bytesink_printf(p->sink, "%s {", f->name));
+ CHECK(upb_bytesink_printf(p->sink, "%s {", upb_fielddef_name(f)));
if (!p->single_line)
CHECK(upb_bytesink_putc(p->sink, '\n'));
p->indent_depth++;
diff --git a/upb/pb/varint.h b/upb/pb/varint.h
index 815a7a1..c0e0134 100644
--- a/upb/pb/varint.h
+++ b/upb/pb/varint.h
@@ -19,6 +19,16 @@
extern "C" {
#endif
+// A list of types as they are encoded on-the-wire.
+typedef enum {
+ UPB_WIRE_TYPE_VARINT = 0, // Values are pinned explicitly; presumably the code kept in a tag's low 3 bits -- TODO confirm.
+ UPB_WIRE_TYPE_64BIT = 1,
+ UPB_WIRE_TYPE_DELIMITED = 2,
+ UPB_WIRE_TYPE_START_GROUP = 3,
+ UPB_WIRE_TYPE_END_GROUP = 4,
+ UPB_WIRE_TYPE_32BIT = 5,
+} upb_wiretype_t;
+
// The maximum number of bytes that it takes to encode a 64-bit varint.
// Note that with a better encoding this could be 9 (TODO: write up a
// wiki document about this).
diff --git a/upb/refcount.c b/upb/refcount.c
new file mode 100644
index 0000000..a15547a
--- /dev/null
+++ b/upb/refcount.c
@@ -0,0 +1,224 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2012 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <stdlib.h>
+#include <limits.h>
+#include "upb/refcount.h"
+
+// TODO(haberman): require client to define these if ref debugging is on.
+#ifndef UPB_LOCK
+#define UPB_LOCK
+#endif
+
+#ifndef UPB_UNLOCK
+#define UPB_UNLOCK
+#endif
+
+/* arch-specific atomic primitives *******************************************/
+
+#ifdef UPB_THREAD_UNSAFE //////////////////////////////////////////////////////
+// upb_atomic_inc() adds one; upb_atomic_dec() subtracts one and returns true iff the result is zero.
+INLINE void upb_atomic_inc(uint32_t *a) { (*a)++; }
+INLINE bool upb_atomic_dec(uint32_t *a) { return --(*a) == 0; }
+
+#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 ///////////////////
+// Same contract as above, implemented with GCC's __sync builtins.
+INLINE void upb_atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
+INLINE bool upb_atomic_dec(uint32_t *a) {
+  return __sync_sub_and_fetch(a, 1) == 0;
+}
+
+#elif defined(WIN32) || defined(_WIN32) ////////////////////////////////////////
+// Same uint32_t* signature as the other branches, since callers pass r->count (a uint32_t*).
+#include <Windows.h>
+// The Interlocked functions operate on a 32-bit LONG, hence the pointer casts.
+INLINE void upb_atomic_inc(uint32_t *a) { InterlockedIncrement((volatile LONG*)a); }
+INLINE bool upb_atomic_dec(uint32_t *a) {
+  return InterlockedDecrement((volatile LONG*)a) == 0;
+}
+
+#else
+#error Atomic primitives not defined for your platform/CPU. \
+  Implement them or compile with UPB_THREAD_UNSAFE.
+#endif
+
+// Reserved index values.
+#define UPB_INDEX_UNDEFINED UINT16_MAX
+#define UPB_INDEX_NOT_IN_STACK (UINT16_MAX - 1)
+// Folds the refcount group of "from" into that of "r"; does nothing if they already share a counter.
+static void upb_refcount_merge(upb_refcount *r, upb_refcount *from) {
+ if (upb_refcount_merged(r, from)) return;
+ *r->count += *from->count; // The surviving counter absorbs all of from's refs.
+ free(from->count); // Every member of from's chain is repointed in the loop below.
+ upb_refcount *base = from; // Remember where the walk started so the loop can stop.
+
+ // Set all refcount pointers in the "from" chain to the merged refcount.
+ do { from->count = r->count; } while ((from = from->next) != base);
+
+ // Merge the two circularly linked lists by swapping their next pointers.
+ upb_refcount *tmp = r->next;
+ r->next = base->next;
+ base->next = tmp;
+}
+
+// Tarjan's algorithm, see:
+// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
+
+typedef struct {
+ int index; // Next visit-order number to hand out in upb_refcount_dofindscc().
+ upb_refcount **stack; // Objects that have been visited but not yet popped into an SCC.
+ int stack_len; // Number of live entries in "stack".
+ upb_getsuccessors *func; // Client callback invoked to visit an object's successors.
+} upb_tarjan_state;
+
+static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state);
+
+void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *_state) {
+  // Propagates lowlink information across the edge obj -> subobj.
+  uint16_t reachable = subobj->index;
+  if (reachable == UPB_INDEX_NOT_IN_STACK) return;  // Belongs to a finished SCC.
+  if (reachable == UPB_INDEX_UNDEFINED) {
+    // Subobj has not been visited yet: recurse, then take what it can reach.
+    upb_refcount_dofindscc(subobj, (upb_tarjan_state*)_state);
+    reachable = subobj->lowlink;
+  }  // Otherwise subobj is still on the stack, i.e. in the SCC being built.
+  obj->lowlink = UPB_MIN(obj->lowlink, reachable);
+}
+// Recursive step of the SCC search: numbers obj, visits its successors, then merges the SCC rooted at obj (if any).
+static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state) {
+ obj->index = state->index; // NOTE(review): int narrowed to uint16_t; could collide with the reserved index values on huge graphs -- confirm.
+ obj->lowlink = state->index;
+ state->index++;
+ state->stack[state->stack_len++] = obj; // NOTE(review): no bounds check against the UINT16_MAX-entry stack.
+
+ state->func(obj, state); // Visit successors.
+
+ if (obj->lowlink == obj->index) { // Nothing reachable from obj had a lower index: obj roots an SCC.
+ upb_refcount *scc_obj;
+ while ((scc_obj = state->stack[--state->stack_len]) != obj) { // Pop members until obj itself is popped.
+ upb_refcount_merge(obj, scc_obj);
+ scc_obj->index = UPB_INDEX_NOT_IN_STACK;
+ }
+ obj->index = UPB_INDEX_NOT_IN_STACK;
+ }
+}
+
+bool upb_refcount_findscc(upb_refcount **refs, int n, upb_getsuccessors *func) {
+  // TODO(haberman): allocate less memory.  n can't serve as the bound because
+  // it doesn't include fielddefs; maybe use a dynamically-resizing array.
+  upb_refcount **stack = malloc(UINT16_MAX * sizeof(void*));
+  if (!stack) return false;
+  upb_tarjan_state state = {0, stack, 0, func};
+  for (int i = 0; i < n; i++) {  // Start a search from every object not reached so far.
+    if (refs[i]->index == UPB_INDEX_UNDEFINED) upb_refcount_dofindscc(refs[i], &state);
+  }
+  free(stack);
+  return true;
+}
+
+
+/* upb_refcount **************************************************************/
+// Sets up "r" holding one ref; returns false (leaving r unusable) if the counter cannot be allocated.
+bool upb_refcount_init(upb_refcount *r, void *owner) {
+ r->count = malloc(sizeof(uint32_t)); // On the heap so that merged refcounts can point at one shared counter.
+ if (!r->count) return false;
+ // Initializing this here means upb_refcount_findscc() can only run once for
+ // each refcount; may need to revise this to be more flexible.
+ r->index = UPB_INDEX_UNDEFINED;
+ r->next = r; // One-element circular list: not merged with anything yet.
+#ifdef UPB_DEBUG_REFS
+ // We don't detect malloc() failures for UPB_DEBUG_REFS.
+ upb_inttable_init(&r->refs);
+ *r->count = 0; // upb_refcount_ref() below raises this to 1 and records "owner".
+ upb_refcount_ref(r, owner);
+#else
+ *r->count = 1; // "owner" is not recorded in this configuration.
+#endif
+ return true;
+}
+// Tears down the debug bookkeeping only; the counter itself is freed by upb_refcount_unref().
+void upb_refcount_uninit(upb_refcount *r) {
+ (void)r; // Unused unless UPB_DEBUG_REFS is defined.
+#ifdef UPB_DEBUG_REFS
+ assert(upb_inttable_count(&r->refs) == 0); // Every tracked owner must have released its ref.
+ upb_inttable_uninit(&r->refs);
+#endif
+}
+
+// Moves an existing ref from owner "from" to owner "to", without changing the
+// overall ref count.
+void upb_refcount_donateref(upb_refcount *r, void *from, void *to) {
+ (void)r; (void)from; (void)to; // Avoid unused-parameter warnings when the code below is compiled out.
+ assert(from != to);
+#ifdef UPB_DEBUG_REFS
+ upb_refcount_ref(r, to); // Take the new ref first ...
+ upb_refcount_unref(r, from); // ... so that this unref cannot drop the count to zero.
+#endif
+}
+
+// Thread-safe operations //////////////////////////////////////////////////////
+
+// Ref and unref are thread-safe. upb_refcount_ref() adds one ref held by "owner".
+void upb_refcount_ref(upb_refcount *r, void *owner) {
+ (void)owner; // Only consulted when UPB_DEBUG_REFS is defined.
+ upb_atomic_inc(r->count);
+#ifdef UPB_DEBUG_REFS
+ UPB_LOCK;
+ // Caller must not already own a ref.
+ assert(upb_inttable_lookup(&r->refs, (uintptr_t)owner) == NULL);
+
+ // If a ref is leaked we want to blame the leak on whoever leaked the
+ // ref, not on who originally allocated the refcounted object. We accomplish
+ // this as follows. When a ref is taken in DEBUG_REFS mode, we malloc() some
+ // memory and set up pointers like so:
+ //
+ //   upb_refcount
+ //   +----------+   +------------------+
+ //   |  count   |<--+                  |
+ //   +----------+                 +----------+
+ //   |  table   |---X------------>| malloc'd |
+ //   +----------+                 |  memory  |
+ //                                +----------+
+ //
+ // Since the "malloc'd memory" is allocated inside of "ref" and free'd in
+ // unref, it will cause a leak if not unref'd. And since the leaked memory
+ // points to the object itself, the object will be considered "indirectly
+ // lost" by tools like Valgrind and not shown unless requested (which is good
+ // because the object's creator may not be responsible for the leak). But we
+ // have to hide the pointer marked "X" above from Valgrind, otherwise the
+ // malloc'd memory will appear to be indirectly leaked and the object itself
+ // will still be considered the primary leak. We hide this pointer from
+ // Valgrind (et al.) by doing a bitwise not on it.
+ upb_refcount **target = malloc(sizeof(void*));
+ uintptr_t obfuscated = ~(uintptr_t)target;
+ *target = r; // The tracking allocation points back at the refcount.
+ upb_inttable_insert(&r->refs, (uintptr_t)owner, upb_value_uint64(obfuscated));
+ UPB_UNLOCK;
+#endif
+}
+// Drops the ref held by "owner"; returns true (after freeing the counter) if it was the last one.
+bool upb_refcount_unref(upb_refcount *r, void *owner) {
+ (void)owner; // Only consulted when UPB_DEBUG_REFS is defined.
+ bool ret = upb_atomic_dec(r->count); // True iff this decrement reached zero.
+#ifdef UPB_DEBUG_REFS
+ UPB_LOCK;
+ upb_value v;
+ bool success = upb_inttable_remove(&r->refs, (uintptr_t)owner, &v);
+ assert(success); // "owner" must actually be recorded as holding a ref.
+ if (success) {
+ // Must un-obfuscate the pointer (it was stored bitwise-negated by upb_refcount_ref()).
+ free((void*)(~upb_value_getuint64(v)));
+ }
+ UPB_UNLOCK;
+#endif
+ if (ret) free(r->count); // Last ref gone: release the (possibly shared) counter.
+ return ret;
+}
+// Two refcounts count as merged exactly when they point at the same counter.
+bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2) {
+ return r->count == r2->count;
+}
diff --git a/upb/refcount.h b/upb/refcount.h
new file mode 100644
index 0000000..cb2bda9
--- /dev/null
+++ b/upb/refcount.h
@@ -0,0 +1,70 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * A thread-safe refcount that can optionally track references for debugging
+ * purposes. It helps avoid circular references by allowing a
+ * strongly-connected component in the graph to share a refcount.
+ *
+ * This interface is internal to upb.
+ */
+
+#ifndef UPB_REFCOUNT_H_
+#define UPB_REFCOUNT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "upb/table.h"
+
+#ifndef NDEBUG
+#define UPB_DEBUG_REFS
+#endif
+
+typedef struct _upb_refcount {
+ uint32_t *count; // Points at the counter; merged refcounts all point at the same one.
+ struct _upb_refcount *next; // Circularly-linked list of this SCC.
+ uint16_t index; // For SCC algorithm.
+ uint16_t lowlink; // For SCC algorithm.
+#ifdef UPB_DEBUG_REFS
+ upb_inttable refs; // Keyed by owner pointer; one entry per outstanding ref.
+#endif
+} upb_refcount;
+
+// NON THREAD SAFE operations //////////////////////////////////////////////////
+
+// Initializes the refcount with a single ref for the given owner. Returns
+// false if memory could not be allocated.
+bool upb_refcount_init(upb_refcount *r, void *owner);
+
+// Uninitializes the refcount. May only be called after unref() returns true.
+void upb_refcount_uninit(upb_refcount *r);
+
+// Moves an existing ref from owner "from" to owner "to", without changing the
+// overall ref count.
+void upb_refcount_donateref(upb_refcount *r, void *from, void *to);
+
+// Finds strongly-connected components among some set of objects and merges all
+// refcounts that share a SCC. The given function will be called when the
+// algorithm needs to visit children of a particular object; the function
+// should call upb_refcount_visit() once for each child obj.
+//
+// Returns false if memory allocation failed.
+typedef void upb_getsuccessors(upb_refcount *obj, void*);
+bool upb_refcount_findscc(upb_refcount **objs, int n, upb_getsuccessors *func);
+void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *closure);
+
+// Thread-safe operations //////////////////////////////////////////////////////
+
+// Increases the ref count, the new ref is owned by "owner" which must not
+// already own a ref. Circular reference chains are not allowed.
+void upb_refcount_ref(upb_refcount *r, void *owner);
+
+// Release a ref owned by owner, returns true if that was the last ref.
+bool upb_refcount_unref(upb_refcount *r, void *owner);
+
+// Returns true if these two objects share a refcount.
+bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2);
+
+#endif // UPB_REFCOUNT_H_
diff --git a/upb/table.c b/upb/table.c
index 31c91b1..4e3544e 100644
--- a/upb/table.c
+++ b/upb/table.c
@@ -4,8 +4,10 @@
* Copyright (c) 2009 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
- * There are a few printf's strewn throughout this file, uncommenting them
- * can be useful for debugging.
+ * Implementation is heavily inspired by Lua's ltable.c.
+ *
+ * TODO: for table iteration we use (array - 1) in several places; is this
+ * undefined behavior? If so find a better solution.
*/
#include "upb/table.h"
@@ -14,6 +16,8 @@
#include <stdlib.h>
#include <string.h>
+#define UPB_MAXARRSIZE 16 // 64k.
+
static const double MAX_LOAD = 0.85;
// The minimum percentage of an array part that we will allow. This is a
@@ -21,385 +25,319 @@ static const double MAX_LOAD = 0.85;
// cache effects). The lower this is, the more memory we'll use.
static const double MIN_DENSITY = 0.1;
+int upb_log2(uint64_t v) {  // Returns floor(log2(v)) capped at UPB_MAXARRSIZE; 0 when v == 0.
+#ifdef __GNUC__
+  int ret = v ? 63 - __builtin_clzll(v) : 0;  // 64-bit clz (plain clz would truncate v); clz(0) is undefined.
+#else
+  int ret = 0;
+  while (v >>= 1) ret++;  // Portable fallback: count the shifts needed to reach zero.
+#endif
+  return UPB_MIN(UPB_MAXARRSIZE, ret);
+}
+// Wraps a C string pointer in a upb_tabkey; the string itself is not copied.
+static upb_tabkey upb_strkey(const char *str) {
+ upb_tabkey k;
+ k.str = (char*)str; // The cast drops const so the pointer fits the key's "str" member.
+ return k;
+}
+
static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
+typedef upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key);
+typedef bool upb_eqlfunc_t(upb_tabkey k1, upb_tabkey k2);
/* Base table (shared code) ***************************************************/
-static uint32_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; }
-static size_t upb_table_entrysize(const upb_table *t) { return t->entry_size; }
-static size_t upb_table_valuesize(const upb_table *t) { return t->value_size; }
+static size_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; }
+
+// Returns true if holding one more entry would push the load factor (entries
+// divided by slots) above MAX_LOAD, i.e. the hash part should grow before the
+// next insert.
+static bool upb_table_isfull(upb_table *t) {
+ return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD;
+}
-void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) {
+static bool upb_table_init(upb_table *t, uint8_t size_lg2) {
t->count = 0;
- t->entry_size = entry_size;
- t->size_lg2 = 1;
- while(upb_table_size(t) < size) t->size_lg2++;
- size_t bytes = upb_table_size(t) * t->entry_size;
+ t->size_lg2 = size_lg2;
+ size_t bytes = upb_table_size(t) * sizeof(upb_tabent);
t->mask = upb_table_size(t) - 1;
t->entries = malloc(bytes);
+ if (!t->entries) return false;
+ memset(t->entries, 0, bytes);
+ return true;
}
-void upb_table_free(upb_table *t) { free(t->entries); }
+static void upb_table_uninit(upb_table *t) { free(t->entries); }
-/* upb_inttable ***************************************************************/
+static bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; }
-static upb_inttable_entry *intent(const upb_inttable *t, int32_t i) {
- //printf("looking up int entry %d, size of entry: %d\n", i, t->t.entry_size);
- return UPB_INDEX(t->t.entries, i, t->t.entry_size);
+// Scans backwards from the last slot of the table and returns the first empty
+// entry it finds.  The caller must guarantee that an empty entry exists; this
+// is only checked by assert().
+static upb_tabent *upb_table_emptyent(const upb_table *t) {
+  upb_tabent *slot = t->entries + upb_table_size(t);
+  for (;;) {
+    --slot;
+    if (upb_tabent_isempty(slot)) return slot;
+    assert(slot > t->entries);
+  }
}
-static uint32_t upb_inttable_hashtablesize(const upb_inttable *t) {
- return upb_table_size(&t->t);
+// Looks up key in t.  "hash" maps the key to its main-position entry and "eql"
+// compares two keys.  Returns a pointer to the stored value, or NULL if the
+// key is not present.
+static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key,
+                                   upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
+  upb_tabent *ent = hash(t, key);
+  // An empty main position means no chain starts here.
+  if (upb_tabent_isempty(ent)) return NULL;
+  for (; ent != NULL; ent = ent->next) {
+    if (eql(ent->key, key)) return &ent->val;
+  }
+  return NULL;
}
-void upb_inttable_sizedinit(upb_inttable *t, uint32_t arrsize, uint32_t hashsize,
- uint16_t value_size) {
- size_t entsize = _upb_inttable_entrysize(value_size);
- upb_table_init(&t->t, hashsize, entsize);
- for (uint32_t i = 0; i < upb_table_size(&t->t); i++) {
- upb_inttable_entry *e = intent(t, i);
- e->hdr.key = 0;
- e->hdr.next = UPB_END_OF_CHAIN;
- e->val.has_entry = 0;
+// Inserts key -> val into t.  "hash" maps a key to its main-position entry and
+// "eql" compares two keys.  The given key must not already exist in the table.
+// This function never grows the table: on a collision it takes a free slot
+// from upb_table_emptyent(), so the caller must ensure one is available.
+static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val,
+                             upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
+  assert(upb_table_lookup(t, key, hash, eql) == NULL);
+  t->count++;
+  upb_tabent *mainpos_e = hash(t, key);
+  upb_tabent *our_e = mainpos_e;
+  if (upb_tabent_isempty(mainpos_e)) {
+    // Our main position is free; use it as the head of a new chain.  A slot
+    // emptied by upb_table_remove() keeps its old "next" pointer, so it must
+    // be reset here or the new entry would be linked into a foreign chain.
+    our_e->next = NULL;
+  } else {  // Collision.
+    upb_tabent *new_e = upb_table_emptyent(t);
+    upb_tabent *chain = hash(t, mainpos_e->key);  // Head of collider's chain.
+    if (chain == mainpos_e) {
+      // Existing ent is in its main position (it has the same hash as us, and
+      // is the head of our chain).  Insert to new ent and append to this chain.
+      new_e->next = mainpos_e->next;
+      mainpos_e->next = new_e;
+      our_e = new_e;
+    } else {
+      // Existing ent is not in its main position (it is a node in some other
+      // chain).  This implies that no existing ent in the table has our hash.
+      // Evict it (updating its chain) and use its ent for head of our chain.
+      *new_e = *mainpos_e;  // copies next.
+      while (chain->next != mainpos_e) chain = chain->next;
+      chain->next = new_e;
+      our_e = mainpos_e;
+      our_e->next = NULL;
+    }
}
- t->t.value_size = value_size;
- // Always make the array part at least 1 long, so that we know key 0
- // won't be in the hash part (which lets us speed up that code path).
- t->array_size = UPB_MAX(1, arrsize);
- t->array = malloc(upb_table_valuesize(&t->t) * t->array_size);
- t->array_count = 0;
- for (uint32_t i = 0; i < t->array_size; i++) {
- upb_inttable_value *val = UPB_INDEX(t->array, i, upb_table_valuesize(&t->t));
- val->has_entry = false;
+  our_e->key = key;
+  our_e->val = val;
+  assert(upb_table_lookup(t, key, hash, eql) == &our_e->val);
+}
+
+// Removes key from t.  "hash" maps a key to its main-position entry and "eql"
+// compares two keys.  Returns true if the key was found and removed, storing
+// the removed value in *val when val is non-NULL; returns false otherwise.
+// NOTE(review): eql is called on the main-position entry without first
+// checking that the entry is non-empty; confirm this is safe for every eql
+// used with this function.
+static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val,
+ upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
+ upb_tabent *chain = hash(t, key);
+ if (eql(chain->key, key)) {
+ // The key is stored in the head (main position) of its chain.
+ t->count--;
+ if (val) *val = chain->val;
+ if (chain->next) {
+ // Pull the second chain element into the head slot so the chain still
+ // starts at the main position, then free the slot it came from.
+ upb_tabent *move = chain->next;
+ *chain = *move;
+ move->key.num = 0; // Make the slot empty.
+ } else {
+ chain->key.num = 0; // Make the slot empty.
+ }
+ return true;
+ } else {
+ // Walk the chain until chain->next is the matching entry (or the end).
+ while (chain->next && !eql(chain->next->key, key))
+ chain = chain->next;
+ if (chain->next) {
+ // Found element to remove.
+ if (val) *val = chain->next->val;
+ // Mark the slot empty before unlinking it; its own "next" pointer is
+ // left as it was.
+ chain->next->key.num = 0;
+ chain->next = chain->next->next;
+ t->count--;
+ return true;
+ } else {
+ return false;
+ }
}
}
-void upb_inttable_init(upb_inttable *t, uint32_t hashsize, uint16_t value_size) {
- upb_inttable_sizedinit(t, 0, hashsize, value_size);
+// Returns the first non-empty entry located after e in the entries array, or
+// NULL if there is none.
+static upb_tabent *upb_table_next(const upb_table *t, upb_tabent *e) {
+  upb_tabent *const limit = t->entries + upb_table_size(t);
+  for (++e; e != limit; ++e) {
+    if (e->key.num != 0) return e;
+  }
+  return NULL;
}
-void upb_inttable_free(upb_inttable *t) {
- upb_table_free(&t->t);
- free(t->array);
+static upb_tabent *upb_table_begin(const upb_table *t) {
+ return upb_table_next(t, t->entries - 1);
}
-static uint32_t empty_intbucket(upb_inttable *table)
-{
- // TODO: does it matter that this is biased towards the front of the table?
- for(uint32_t i = 0; i < upb_inttable_hashtablesize(table); i++) {
- upb_inttable_entry *e = intent(table, i);
- if(!e->val.has_entry) return i;
- }
- assert(false);
- return 0;
+
+/* upb_strtable ***************************************************************/
+
+// A simple "subclass" of upb_table that only adds a hash function for strings.
+
+// Hash function for string keys: returns the main-position entry for key.str.
+static upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) {
+  // Could avoid the strlen() by using a hash function that terminates on NUL.
+  const size_t len = strlen(key.str);
+  const uint32_t h = MurmurHash2(key.str, len, 0);
+  return t->entries + (h & t->mask);
}
-// The insert routines have a lot more code duplication between int/string
-// variants than I would like, but there's just a bit too much that varies to
-// parameterize them.
-static void intinsert(upb_inttable *t, uint32_t key, const void *val) {
- assert(upb_inttable_lookup(t, key) == NULL);
- upb_inttable_value *table_val;
- if (_upb_inttable_isarrkey(t, key)) {
- table_val = UPB_INDEX(t->array, key, upb_table_valuesize(&t->t));
- t->array_count++;
- //printf("Inserting key %d to Array part! %p\n", key, table_val);
- } else {
- t->t.count++;
- uint32_t bucket = _upb_inttable_bucket(t, key);
- upb_inttable_entry *table_e = intent(t, bucket);
- //printf("Hash part! Inserting into bucket %d?\n", bucket);
- if(table_e->val.has_entry) { /* Collision. */
- //printf("Collision!\n");
- if(bucket == _upb_inttable_bucket(t, table_e->hdr.key)) {
- /* Existing element is in its main posisiton. Find an empty slot to
- * place our new element and append it to this key's chain. */
- uint32_t empty_bucket = empty_intbucket(t);
- while (table_e->hdr.next != UPB_END_OF_CHAIN)
- table_e = intent(t, table_e->hdr.next);
- table_e->hdr.next = empty_bucket;
- table_e = intent(t, empty_bucket);
- } else {
- /* Existing element is not in its main position. Move it to an empty
- * slot and put our element in its main position. */
- uint32_t empty_bucket = empty_intbucket(t);
- uint32_t evictee_bucket = _upb_inttable_bucket(t, table_e->hdr.key);
- memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
- upb_inttable_entry *evictee_e = intent(t, evictee_bucket);
- while(1) {
- assert(evictee_e->val.has_entry);
- assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
- if(evictee_e->hdr.next == bucket) {
- evictee_e->hdr.next = empty_bucket;
- break;
- }
- evictee_e = intent(t, evictee_e->hdr.next);
- }
- /* table_e remains set to our mainpos. */
- }
- }
- //printf("Inserting! to:%p, copying to: %p\n", table_e, &table_e->val);
- table_val = &table_e->val;
- table_e->hdr.key = key;
- table_e->hdr.next = UPB_END_OF_CHAIN;
- }
- memcpy(table_val, val, upb_table_valuesize(&t->t));
- table_val->has_entry = true;
- assert(upb_inttable_lookup(t, key) == table_val);
+static bool upb_streql(upb_tabkey k1, upb_tabkey k2) {
+ return strcmp(k1.str, k2.str) == 0;
}
-// Insert all elements from src into dest. Caller ensures that a resize will
-// not be necessary.
-static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) {
- for(upb_inttable_iter i = upb_inttable_begin(src); !upb_inttable_done(i);
- i = upb_inttable_next(src, i)) {
- //printf("load check: %d %d\n", upb_table_count(&dst->t), upb_inttable_hashtablesize(dst));
- assert((double)(upb_table_count(&dst->t)) /
- upb_inttable_hashtablesize(dst) <= MAX_LOAD);
- intinsert(dst, upb_inttable_iter_key(i), upb_inttable_iter_value(i));
- }
+bool upb_strtable_init(upb_strtable *t) { return upb_table_init(&t->t, 4); }
+
+// Frees the key string held by every slot, then releases the table storage.
+// Every slot is passed to free(), including empty ones.
+void upb_strtable_uninit(upb_strtable *t) {
+  upb_tabent *ent = t->t.entries;
+  upb_tabent *const end = ent + upb_table_size(&t->t);
+  for (; ent != end; ++ent) free(ent->key.str);
+  upb_table_uninit(&t->t);
}
-void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val) {
- if((double)(t->t.count + 1) / upb_inttable_hashtablesize(t) > MAX_LOAD) {
- //printf("RESIZE!\n");
- // Need to resize. Allocate new table with double the size of however many
- // elements we have now, add old elements to it. We create the new hash
- // table without an array part, even if the old table had an array part.
- // If/when the user calls upb_inttable_compact() again, we'll create an
- // array part then.
- upb_inttable new_table;
- //printf("Old table count=%d, size=%d\n", upb_inttable_count(t), upb_inttable_hashtablesize(t));
- upb_inttable_init(&new_table, upb_inttable_count(t)*2, upb_table_valuesize(&t->t));
- upb_inttable_insertall(&new_table, t);
- upb_inttable_free(t);
+// Inserts k -> v.  The table stores its own copy of k (made with strdup()).
+// The key must not already be present.  Returns false if memory allocation
+// fails, in which case the set of entries in the table is unchanged.
+bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
+  if (upb_table_isfull(&t->t)) {
+    // Need to resize.  New table of double the size, add old elements to it.
+    upb_strtable new_table;
+    if (!upb_table_init(&new_table.t, t->t.size_lg2 + 1)) return false;
+    upb_strtable_iter i;
+    upb_strtable_begin(&i, t);
+    for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+      if (!upb_strtable_insert(&new_table, upb_strtable_iter_key(&i),
+                               upb_strtable_iter_value(&i))) {
+        // Copying a key failed.  Discard the partially-filled copy and keep
+        // the original table instead of silently dropping entries.
+        upb_strtable_uninit(&new_table);
+        return false;
+      }
+    }
+    upb_strtable_uninit(t);
*t = new_table;
}
- intinsert(t, key, val);
+  if ((k = strdup(k)) == NULL) return false;
+  upb_table_insert(&t->t, upb_strkey(k), v, &upb_strhash, &upb_streql);
+  return true;
}
-void upb_inttable_compact(upb_inttable *t) {
- // Find the largest array part we can that satisfies the MIN_DENSITY
- // definition. For now we just count down powers of two.
- uint32_t largest_key = 0;
- for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
- i = upb_inttable_next(t, i)) {
- largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i));
- }
- int lg2_array = 0;
- while ((1UL << lg2_array) < largest_key) ++lg2_array;
- ++lg2_array; // Undo the first iteration.
- size_t array_size = 0;
- int array_count = 0;
- while (lg2_array > 0) {
- array_size = (1 << --lg2_array);
- //printf("Considering size %d (btw, our table has %d things total)\n", array_size, upb_inttable_count(t));
- if ((double)upb_inttable_count(t) / array_size < MIN_DENSITY) {
- // Even if 100% of the keys were in the array pary, an array of this
- // size would not be dense enough.
- continue;
- }
- array_count = 0;
- for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
- i = upb_inttable_next(t, i)) {
- if (upb_inttable_iter_key(i) < array_size)
- array_count++;
- }
- //printf("There would be %d things in that array\n", array_count);
- if ((double)array_count / array_size >= MIN_DENSITY) break;
- }
- upb_inttable new_table;
- int hash_size = (upb_inttable_count(t) - array_count + 1) / MAX_LOAD;
- //printf("array_count: %d, array_size: %d, hash_size: %d, table size: %d\n", array_count, array_size, hash_size, upb_inttable_count(t));
- upb_inttable_sizedinit(&new_table, array_size, hash_size,
- upb_table_valuesize(&t->t));
- //printf("For %d things, using array size=%d, hash_size = %d\n", upb_inttable_count(t), array_size, hash_size);
- upb_inttable_insertall(&new_table, t);
- upb_inttable_free(t);
- *t = new_table;
+upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) {
+ return upb_table_lookup(&t->t, upb_strkey(key), &upb_strhash, &upb_streql);
}
-upb_inttable_iter upb_inttable_begin(const upb_inttable *t) {
- upb_inttable_iter iter = {-1, NULL, true}; // -1 will overflow to 0 on the first iteration.
- return upb_inttable_next(t, iter);
+void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
+ i->t = t;
+ i->e = upb_table_begin(&t->t);
}
-upb_inttable_iter upb_inttable_next(const upb_inttable *t,
- upb_inttable_iter iter) {
- const size_t hdrsize = sizeof(upb_inttable_header);
- const size_t entsize = upb_table_entrysize(&t->t);
- if (iter.array_part) {
- while (++iter.key < t->array_size) {
- //printf("considering value %d\n", iter.key);
- iter.value = UPB_INDEX(t->array, iter.key, t->t.value_size);
- if (iter.value->has_entry) return iter;
- }
- //printf("Done with array part!\n");
- iter.array_part = false;
- // Point to the value of the table[-1] entry.
- iter.value = UPB_INDEX(intent(t, -1), 1, hdrsize);
- }
- void *end = intent(t, upb_inttable_hashtablesize(t));
- // Point to the entry for the value that was previously in iter.
- upb_inttable_entry *e = UPB_INDEX(iter.value, -1, hdrsize);
- do {
- e = UPB_INDEX(e, 1, entsize);
- //printf("considering value %p (val: %p)\n", e, &e->val);
- if(e == end) {
- //printf("No values.\n");
- iter.value = NULL;
- return iter;
- }
- } while(!e->val.has_entry);
- //printf("USING VALUE! %p\n", e);
- iter.key = e->hdr.key;
- iter.value = &e->val;
- return iter;
+void upb_strtable_next(upb_strtable_iter *i) {
+ i->e = upb_table_next(&i->t->t, i->e);
}
-/* upb_strtable ***************************************************************/
+/* upb_inttable ***************************************************************/
-static upb_strtable_entry *strent(const upb_strtable *t, int32_t i) {
- //fprintf(stderr, "i: %d, table_size: %d\n", i, upb_table_size(&t->t));
- assert(i <= (int32_t)upb_table_size(&t->t));
- return UPB_INDEX(t->t.entries, i, t->t.entry_size);
-}
+// For inttables we use a hybrid structure where small keys are kept in an
+// array and large keys are put in the hash table.
-static uint32_t upb_strtable_size(const upb_strtable *t) {
- return upb_table_size(&t->t);
+static bool upb_inteql(upb_tabkey k1, upb_tabkey k2) {
+ return k1.num == k2.num;
}
-void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t valuesize) {
- t->t.value_size = valuesize;
- size_t entsize = upb_align_up(sizeof(upb_strtable_header) + valuesize, 8);
- upb_table_init(&t->t, size, entsize);
- for (uint32_t i = 0; i < upb_table_size(&t->t); i++) {
- upb_strtable_entry *e = strent(t, i);
- e->hdr.key = NULL;
- e->hdr.next = UPB_END_OF_CHAIN;
- }
+size_t upb_inttable_count(const upb_inttable *t) {
+ return t->t.count + t->array_count;
}
-void upb_strtable_free(upb_strtable *t) {
- // Free keys from the strtable.
- upb_strtable_iter i;
- for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i))
- free((char*)upb_strtable_iter_key(&i));
- upb_table_free(&t->t);
+// Initializes t with an array part of asize slots (at least 1) and a hash part
+// of 2^hsize_lg2 slots.  Returns false, leaving nothing allocated, if either
+// allocation fails.
+bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) {
+  if (!upb_table_init(&t->t, hsize_lg2)) return false;
+  t->array_count = 0;
+  // Always make the array part at least 1 long, so that we know key 0
+  // won't be in the hash part, which simplifies things.
+  t->array_size = UPB_MAX(1, asize);
+  const size_t nbytes = t->array_size * sizeof(upb_value);
+  t->array = malloc(nbytes);
+  if (t->array == NULL) {
+    upb_table_uninit(&t->t);
+    return false;
+  }
+  // Fill every array slot with all-ones bytes.
+  memset(t->array, 0xff, nbytes);
+  return true;
}
-static uint32_t strtable_bucket(const upb_strtable *t, const char *key) {
- uint32_t hash = MurmurHash2(key, strlen(key), 0);
- return (hash & t->t.mask);
+bool upb_inttable_init(upb_inttable *t) {
+ return upb_inttable_sizedinit(t, 0, 4);
}
-void *upb_strtable_lookup(const upb_strtable *t, const char *key) {
- uint32_t bucket = strtable_bucket(t, key);
- upb_strtable_entry *e;
- do {
- e = strent(t, bucket);
- if(e->hdr.key && strcmp(e->hdr.key, key) == 0) return &e->val;
- } while((bucket = e->hdr.next) != UPB_END_OF_CHAIN);
- return NULL;
+void upb_inttable_uninit(upb_inttable *t) {
+ upb_table_uninit(&t->t);
+ free(t->array);
}
-void *upb_strtable_lookupl(const upb_strtable *t, const char *key, size_t len) {
- // TODO: improve.
- char *key2 = malloc(len+1);
- memcpy(key2, key, len);
- key2[len] = '\0';
- void *ret = upb_strtable_lookup(t, key2);
- free(key2);
- return ret;
+// Inserts key -> val.  Keys below array_size go into the array part; all other
+// keys go into the hash part, which is doubled first if it is full.  Returns
+// false if that resize could not allocate memory.
+bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
+  assert(upb_arrhas(val));
+  if (key < t->array_size) {
+    // Array part: the slot must currently be unoccupied.
+    assert(!upb_arrhas(t->array[key]));
+    t->array[key] = val;
+    t->array_count++;
+    return true;
+  }
+  if (upb_table_isfull(&t->t)) {
+    // Need to resize the hash part, but we re-use the array part.
+    upb_table grown;
+    if (!upb_table_init(&grown, t->t.size_lg2 + 1)) return false;
+    for (upb_tabent *ent = upb_table_begin(&t->t); ent != NULL;
+         ent = upb_table_next(&t->t, ent)) {
+      upb_table_insert(&grown, ent->key, ent->val, &upb_inthash, &upb_inteql);
+    }
+    upb_table_uninit(&t->t);
+    t->t = grown;
+  }
+  upb_table_insert(&t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql);
+  return true;
}
-static uint32_t empty_strbucket(upb_strtable *table) {
- // TODO: does it matter that this is biased towards the front of the table?
- for(uint32_t i = 0; i < upb_strtable_size(table); i++) {
- upb_strtable_entry *e = strent(table, i);
- if(!e->hdr.key) return i;
+upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) {
+ if (key < t->array_size) {
+ upb_value *v = &t->array[key];
+ return upb_arrhas(*v) ? v : NULL;
}
- assert(false);
- return 0;
+ return upb_table_lookup(&t->t, upb_intkey(key), &upb_inthash, &upb_inteql);
}
-static void strinsert(upb_strtable *t, const char *key, const void *val) {
- assert(upb_strtable_lookup(t, key) == NULL);
- t->t.count++;
- uint32_t bucket = strtable_bucket(t, key);
- upb_strtable_entry *table_e = strent(t, bucket);
- if(table_e->hdr.key) { /* Collision. */
- if(bucket == strtable_bucket(t, table_e->hdr.key)) {
- /* Existing element is in its main posisiton. Find an empty slot to
- * place our new element and append it to this key's chain. */
- uint32_t empty_bucket = empty_strbucket(t);
- while (table_e->hdr.next != UPB_END_OF_CHAIN)
- table_e = strent(t, table_e->hdr.next);
- table_e->hdr.next = empty_bucket;
- table_e = strent(t, empty_bucket);
+bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
+ if (key < t->array_size) {
+ if (upb_arrhas(t->array[key])) {
+ t->array_count--;
+ if (val) *val = t->array[key];
+ t->array[key] = upb_value_uint64(-1);
+ return true;
} else {
- /* Existing element is not in its main position. Move it to an empty
- * slot and put our element in its main position. */
- uint32_t empty_bucket = empty_strbucket(t);
- uint32_t evictee_bucket = strtable_bucket(t, table_e->hdr.key);
- memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
- upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
- while(1) {
- assert(evictee_e->hdr.key);
- assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
- if(evictee_e->hdr.next == bucket) {
- evictee_e->hdr.next = empty_bucket;
- break;
- }
- evictee_e = strent(t, evictee_e->hdr.next);
- }
- /* table_e remains set to our mainpos. */
+ return false;
}
+ } else {
+ return upb_table_remove(
+ &t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql);
}
- //fprintf(stderr, "val: %p\n", val);
- //fprintf(stderr, "val size: %d\n", t->t.value_size);
- memcpy(&table_e->val, val, t->t.value_size);
- table_e->hdr.key = strdup(key);
- table_e->hdr.next = UPB_END_OF_CHAIN;
- //fprintf(stderr, "Looking up, string=%s...\n", key);
- assert(upb_strtable_lookup(t, key) == &table_e->val);
- //printf("Yay!\n");
}
-void upb_strtable_insert(upb_strtable *t, const char *key, const void *val) {
- if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) {
- // Need to resize. New table of double the size, add old elements to it.
- //printf("RESIZE!!\n");
- upb_strtable new_table;
- upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.value_size);
- upb_strtable_iter i;
- upb_strtable_begin(&i, t);
- for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
- strinsert(&new_table,
- upb_strtable_iter_key(&i),
- upb_strtable_iter_value(&i));
- }
- upb_strtable_free(t);
- *t = new_table;
+// Rebuilds t with an array part sized to the largest power of two that still
+// satisfies MIN_DENSITY, and a hash part sized for the remaining keys.  If the
+// replacement table cannot be allocated or filled, t is left unchanged.
+void upb_inttable_compact(upb_inttable *t) {
+  // Find the largest power of two that satisfies the MIN_DENSITY definition.
+  // counts[n] is the number of keys whose upb_log2() is n.
+  int counts[UPB_MAXARRSIZE + 1] = {0};
+  upb_inttable_iter i;
+  for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i))
+    counts[upb_log2(upb_inttable_iter_key(&i))]++;
+  int count = upb_inttable_count(t);
+  int size;
+  for (size = UPB_MAXARRSIZE; size > 1; size--) {
+    // After this subtraction "count" excludes every key >= (1 << size).
+    count -= counts[size];
+    if (count >= (1 << size) * MIN_DENSITY) break;
}
- strinsert(t, key, val);
+
+  // Insert all elements into new, perfectly-sized table.
+  upb_inttable new_table;
+  int hashsize = (upb_inttable_count(t) - count + 1) / MAX_LOAD;
+  // "size" is an exponent: the array part must hold (1 << size) slots to match
+  // the density computation above.
+  if (!upb_inttable_sizedinit(&new_table, (size_t)1 << size,
+                              upb_log2(hashsize) + 1)) {
+    return;  // Out of memory: compaction is optional, keep the old table.
+  }
+  for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    if (!upb_inttable_insert(&new_table, upb_inttable_iter_key(&i),
+                             upb_inttable_iter_value(&i))) {
+      // Could not grow the new hash part; discard it and keep the old table.
+      upb_inttable_uninit(&new_table);
+      return;
+    }
+  }
+  upb_inttable_uninit(t);
+  *t = new_table;
}
-void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
- i->e = strent(t, -1);
+void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
i->t = t;
- upb_strtable_next(i);
+ i->arrkey = -1;
+ i->array_part = true;
+ upb_inttable_next(i);
}
-void upb_strtable_next(upb_strtable_iter *i) {
- upb_strtable_entry *end = strent(i->t, upb_strtable_size(i->t));
- upb_strtable_entry *cur = i->e;
- do {
- cur = (void*)((char*)cur + i->t->t.entry_size);
- if(cur == end) { i->e = NULL; return; }
- } while(cur->hdr.key == NULL);
- i->e = cur;
+// Advances iter to the next entry of its table: first the remaining occupied
+// slots of the array part, then the entries of the hash part.
+void upb_inttable_next(upb_inttable_iter *iter) {
+ const upb_inttable *t = iter->t;
+ if (iter->array_part) {
+ // Resume the array scan at the slot after the one last returned.
+ for (size_t i = iter->arrkey; ++i < t->array_size; )
+ if (upb_arrhas(t->array[i])) {
+ iter->ptr.val = &t->array[i];
+ iter->arrkey = i;
+ return;
+ }
+ // Array part exhausted: switch to the hash part, starting one slot before
+ // the first entry so that upb_table_next() below examines entries[0].
+ iter->array_part = false;
+ iter->ptr.ent = t->t.entries - 1;
+ }
+ iter->ptr.ent = upb_table_next(&t->t, iter->ptr.ent);
}
#ifdef UPB_UNALIGNED_READS_OK
@@ -413,8 +351,7 @@ void upb_strtable_next(upb_strtable_iter *i) {
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
-{
+static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const uint32_t m = 0x5bd1e995;
@@ -465,8 +402,7 @@ static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
-static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed)
-{
+static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
const uint32_t m = 0x5bd1e995;
const int32_t r = 24;
const uint8_t * data = (const uint8_t *)key;
diff --git a/upb/table.h b/upb/table.h
index 0c0a785..f6bff66 100644
--- a/upb/table.h
+++ b/upb/table.h
@@ -4,13 +4,16 @@
* Copyright (c) 2009 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
- * This file defines very fast int->struct (inttable) and string->struct
- * (strtable) hash tables. The struct can be of any size, and it is stored
- * in the table itself, for cache-friendly performance.
+ * This file defines very fast int->upb_value (inttable) and string->upb_value
+ * (strtable) hash tables.
*
- * The table uses internal chaining with Brent's variation (inspired by the
- * Lua implementation of hash tables). The hash function for strings is
- * Austin Appleby's "MurmurHash."
+ * The table uses chained scatter with Brent's variation (inspired by the Lua
+ * implementation of hash tables). The hash function for strings is Austin
+ * Appleby's "MurmurHash."
+ *
+ * The inttable uses uintptr_t as its key, which guarantees it can be used to
+ * store pointers or integers of at least 32 bits (upb isn't really useful on
+ * systems where sizeof(void*) < 4).
*
* This header is internal to upb; its interface should not be considered
* public or stable.
@@ -19,52 +22,30 @@
#ifndef UPB_TABLE_H_
#define UPB_TABLE_H_
-#include <assert.h>
#include <stddef.h>
+#include <stdint.h>
#include "upb.h"
#ifdef __cplusplus
extern "C" {
#endif
-#define UPB_END_OF_CHAIN (uint32_t)-1
-
-typedef struct {
- bool has_entry:1;
- // The rest of the bits are the user's.
-} upb_inttable_value;
-
-typedef struct {
- uint32_t key;
- uint32_t next; // Internal chaining.
-} upb_inttable_header;
-
-typedef struct {
- upb_inttable_header hdr;
- upb_inttable_value val;
-} upb_inttable_entry;
-
-// TODO: consider storing the hash in the entry. This would avoid the need to
-// rehash on table resizes, but more importantly could possibly improve lookup
-// performance by letting us compare hashes before comparing lengths or the
-// strings themselves.
-typedef struct {
- char *key; // We own, nullz. TODO: store explicit len?
- uint32_t next; // Internal chaining.
-} upb_strtable_header;
+typedef union {
+ uintptr_t num;
+ char *str; // We own, nullz.
+} upb_tabkey;
-typedef struct {
- upb_strtable_header hdr;
- uint32_t val; // Val is at least 32 bits.
-} upb_strtable_entry;
+typedef struct _upb_tabent {
+ upb_tabkey key;
+ upb_value val;
+ struct _upb_tabent *next; // Internal chaining.
+} upb_tabent;
typedef struct {
- void *entries; // Hash table.
- uint32_t count; // Number of entries in the hash part.
- uint32_t mask; // Mask to turn hash value -> bucket.
- uint16_t entry_size; // Size of each entry.
- uint16_t value_size; // Size of each value.
- uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries.
+ upb_tabent *entries; // Hash table.
+ size_t count; // Number of entries in the hash part.
+ size_t mask; // Mask to turn hash value -> bucket.
+ uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries.
} upb_table;
typedef struct {
@@ -72,149 +53,124 @@ typedef struct {
} upb_strtable;
typedef struct {
- upb_table t;
- void *array; // Array part of the table.
- uint32_t array_size; // Array part size.
- uint32_t array_count; // Array part number of elements.
+ upb_table t; // For entries that don't fit in the array part.
+ upb_value *array; // Array part of the table.
+ size_t array_size; // Array part size.
+ size_t array_count; // Array part number of elements.
} upb_inttable;
-// Initialize and free a table, respectively. Specify the initial size
-// with 'size' (the size will be increased as necessary). Value size
-// specifies how many bytes each value in the table is.
-//
-// WARNING! The lowest bit of every entry is reserved by the hash table.
-// It will always be overwritten when you insert, and must not be modified
-// when looked up!
-void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size);
-void upb_inttable_free(upb_inttable *table);
-void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t value_size);
-void upb_strtable_free(upb_strtable *table);
-
-// Number of values in the hash table.
-INLINE uint32_t upb_table_count(const upb_table *t) { return t->count; }
-INLINE uint32_t upb_inttable_count(const upb_inttable *t) {
- return t->array_count + upb_table_count(&t->t);
-}
-INLINE uint32_t upb_strtable_count(const upb_strtable *t) {
- return upb_table_count(&t->t);
+INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; }
+
+INLINE upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) {
+ return t->entries + ((uint32_t)key.num & t->mask);
}
-// Inserts the given key into the hashtable with the given value. The key must
-// not already exist in the hash table. The data will be copied from val into
-// the hashtable (the amount of data copied comes from value_size when the
-// table was constructed). Therefore the data at val may be freed once the
-// call returns. For string tables, the table takes ownership of the string.
-//
-// WARNING: the lowest bit of val is reserved and will be overwritten!
-void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val);
-// TODO: may want to allow for more complex keys with custom hash/comparison
-// functions.
-void upb_strtable_insert(upb_strtable *t, const char *key, const void *val);
-void upb_inttable_compact(upb_inttable *t);
+INLINE bool upb_arrhas(upb_value v) { return v.val.uint64 != (uint64_t)-1; }
-INLINE uint32_t _upb_inttable_bucket(const upb_inttable *t, uint32_t k) {
- uint32_t bucket = k & t->t.mask; // Identity hash for ints.
- assert(bucket != UPB_END_OF_CHAIN);
- return bucket;
-}
+// Initialize and uninitialize a table, respectively. If memory allocation
+// fails, init returns false and the table is left uninitialized.
+bool upb_inttable_init(upb_inttable *table);
+bool upb_strtable_init(upb_strtable *table);
+void upb_inttable_uninit(upb_inttable *table);
+void upb_strtable_uninit(upb_strtable *table);
-// Returns true if this key belongs in the array part of the table.
-INLINE bool _upb_inttable_isarrkey(const upb_inttable *t, uint32_t k) {
- return (k < t->array_size);
-}
+// Returns the number of values in the table.
+size_t upb_inttable_count(const upb_inttable *t);
+INLINE size_t upb_strtable_count(const upb_strtable *t) { return t->t.count; }
-// Looks up key in this table, returning a pointer to the user's inserted data.
-// We have the caller specify the entry_size because fixing this as a literal
-// (instead of reading table->entry_size) gives the compiler more ability to
-// optimize.
+// Inserts the given key into the hashtable with the given value. The key must
+// not already exist in the hash table. For string tables, the key must be
+// NULL-terminated, and the table will make an internal copy of the key.
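+// Inttables must not insert a value whose uint64 member is UINT64_MAX (all bits
+// set); upb_arrhas() treats that value as "no entry" in the array part.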
//
-// Note: All returned pointers are invalidated by inserts!
-INLINE void *_upb_inttable_fastlookup(const upb_inttable *t, uint32_t key,
- size_t entry_size, size_t value_size) {
- upb_inttable_value *arrval =
- (upb_inttable_value*)UPB_INDEX(t->array, key, value_size);
- if (_upb_inttable_isarrkey(t, key)) {
- return (arrval->has_entry) ? arrval : NULL;
+// If a table resize was required but memory allocation failed, false is
+// returned and the table is unchanged.
+bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val);
+bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val);
+
+// Looks up key in this table, returning a pointer to the table's internal copy
+// of the user's inserted data, or NULL if this key is not in the table. The
+// user is free to modify the given upb_value, which will be reflected in any
+// future lookups of this key. The returned pointer is invalidated by inserts.
+upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key);
+upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key);
+
+// Removes an item from the table. Returns true if the removal was successful,
+// and stores the removed item in *val if non-NULL.
+bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
+
+// Optimizes the table for the current set of entries, for both memory use and
+// lookup time. Client should call this after all entries have been inserted;
+// inserting more entries is legal, but will likely require a table resize.
+void upb_inttable_compact(upb_inttable *t);
+
+// A special-case inlinable version of the lookup routine for 32-bit integers.
+INLINE upb_value *upb_inttable_lookup32(const upb_inttable *t, uint32_t key) {
+ if (key < t->array_size) {
+ upb_value *v = &t->array[key];
+ return upb_arrhas(*v) ? v : NULL;
}
- uint32_t bucket = _upb_inttable_bucket(t, key);
- upb_inttable_entry *e =
- (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
- while (1) {
- if (e->hdr.key == key) {
- return &e->val;
- }
- if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL;
- e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
+ for (upb_tabent *e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) {
+ if ((uint32_t)e->key.num == key) return &e->val;
+ if (e->next == NULL) return NULL;
}
}
-INLINE size_t _upb_inttable_entrysize(size_t value_size) {
- return upb_align_up(sizeof(upb_inttable_header) + value_size, 8);
-}
-
-INLINE void *upb_inttable_fastlookup(const upb_inttable *t, uint32_t key,
- uint32_t value_size) {
- return _upb_inttable_fastlookup(
- t, key, _upb_inttable_entrysize(value_size), value_size);
-}
-
-INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
- return _upb_inttable_fastlookup(t, key, t->t.entry_size, t->t.value_size);
-}
-
-void *upb_strtable_lookupl(const upb_strtable *t, const char *key, size_t len);
-void *upb_strtable_lookup(const upb_strtable *t, const char *key);
-
/* upb_strtable_iter **********************************************************/
// Strtable iteration. Order is undefined. Insertions invalidate iterators.
// upb_strtable_iter i;
-// for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) {
+// upb_strtable_begin(&i, t);
+// for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
// const char *key = upb_strtable_iter_key(&i);
// const myval *val = upb_strtable_iter_value(&i);
// // ...
// }
typedef struct {
const upb_strtable *t;
- upb_strtable_entry *e;
+ upb_tabent *e;
} upb_strtable_iter;
void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
void upb_strtable_next(upb_strtable_iter *i);
INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; }
INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) {
- return i->e->hdr.key;
+ return i->e->key.str;
}
-INLINE const void *upb_strtable_iter_value(upb_strtable_iter *i) {
- return &i->e->val;
+INLINE upb_value upb_strtable_iter_value(upb_strtable_iter *i) {
+ return i->e->val;
}
/* upb_inttable_iter **********************************************************/
// Inttable iteration. Order is undefined. Insertions invalidate iterators.
-// for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
-// i = upb_inttable_next(t, i)) {
+// upb_inttable_iter i;
+// upb_inttable_begin(&i, t);
+// for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
// // ...
// }
typedef struct {
- uint32_t key;
- upb_inttable_value *value;
+ const upb_inttable *t;
+ union {
+ upb_tabent *ent; // For hash iteration.
+ upb_value *val; // For array iteration.
+ } ptr;
+ uintptr_t arrkey;
bool array_part;
} upb_inttable_iter;
-upb_inttable_iter upb_inttable_begin(const upb_inttable *t);
-upb_inttable_iter upb_inttable_next(const upb_inttable *t,
- upb_inttable_iter iter);
-INLINE bool upb_inttable_done(upb_inttable_iter iter) {
- return iter.value == NULL;
+void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t);
+void upb_inttable_next(upb_inttable_iter *i);
+INLINE bool upb_inttable_done(upb_inttable_iter *i) {
+ return i->ptr.ent == NULL;
}
-INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) {
- return iter.key;
+INLINE uintptr_t upb_inttable_iter_key(upb_inttable_iter *i) {
+ return i->array_part ? i->arrkey : i->ptr.ent->key.num;
}
-INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) {
- return iter.value;
+INLINE upb_value upb_inttable_iter_value(upb_inttable_iter *i) {
+ return i->array_part ? *i->ptr.val : i->ptr.ent->val;
}
#ifdef __cplusplus
diff --git a/upb/upb.c b/upb/upb.c
index 3af9b75..c172bd3 100644
--- a/upb/upb.c
+++ b/upb/upb.c
@@ -1,47 +1,17 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2009 Google Inc. See LICENSE for details.
+ * Copyright (c) 2009-2012 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*/
#include <errno.h>
#include <stdarg.h>
#include <stddef.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include "upb/descriptor_const.h"
#include "upb/upb.h"
-#include "upb/bytestream.h"
-
-#define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(wire_type, ctype, inmemory_type, is_numeric) \
- {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), \
- #ctype, is_numeric},
-
-const upb_type_info upb_types[] = {
- // END_GROUP is not real, but used to signify the pseudo-field that
- // ends a group from within the group.
- TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE, false) // ENDGROUP
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE, true) // DOUBLE
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT, true) // FLOAT
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64, true) // INT64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, UINT64, true) // UINT64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32, true) // INT32
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, UINT64, true) // FIXED64
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, UINT32, true) // FIXED32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, BOOL, true) // BOOL
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING, false) // STRING
- TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, MESSAGE, false) // GROUP
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, MESSAGE, false) // MESSAGE
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING, false) // BYTES
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, UINT32, true) // UINT32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, INT32, true) // ENUM
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, INT32, true) // SFIXED32
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, INT64, true) // SFIXED64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32, true) // SINT32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64, true) // SINT64
-};
#ifdef NDEBUG
upb_value UPB_NO_VALUE = {{0}};
@@ -142,8 +112,9 @@ bool upb_errno_is_wouldblock() {
bool upb_posix_codetostr(int code, char *buf, size_t len) {
if (strerror_r(code, buf, len) == -1) {
if (errno == EINVAL) {
- int n = snprintf(buf, len, "Invalid POSIX error number %d\n", code);
- return n >= (int)len;
+ size_t actual_len =
+ snprintf(buf, len, "Invalid POSIX error number %d\n", code);
+ return actual_len >= len;
} else if (errno == ERANGE) {
return false;
}
diff --git a/upb/upb.h b/upb/upb.h
index 01970ca..ef440fb 100644
--- a/upb/upb.h
+++ b/upb/upb.h
@@ -15,9 +15,6 @@
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
-#include <string.h>
-#include "descriptor_const.h"
-#include "atomic.h"
#ifdef __cplusplus
extern "C" {
@@ -36,20 +33,6 @@ extern "C" {
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
-#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
-
-INLINE void nop_printf(const char *fmt, ...) { (void)fmt; }
-
-#ifdef NDEBUG
-#define DEBUGPRINTF nop_printf
-#else
-#define DEBUGPRINTF printf
-#endif
-
-// Rounds val up to the next multiple of align.
-INLINE uint32_t upb_align_up(uint32_t val, uint32_t align) {
- return val % align == 0 ? val : val + align - (val % align);
-}
// The maximum that any submessages can be nested. Matches proto2's limit.
// At the moment this specifies the size of several statically-sized arrays
@@ -94,73 +77,46 @@ INLINE uint32_t upb_align_up(uint32_t val, uint32_t align) {
#define UPB_MAX_TYPE_DEPTH 64
-/* Fundamental types and type constants. **************************************/
-
-// A list of types as they are encoded on-the-wire.
-enum upb_wire_type {
- UPB_WIRE_TYPE_VARINT = 0,
- UPB_WIRE_TYPE_64BIT = 1,
- UPB_WIRE_TYPE_DELIMITED = 2,
- UPB_WIRE_TYPE_START_GROUP = 3,
- UPB_WIRE_TYPE_END_GROUP = 4,
- UPB_WIRE_TYPE_32BIT = 5,
-};
-
-// Type of a field as defined in a .proto file. eg. string, int32, etc. The
-// integers that represent this are defined by descriptor.proto. Note that
-// descriptor.proto reserves "0" for errors, and we use it to represent
-// exceptional circumstances.
-typedef uint8_t upb_fieldtype_t;
-
-// For referencing the type constants tersely.
-#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ ## type
-#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_ ## type
-
-// Info for a given field type.
-typedef struct {
- uint8_t align;
- uint8_t size;
- uint8_t native_wire_type;
- uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32
- const char *ctype;
- bool is_numeric; // Only numeric types can be packed.
-} upb_type_info;
-
-// A static array of info about all of the field types, indexed by type number.
-extern const upb_type_info upb_types[];
-
-
/* upb_value ******************************************************************/
+// Clients should not need to access these enum values; they are used internally
+// to do typechecks of upb_value accesses.
+typedef enum {
+ UPB_CTYPE_INT32 = 1,
+ UPB_CTYPE_INT64 = 2,
+ UPB_CTYPE_UINT32 = 3,
+ UPB_CTYPE_UINT64 = 4,
+ UPB_CTYPE_DOUBLE = 5,
+ UPB_CTYPE_FLOAT = 6,
+ UPB_CTYPE_BOOL = 7,
+ UPB_CTYPE_PTR = 8,
+ UPB_CTYPE_BYTEREGION = 9,
+ UPB_CTYPE_FIELDDEF = 10,
+} upb_ctype_t;
+
struct _upb_byteregion;
struct _upb_fielddef;
-// Special constants for the upb_value.type field. These must not conflict
-// with any members of FieldDescriptorProto.Type.
-#define UPB_TYPE_ENDGROUP 0
-#define UPB_VALUETYPE_FIELDDEF 32
-#define UPB_VALUETYPE_PTR 33
-
// A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use.
typedef struct {
union {
uint64_t uint64;
- double _double;
- float _float;
int32_t int32;
int64_t int64;
uint32_t uint32;
+ double _double;
+ float _float;
bool _bool;
+ void *_void;
struct _upb_byteregion *byteregion;
const struct _upb_fielddef *fielddef;
- void *_void;
} val;
#ifndef NDEBUG
// In debug mode we carry the value type around also so we can check accesses
// to be sure the right member is being read.
- char type;
+ upb_ctype_t type;
#endif
} upb_value;
@@ -185,7 +141,7 @@ typedef struct {
return val.val.membername; \
} \
INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
- memset(val, 0, sizeof(*val)); \
+ val->val.uint64 = 0; \
SET_TYPE(val->type, proto_type); \
val->val.membername = cval; \
} \
@@ -195,21 +151,23 @@ typedef struct {
return ret; \
}
-UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE));
-UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT));
-UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32));
-UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64));
-UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32));
-UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64));
-UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL));
-UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR);
+UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_CTYPE_INT32);
+UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_CTYPE_INT64);
+UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_CTYPE_UINT32);
+UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_CTYPE_UINT64);
+UPB_VALUE_ACCESSORS(double, _double, double, UPB_CTYPE_DOUBLE);
+UPB_VALUE_ACCESSORS(float, _float, float, UPB_CTYPE_FLOAT);
+UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_CTYPE_BOOL);
+UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_CTYPE_PTR);
UPB_VALUE_ACCESSORS(byteregion, byteregion, struct _upb_byteregion*,
- UPB_TYPE(STRING));
+ UPB_CTYPE_BYTEREGION);
// upb_fielddef should never be modified from a callback
// (ie. when they're getting passed through a upb_value).
UPB_VALUE_ACCESSORS(fielddef, fielddef, const struct _upb_fielddef*,
- UPB_VALUETYPE_FIELDDEF);
+ UPB_CTYPE_FIELDDEF);
+
+#undef UPB_VALUE_ACCESSORS
extern upb_value UPB_NO_VALUE;
@@ -262,7 +220,7 @@ void upb_status_copy(upb_status *to, const upb_status *from);
extern upb_errorspace upb_posix_errorspace;
void upb_status_fromerrno(upb_status *status);
-bool upb_errno_is_wouldblock(void);
+bool upb_errno_is_wouldblock();
// Like vasprintf (which allocates a string large enough for the result), but
// uses *buf (which can be NULL) as a starting point and reallocates it only if
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback