summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile59
-rw-r--r--tests/bindings/googlepb/test_vs_proto2.cc14
-rw-r--r--tests/bindings/ruby/upb.rb62
-rw-r--r--tests/pb/test_decoder.cc18
-rw-r--r--tools/dump_cinit.lua10
-rw-r--r--upb/bindings/googlepb/bridge.cc1
-rw-r--r--upb/bindings/googlepb/proto1.cc27
-rw-r--r--upb/bindings/googlepb/proto2.cc220
-rw-r--r--upb/bindings/lua/upb.c21
-rw-r--r--upb/bindings/ruby/README2
-rw-r--r--upb/bindings/ruby/README.md30
-rw-r--r--upb/bindings/ruby/extconf.rb8
-rw-r--r--upb/bindings/ruby/upb.c1209
-rw-r--r--upb/def.c25
-rw-r--r--upb/def.h85
-rwxr-xr-xupb/descriptor/descriptor.upb.c162
-rw-r--r--upb/descriptor/reader.c11
-rw-r--r--upb/pb/compile_decoder.c308
-rw-r--r--upb/pb/compile_decoder_x64.c2
-rw-r--r--upb/pb/compile_decoder_x64.dasc28
-rw-r--r--upb/pb/encoder.c769
-rw-r--r--upb/pb/encoder.h163
-rw-r--r--upb/pb/varint.c23
-rw-r--r--upb/pb/varint.int.h9
-rw-r--r--upb/table.c106
-rw-r--r--upb/table.int.h29
26 files changed, 2416 insertions, 985 deletions
diff --git a/Makefile b/Makefile
index 9c5bef0..0d5ce56 100644
--- a/Makefile
+++ b/Makefile
@@ -44,8 +44,8 @@ CXX=c++
CFLAGS=-std=c99
CXXFLAGS=-Wno-unused-private-field
INCLUDE=-I.
-CPPFLAGS=$(INCLUDE) -DNDEBUG -Wall -Wextra -Wno-sign-compare $(USER_CPPFLAGS)
-LDLIBS=-lpthread upb/libupb.a
+WARNFLAGS=-Wall -Wextra -Wno-sign-compare
+CPPFLAGS=$(INCLUDE) -DNDEBUG $(USER_CPPFLAGS)
LUA=lua # 5.1 and 5.2 should both be supported
ifneq ($(WITH_JIT), no)
@@ -91,6 +91,7 @@ clean_leave_profile:
@rm -rf tools/upbc deps
@rm -rf upb/bindings/python/build
@rm -f upb/bindings/ruby/Makefile
+ @rm -f upb/bindings/ruby/upb.o
@rm -f upb/bindings/ruby/upb.so
@rm -f upb/bindings/ruby/mkmf.log
@find . | grep dSYM | xargs rm -rf
@@ -109,7 +110,7 @@ clean: clean_leave_profile
# lib/lib%.a: $(call make_objs,o)
# gcc -c -o $@ $^
#
-# SECONDEXPANSION: flips on a bit essentially that allows this "seconary
+# SECONDEXPANSION: flips on a bit essentially that allows this "secondary
# expansion": it must appear before anything that uses make_objs.
.SECONDEXPANSION:
to_srcs = $(subst .,_,$(1)_SRCS)
@@ -130,15 +131,16 @@ upb_SRCS = \
upb/upb.c \
upb_descriptor_SRCS = \
- upb/descriptor/reader.c \
upb/descriptor/descriptor.upb.c \
+ upb/descriptor/reader.c \
upb_pb_SRCS = \
- upb/pb/decoder.c \
upb/pb/compile_decoder.c \
+ upb/pb/decoder.c \
+ upb/pb/encoder.c \
upb/pb/glue.c \
- upb/pb/varint.c \
upb/pb/textprinter.c \
+ upb/pb/varint.c \
# If the JIT is enabled we include its source.
# If Lua is present we can use DynASM to regenerate the .h file.
@@ -146,7 +148,6 @@ ifdef USE_JIT
upb_pb_SRCS += upb/pb/compile_decoder_x64.c
obj/pb/compile_decoder_x64.o obj/pb/compile_decoder_x64.lo: upb/pb/compile_decoder_x64.h
obj/pb/compile_decoder_x64.o: CFLAGS=-std=gnu99
-obj/pb/compile_decoder_x64.o: OPT=-Os
upb/pb/compile_decoder_x64.h: upb/pb/compile_decoder_x64.dasc
$(E) DYNASM $<
@@ -164,6 +165,11 @@ lib/libupb.a : OPT = -Os
lib/libupb.descriptor.a : OPT = -Os
obj/pb/compile_decoder.o : OPT = -Os
obj/pb/compile_decoder_64.o : OPT = -Os
+
+ifdef USE_JIT
+obj/pb/compile_decoder_x64.o: OPT=-Os
+endif
+
endif
$(UPB_PICLIBS): lib/lib%_pic.a: $(call make_objs,lo)
@@ -177,32 +183,32 @@ $(UPB_LIBS): lib/lib%.a: $(call make_objs,o)
obj/%.o: upb/%.c | $$(@D)/.
$(E) CC $<
- $(Q) $(CC) $(OPT) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
+ $(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
obj/%.o: upb/%.cc | $$(@D)/.
$(E) CXX $<
- $(Q) $(CXX) $(OPT) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
+ $(Q) $(CXX) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
obj/%.lo: upb/%.c | $$(@D)/.
$(E) 'CC -fPIC' $<
- $(Q) $(CC) $(OPT) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< -fPIC
+ $(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< -fPIC
obj/%.lo: upb/%.cc | $$(@D)/.
$(E) CXX $<
- $(Q) $(CXX) $(OPT) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< -fPIC
+ $(Q) $(CXX) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< -fPIC
# Note: mkdir -p is technically susceptible to races when used with make -j.
%/.:
$(Q) mkdir -p $@
# Regenerating the auto-generated files in upb/.
-upb/descriptor.pb: upb/descriptor.proto
+upb/descriptor/descriptor.pb: upb/descriptor/descriptor.proto
@# TODO: replace with upbc
- protoc upb/descriptor.proto -oupb/descriptor.pb
+ protoc upb/descriptor/descriptor.proto -oupb/descriptor/descriptor.pb
-descriptorgen: upb/descriptor.pb tools/upbc
+descriptorgen: upb/descriptor/descriptor.pb tools/upbc
@# Regenerate descriptor_const.h
- ./tools/upbc -o upb/descriptor upb/descriptor.pb
+ ./tools/upbc -o upb/descriptor/descriptor upb/descriptor/descriptor.pb
tools/upbc: tools/upbc.c $(LIBUPB)
$(E) CC $<
@@ -232,15 +238,15 @@ tests: $(TESTS)
tests/testmain.o: tests/testmain.cc
$(E) CXX $<
- $(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $<
+ $(Q) $(CXX) $(OPT) $(WARNFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $<
$(C_TESTS): % : %.c tests/testmain.o $$(LIBS)
$(E) CC $<
- $(Q) $(CC) $(CPPFLAGS) $(CFLAGS) -o $@ tests/testmain.o $< $(LIBS)
+ $(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -o $@ tests/testmain.o $< $(LIBS)
$(CC_TESTS): % : %.cc tests/testmain.o $$(LIBS)
$(E) CXX $<
- $(Q) $(CXX) $(CPPFLAGS) $(CXXFLAGS) -Wno-deprecated -o $@ tests/testmain.o $< $(LIBS)
+ $(Q) $(CXX) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CXXFLAGS) -Wno-deprecated -o $@ tests/testmain.o $< $(LIBS)
# Several of these tests don't actually test these libs, but use them
# incidentally to load a descriptor
@@ -337,7 +343,7 @@ GOOGLEPB_TEST_DEPS = \
tests/bindings/googlepb/test_vs_proto2.googlemessage1: $(GOOGLEPB_TEST_DEPS) \
benchmarks/googlemessage1.h
$(E) CXX $< '(benchmarks::SpeedMessage1)'
- $(Q) $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< \
+ $(Q) $(CXX) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< \
-DMESSAGE_CIDENT="benchmarks::SpeedMessage1" \
-DMESSAGE_DATA_HFILE=\"benchmarks/googlemessage1.h\" \
benchmarks/google_messages.pb.cc tests/testmain.o -lprotobuf -lpthread \
@@ -346,7 +352,7 @@ tests/bindings/googlepb/test_vs_proto2.googlemessage1: $(GOOGLEPB_TEST_DEPS) \
tests/bindings/googlepb/test_vs_proto2.googlemessage2: $(GOOGLEPB_TEST_DEPS) \
benchmarks/googlemessage2.h
$(E) CXX $< '(benchmarks::SpeedMessage2)'
- $(Q) $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< \
+ $(Q) $(CXX) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< \
-DMESSAGE_CIDENT="benchmarks::SpeedMessage2" \
-DMESSAGE_DATA_HFILE=\"benchmarks/googlemessage2.h\" \
benchmarks/google_messages.pb.cc tests/testmain.o -lprotobuf -lpthread \
@@ -401,7 +407,7 @@ LUA_LIB_DEPS = \
upb/bindings/lua/upb.so: upb/bindings/lua/upb.c upb/bindings/lua/upb.lua.h $(LUA_LIB_DEPS)
$(E) CC upb/bindings/lua/upb.c
- $(Q) $(CC) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< $(LUA_LDFLAGS) $(LUA_LIB_DEPS)
+ $(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< $(LUA_LDFLAGS) $(LUA_LIB_DEPS)
# TODO: the dependency between upb/pb.so and upb.so is expressed at the
# .so level, which means that the OS will try to load upb.so when upb/pb.so
@@ -414,7 +420,7 @@ upb/bindings/lua/upb.so: upb/bindings/lua/upb.c upb/bindings/lua/upb.lua.h $(LUA
# be expressed at the .so level.
upb/bindings/lua/upb/pb.so: upb/bindings/lua/upb/pb.c upb/bindings/lua/upb.so
$(E) CC upb/bindings/lua/upb.pb.c
- $(Q) $(CC) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< upb/bindings/lua/upb.so $(LUA_LDFLAGS)
+ $(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< upb/bindings/lua/upb.so $(LUA_LDFLAGS)
# Python extension #############################################################
@@ -434,9 +440,16 @@ pythontest: $(PYTHONEXT)
RUBY=ruby
RUBYEXT=upb/bindings/ruby/upb.so
ruby: $(RUBYEXT)
+
+# We pass our important flags to Ruby, but leave the warning flags out.
+# Some uses of the Ruby/C API trigger the warnings we normally use, so
+# we let Ruby decide the set of warning options to use.
upb/bindings/ruby/Makefile: upb/bindings/ruby/extconf.rb lib/libupb_pic.a lib/libupb.pb_pic.a lib/libupb.descriptor_pic.a
$(E) RUBY upb/bindings/ruby/extconf.rb
- $(Q) cd upb/bindings/ruby && ruby extconf.rb
+ $(Q) cd upb/bindings/ruby && ruby extconf.rb "$(OPT) $(CPPFLAGS) $(CFLAGS)"
$(RUBYEXT): upb/bindings/ruby/upb.c upb/bindings/ruby/Makefile
$(E) CC upb/bindings/ruby/upb.c
$(Q) cd upb/bindings/ruby && make
+
+rubytest: $(RUBYEXT) upb/descriptor/descriptor.pb
+ RUBYLIB="upb/bindings/ruby" ruby tests/bindings/ruby/upb.rb
diff --git a/tests/bindings/googlepb/test_vs_proto2.cc b/tests/bindings/googlepb/test_vs_proto2.cc
index 2d53f80..15a5388 100644
--- a/tests/bindings/googlepb/test_vs_proto2.cc
+++ b/tests/bindings/googlepb/test_vs_proto2.cc
@@ -27,8 +27,14 @@
#include "upb/pb/glue.h"
#include "upb/pb/varint.int.h"
-static const char message_data[] = {
-#include MESSAGE_DATA_HFILE
+// Pull in string data from benchmarks/google_message{1,2}.dat
+// (the .h files are generated with xxd).
+const char message1_data[] = {
+#include "benchmarks/google_message1.h"
+};
+
+const char message2_data[] = {
+#include "benchmarks/google_message2.h"
};
void compare_metadata(const google::protobuf::Descriptor* d,
@@ -117,8 +123,8 @@ extern "C" {
int run_tests(int argc, char *argv[]) {
UPB_UNUSED(argc);
UPB_UNUSED(argv);
- size_t len = sizeof(message_data);
- const char *str = message_data;
+ size_t len = sizeof(MESSAGE_DATA_IDENT);
+ const char *str = MESSAGE_DATA_IDENT;
MESSAGE_CIDENT msg1;
MESSAGE_CIDENT msg2;
diff --git a/tests/bindings/ruby/upb.rb b/tests/bindings/ruby/upb.rb
new file mode 100644
index 0000000..3e06c17
--- /dev/null
+++ b/tests/bindings/ruby/upb.rb
@@ -0,0 +1,62 @@
+#!/usr/bin/ruby
+#
+# Tests for Ruby upb extension.
+
+require 'test/unit'
+require 'set'
+require 'upb'
+
+def get_descriptor
+ File.open("upb/descriptor/descriptor.pb").read
+end
+
+def load_descriptor
+ symtab = Upb::SymbolTable.new
+ symtab.load_descriptor(get_descriptor())
+ return symtab
+end
+
+def get_message_class(name)
+ return Upb.get_message_class(load_descriptor().lookup(name))
+end
+
+class TestRubyExtension < Test::Unit::TestCase
+ def test_parsedescriptor
+ msgdef = load_descriptor.lookup("google.protobuf.FileDescriptorSet")
+ assert_instance_of(Upb::MessageDef, msgdef)
+
+ file_descriptor_set = Upb.get_message_class(msgdef)
+ msg = file_descriptor_set.parse(get_descriptor())
+
+ # A couple message types we know should exist.
+ names = Set.new(["DescriptorProto", "FieldDescriptorProto"])
+
+ msg.file.each { |file|
+ file.message_type.each { |message_type|
+ names.delete(message_type.name)
+ }
+ }
+
+ assert_equal(0, names.size)
+ end
+
+ def test_parseserialize
+ field_descriptor_proto = get_message_class("google.protobuf.FieldDescriptorProto")
+ field_options = get_message_class("google.protobuf.FieldOptions")
+
+ field = field_descriptor_proto.new
+
+ field.name = "MyName"
+ field.number = 5
+ field.options = field_options.new
+ field.options.packed = true
+
+ serialized = Upb::Message.serialize(field)
+
+ field2 = field_descriptor_proto.parse(serialized)
+
+ assert_equal("MyName", field2.name)
+ assert_equal(5, field2.number)
+ assert_equal(true, field2.options.packed)
+ end
+end
diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc
index 2d94d82..d976a54 100644
--- a/tests/pb/test_decoder.cc
+++ b/tests/pb/test_decoder.cc
@@ -207,8 +207,16 @@ void indentbuf(string *buf, int depth) {
buf->append(2 * depth, ' ');
}
+void check_stack_alignment() {
+#ifdef UPB_USE_JIT_X64
+ void *rsp = __builtin_frame_address(0);
+ ASSERT(((uintptr_t)rsp % 16) == 0);
+#endif
+}
+
#define NUMERIC_VALUE_HANDLER(member, ctype, fmt) \
bool value_##member(int* depth, const uint32_t* num, ctype val) { \
+ check_stack_alignment(); \
indentbuf(&output, *depth); \
appendf(&output, "%" PRIu32 ":%" fmt "\n", *num, val); \
return true; \
@@ -222,12 +230,14 @@ NUMERIC_VALUE_HANDLER(float, float, "g")
NUMERIC_VALUE_HANDLER(double, double, "g")
bool value_bool(int* depth, const uint32_t* num, bool val) {
+ check_stack_alignment();
indentbuf(&output, *depth);
appendf(&output, "%" PRIu32 ":%s\n", *num, val ? "true" : "false");
return true;
}
int* startstr(int* depth, const uint32_t* num, size_t size_hint) {
+ check_stack_alignment();
indentbuf(&output, *depth);
appendf(&output, "%" PRIu32 ":(%zu)\"", *num, size_hint);
return depth + 1;
@@ -237,6 +247,7 @@ size_t value_string(int* depth, const uint32_t* num, const char* buf,
size_t n, const upb::BufferHandle* handle) {
UPB_UNUSED(num);
UPB_UNUSED(depth);
+ check_stack_alignment();
output.append(buf, n);
ASSERT(handle == &global_handle);
return n;
@@ -245,11 +256,13 @@ size_t value_string(int* depth, const uint32_t* num, const char* buf,
bool endstr(int* depth, const uint32_t* num) {
UPB_UNUSED(depth);
UPB_UNUSED(num);
+ check_stack_alignment();
output.append("\"\n");
return true;
}
int* startsubmsg(int* depth, const uint32_t* num) {
+ check_stack_alignment();
indentbuf(&output, *depth);
appendf(&output, "%" PRIu32 ":{\n", *num);
return depth + 1;
@@ -257,12 +270,14 @@ int* startsubmsg(int* depth, const uint32_t* num) {
bool endsubmsg(int* depth, const uint32_t* num) {
UPB_UNUSED(num);
+ check_stack_alignment();
indentbuf(&output, *depth);
output.append("}\n");
return true;
}
int* startseq(int* depth, const uint32_t* num) {
+ check_stack_alignment();
indentbuf(&output, *depth);
appendf(&output, "%" PRIu32 ":[\n", *num);
return depth + 1;
@@ -270,12 +285,14 @@ int* startseq(int* depth, const uint32_t* num) {
bool endseq(int* depth, const uint32_t* num) {
UPB_UNUSED(num);
+ check_stack_alignment();
indentbuf(&output, *depth);
output.append("]\n");
return true;
}
bool startmsg(int* depth) {
+ check_stack_alignment();
indentbuf(&output, *depth);
output.append("<\n");
return true;
@@ -283,6 +300,7 @@ bool startmsg(int* depth) {
bool endmsg(int* depth, upb_status* status) {
UPB_UNUSED(status);
+ check_stack_alignment();
indentbuf(&output, *depth);
output.append(">\n");
return true;
diff --git a/tools/dump_cinit.lua b/tools/dump_cinit.lua
index 2988082..13e1f52 100644
--- a/tools/dump_cinit.lua
+++ b/tools/dump_cinit.lua
@@ -453,14 +453,14 @@ local function dump_defs_c(symtab, basename, namespace, append)
intfmt = "0"
end
-- UPB_FIELDDEF_INIT(label, type, intfmt, tagdelim, is_extension, lazy,
- -- name, num, msgdef, subdef, selector_base, index,
- -- default_value)
- append(' UPB_FIELDDEF_INIT(%s, %s, %s, %s, %s, %s, "%s", %d, %s, %s, ' ..
- '%d, %d, {0},' .. -- TODO: support default value
+ -- packed, name, num, msgdef, subdef, selector_base,
+ -- index, default_value)
+ append(' UPB_FIELDDEF_INIT(%s, %s, %s, %s, %s, %s, %s, "%s", %d, %s, ' ..
+ '%s, %d, %d, {0},' .. -- TODO: support default value
'&reftables[%d], &reftables[%d]),\n',
const(f, "label"), const(f, "type"), intfmt,
boolstr(f:istagdelim()), boolstr(f:is_extension()),
- boolstr(f:lazy()), f:name(), f:number(),
+ boolstr(f:lazy()), boolstr(f:packed()), f:name(), f:number(),
linktab:addr(f:containing_type()), subdef,
f:_selector_base(), f:index(),
reftable, reftable + 1
diff --git a/upb/bindings/googlepb/bridge.cc b/upb/bindings/googlepb/bridge.cc
index a666ff6..6ae8868 100644
--- a/upb/bindings/googlepb/bridge.cc
+++ b/upb/bindings/googlepb/bridge.cc
@@ -115,6 +115,7 @@ reffed_ptr<FieldDef> DefBuilder::NewFieldDef(const goog::FieldDescriptor* f,
upb_f->set_number(f->number(), &status);
upb_f->set_label(FieldDef::ConvertLabel(f->label()));
upb_f->set_descriptor_type(FieldDef::ConvertDescriptorType(f->type()));
+ upb_f->set_packed(f->options().packed());
#ifdef UPB_GOOGLE3
upb_f->set_lazy(f->options().lazy());
#endif
diff --git a/upb/bindings/googlepb/proto1.cc b/upb/bindings/googlepb/proto1.cc
index 0b46fed..68b572c 100644
--- a/upb/bindings/googlepb/proto1.cc
+++ b/upb/bindings/googlepb/proto1.cc
@@ -30,6 +30,10 @@
#undef private
#undef protected
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+namespace proto2 { class Arena; }
+#endif
+
#include "upb/def.h"
#include "upb/handlers.h"
#include "upb/shim/shim.h"
@@ -448,12 +452,35 @@ class P2R_Handlers {
class RepeatedMessageTypeHandler {
public:
typedef proto2::Message Type;
+#ifndef GOOGLE_PROTOBUF_HAS_ARENAS
// AddAllocated() calls this, but only if other objects are sitting
// around waiting for reuse, which we will not do.
static void Delete(Type* t) {
UPB_UNUSED(t);
assert(false);
}
+#else
+ static ::proto2::Arena* GetArena(Type* t) {
+ return t->GetArena();
+ }
+ static void* GetMaybeArenaPointer(Type* t) {
+ return t->GetMaybeArenaPointer();
+ }
+ static inline Type* NewFromPrototype(
+ const Type* prototype, ::proto2::Arena* arena = NULL) {
+ return prototype->New(arena);
+ }
+ // AddAllocated() calls this, but only if other objects are sitting
+ // around waiting for reuse, which we will not do.
+ static void Delete(Type* t, ::proto2::Arena* arena) {
+ UPB_UNUSED(t);
+ UPB_UNUSED(arena);
+ assert(false);
+ }
+ static void Merge(const Type& from, Type* to) {
+ to->MergeFrom(from);
+ }
+#endif
};
// Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
diff --git a/upb/bindings/googlepb/proto2.cc b/upb/bindings/googlepb/proto2.cc
index 657f802..498ae2d 100644
--- a/upb/bindings/googlepb/proto2.cc
+++ b/upb/bindings/googlepb/proto2.cc
@@ -261,11 +261,64 @@ case goog::FieldDescriptor::cpptype: \
return r->offsets_[index];
}
- class FieldOffset {
+ // Base class that provides access to elements of the message as a whole, such
+ // as the unknown-field set, and is inherited by context classes for specific
+ // field handlers.
+ class FieldDataBase {
+ public:
+ FieldDataBase(const goog::internal::GeneratedMessageReflection* r)
+ : unknown_fields_offset_(r->unknown_fields_offset_)
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ , arena_offset_(r->arena_offset_)
+#endif // GOOGLE_PROTOBUF_HAS_ARENAS
+ {}
+
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ goog::Arena* GetArena(const goog::Message& message) const {
+ if (unknown_fields_offset_ ==
+ goog::internal::GeneratedMessageReflection::
+ kUnknownFieldSetInMetadata) {
+ const goog::internal::InternalMetadataWithArena* metadata =
+ GetConstPointer<goog::internal::InternalMetadataWithArena>(
+ &message, arena_offset_);
+ return metadata->arena();
+ } else if (arena_offset_ !=
+ goog::internal::GeneratedMessageReflection::kNoArenaPointer) {
+ return *GetConstPointer<goog::Arena*>(&message, arena_offset_);
+ } else {
+ return NULL;
+ }
+ }
+
+ goog::UnknownFieldSet* GetUnknownFieldSet(goog::Message* message) const {
+ if (unknown_fields_offset_ ==
+ goog::internal::GeneratedMessageReflection::
+ kUnknownFieldSetInMetadata) {
+ goog::internal::InternalMetadataWithArena* metadata =
+ GetPointer<goog::internal::InternalMetadataWithArena>(
+ message, arena_offset_);
+ return metadata->mutable_unknown_fields();
+ }
+ return GetPointer<goog::UnknownFieldSet>(message, unknown_fields_offset_);
+ }
+#else // ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ goog::UnknownFieldSet* GetUnknownFieldSet(goog::Message* message) const {
+ return GetPointer<goog::UnknownFieldSet>(message, unknown_fields_offset_);
+ }
+#endif // ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ private:
+ int unknown_fields_offset_;
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ int arena_offset_;
+#endif // GOOGLE_PROTOBUF_HAS_ARENAS
+ };
+
+ class FieldOffset : public FieldDataBase {
public:
FieldOffset(const goog::FieldDescriptor* f,
const goog::internal::GeneratedMessageReflection* r)
- : offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) {
+ : FieldDataBase(r),
+ offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) {
if (!is_repeated_) {
int64_t hasbit = GetHasbit(f, r);
hasbyte_ = hasbit / 8;
@@ -293,11 +346,12 @@ case goog::FieldDescriptor::cpptype: \
};
#ifdef GOOGLE_PROTOBUF_HAS_ONEOF
- class OneofFieldData {
+ class OneofFieldData : public FieldDataBase {
public:
OneofFieldData(const goog::FieldDescriptor* f,
const goog::internal::GeneratedMessageReflection* r)
- : field_number_offset_(GetOneofDiscriminantOffset(f, r)),
+ : FieldDataBase(r),
+ field_number_offset_(GetOneofDiscriminantOffset(f, r)),
field_number_(f->number()) {
const goog::OneofDescriptor* oneof = f->containing_oneof();
@@ -343,6 +397,40 @@ case goog::FieldDescriptor::cpptype: \
return GetPointer<int32_t>(message, field_number_offset_);
}
+ void ClearOneof(goog::Message* m, const FieldOffset* ofs,
+ int field_number) const {
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ if (GetArena(*m) != NULL) {
+ return;
+ }
+#endif
+ switch (types_.at(field_number)) {
+ case ONEOF_TYPE_NONE:
+ break;
+ case ONEOF_TYPE_STRING:
+ delete *ofs->GetFieldPointer<std::string*>(m);
+ break;
+ case ONEOF_TYPE_MESSAGE:
+ delete *ofs->GetFieldPointer<goog::Message*>(m);
+ break;
+#ifdef UPB_GOOGLE3
+ case ONEOF_TYPE_GLOBALSTRING:
+ delete *ofs->GetFieldPointer<string*>(m);
+ break;
+ case ONEOF_TYPE_CORD:
+ delete *ofs->GetFieldPointer<Cord*>(m);
+ break;
+ case ONEOF_TYPE_STRINGPIECE:
+ delete *ofs->GetFieldPointer<
+ goog::internal::StringPieceField*>(m);
+ break;
+ case ONEOF_TYPE_LAZYFIELD:
+ delete *ofs->GetFieldPointer<goog::internal::LazyField*>(m);
+ break;
+#endif
+ }
+ }
+
// Returns whether this is different than the previous value of the
// field_number; this implies that the current value was freed (if
// necessary) and the caller should allocate a new instance.
@@ -351,30 +439,7 @@ case goog::FieldDescriptor::cpptype: \
if (*field_number == field_number_) {
return false;
} else {
- switch (types_.at(*field_number)) {
- case ONEOF_TYPE_NONE:
- break;
- case ONEOF_TYPE_STRING:
- delete *ofs->GetFieldPointer<std::string*>(m);
- break;
- case ONEOF_TYPE_MESSAGE:
- delete *ofs->GetFieldPointer<goog::Message*>(m);
- break;
-#ifdef UPB_GOOGLE3
- case ONEOF_TYPE_GLOBALSTRING:
- delete *ofs->GetFieldPointer<string*>(m);
- break;
- case ONEOF_TYPE_CORD:
- delete *ofs->GetFieldPointer<Cord*>(m);
- break;
- case ONEOF_TYPE_STRINGPIECE:
- delete *ofs->GetFieldPointer<goog::internal::StringPieceField*>(m);
- break;
- case ONEOF_TYPE_LAZYFIELD:
- delete *ofs->GetFieldPointer<goog::internal::LazyField*>(m);
- break;
-#endif
- }
+ ClearOneof(m, ofs, *field_number);
*field_number = field_number_;
return true;
}
@@ -578,7 +643,6 @@ case goog::FieldDescriptor::cpptype: \
const upb::FieldDef* f)
: FieldOffset(proto2_f, r),
field_number_(f->number()),
- unknown_fields_offset_(r->unknown_fields_offset_),
enum_(upb_downcast_enumdef(f->subdef())) {}
bool IsValidValue(int32_t val) const {
@@ -587,13 +651,8 @@ case goog::FieldDescriptor::cpptype: \
int32_t field_number() const { return field_number_; }
- goog::UnknownFieldSet* mutable_unknown_fields(goog::Message* m) const {
- return GetPointer<goog::UnknownFieldSet>(m, unknown_fields_offset_);
- }
-
private:
int32_t field_number_;
- size_t unknown_fields_offset_;
const upb::EnumDef* enum_;
};
@@ -617,7 +676,7 @@ case goog::FieldDescriptor::cpptype: \
*message_val = val;
data->SetHasbit(m);
} else {
- data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val);
+ data->GetUnknownFieldSet(m)->AddVarint(data->field_number(), val);
}
}
@@ -631,7 +690,7 @@ case goog::FieldDescriptor::cpptype: \
data->GetFieldPointer<goog::RepeatedField<int32_t> >(m);
r->Add(val);
} else {
- data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val);
+ data->GetUnknownFieldSet(m)->AddVarint(data->field_number(), val);
}
}
@@ -718,7 +777,14 @@ case goog::FieldDescriptor::cpptype: \
T** str = data->GetStringPointer(m);
data->SetHasbit(m);
// If it points to the default instance, we must create a new instance.
- if (*str == data->prototype()) *str = new T();
+ if (*str == data->prototype()) {
+ *str = new T();
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ if (data->GetArena(*m)) {
+ data->GetArena(*m)->Own(*str);
+ }
+#endif
+ }
(*str)->clear();
// reserve() here appears to hurt performance rather than help.
return *str;
@@ -749,6 +815,16 @@ case goog::FieldDescriptor::cpptype: \
T** str = ofs->GetFieldPointer<T*>(m);
if (data->SetOneofHas(m)) {
*str = new T();
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ // Note that in the main proto2-arenas implementation, the parsing code
+ // creates ArenaString instances for string field data, and the
+ // implementation later dynamically converts to ::string if a mutable
+ // version is requested. To keep complexity down in this binding, we
+ // create an ordinary string and allow the arena to own its destruction.
+ if (data->GetArena(*m) != NULL) {
+ data->GetArena(*m)->Own(*str);
+ }
+#endif
} else {
(*str)->clear();
}
@@ -857,7 +933,11 @@ case goog::FieldDescriptor::cpptype: \
data->SetHasbit(m);
goog::Message** subm = data->GetFieldPointer<goog::Message*>(m);
if (*subm == NULL || *subm == data->prototype()) {
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ *subm = data->prototype()->New(data->GetArena(*m));
+#else
*subm = data->prototype()->New();
+#endif
}
return *subm;
}
@@ -865,14 +945,50 @@ case goog::FieldDescriptor::cpptype: \
class RepeatedMessageTypeHandler {
public:
typedef goog::Message Type;
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ static ::proto2::Arena* GetArena(Type* t) {
+ return t->GetArena();
+ }
+ static void* GetMaybeArenaPointer(Type* t) {
+ return t->GetMaybeArenaPointer();
+ }
+ static inline Type* NewFromPrototype(
+ const Type* prototype, ::proto2::Arena* arena = NULL) {
+ return prototype->New(arena);
+ }
+ static void Delete(Type* t, goog::Arena* arena = NULL) {
+ if (arena == NULL) {
+ delete t;
+ }
+ }
+#else // ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ static inline Type* NewFromPrototype(const Type* prototype) {
+ return prototype->New();
+ }
// AddAllocated() calls this, but only if other objects are sitting
// around waiting for reuse, which we will not do.
static void Delete(Type* t) {
UPB_UNUSED(t);
assert(false);
}
+#endif // ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+
+ static void Merge(const Type& from, Type* to) {
+ to->MergeFrom(from);
+ }
};
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
+ // its base class RepeatedPtrFieldBase*.
+ static goog::Message* StartRepeatedSubMessage(
+ goog::internal::RepeatedPtrFieldBase* r,
+ const SubMessageHandlerData* data) {
+ goog::Message* submsg = data->prototype()->New(r->GetArenaNoVirtual());
+ r->AddAllocated<RepeatedMessageTypeHandler>(submsg);
+ return submsg;
+ }
+#else // ifdef GOOGLE_PROTOBUF_HAS_ARENAS
// Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
// its base class RepeatedPtrFieldBase*.
static goog::Message* StartRepeatedSubMessage(
@@ -886,13 +1002,19 @@ case goog::FieldDescriptor::cpptype: \
return submsg;
}
+#endif // ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+
#ifdef GOOGLE_PROTOBUF_HAS_ONEOF
static goog::Message* StartOneofSubMessage(
goog::Message* m, const OneofSubMessageHandlerData* data) {
const FieldOffset* ofs = data;
goog::Message** subm = ofs->GetFieldPointer<goog::Message*>(m);
if (data->SetOneofHas(m)) {
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ *subm = data->prototype()->New(data->GetArena(*m));
+#else
*subm = data->prototype()->New();
+#endif
}
return *subm;
}
@@ -1123,9 +1245,21 @@ case goog::FieldDescriptor::cpptype: \
LazyMessageExtensionImpl() {}
virtual ~LazyMessageExtensionImpl() {}
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ virtual LazyMessageExtension* New() const {
+ return New(NULL);
+ }
+
+ virtual LazyMessageExtension* New(proto2::Arena* arena) const {
+ LazyMessageExtensionImpl* message =
+ ::proto2::Arena::Create<LazyMessageExtensionImpl>(arena);
+ return message;
+ }
+#else // ifdef GOOGLE_PROTOBUF_HAS_ARENAS
virtual LazyMessageExtension* New() const {
return new LazyMessageExtensionImpl();
}
+#endif // ifdef GOOGLE_PROTOBUF_HAS_ARENAS
virtual const proto2::MessageLite& GetMessage(
const proto2::MessageLite& prototype) const {
@@ -1149,6 +1283,12 @@ case goog::FieldDescriptor::cpptype: \
static_cast<const proto2::Message&>(prototype));
}
+ virtual proto2::MessageLite* UnsafeArenaReleaseMessage(
+ const proto2::MessageLite& prototype) {
+ return lazy_field_.UnsafeArenaReleaseByPrototype(
+ static_cast<const proto2::Message&>(prototype));
+ }
+
virtual bool IsInitialized() const { return true; }
virtual int ByteSize() const { return lazy_field_.MessageByteSize(); }
@@ -1201,7 +1341,13 @@ case goog::FieldDescriptor::cpptype: \
LazyMessageExtensionImpl* lazy_extension;
if (set->MaybeNewExtension(data->number(), data->field_descriptor(),
&item)) {
+#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
+ lazy_extension =
+ ::proto2::Arena::Create<LazyMessageExtensionImpl>(
+ m->GetArena());
+#else
lazy_extension = new LazyMessageExtensionImpl();
+#endif
item->type = UPB_DESCRIPTOR_TYPE_MESSAGE;
item->is_repeated = false;
item->is_lazy = true;
diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c
index f257430..2bd78af 100644
--- a/upb/bindings/lua/upb.c
+++ b/upb/bindings/lua/upb.c
@@ -640,6 +640,12 @@ static int lupb_fielddef_number(lua_State *L) {
return 1;
}
+static int lupb_fielddef_packed(lua_State *L) {
+ const upb_fielddef *f = lupb_fielddef_check(L, 1);
+ lua_pushboolean(L, upb_fielddef_packed(f));
+ return 1;
+}
+
static int lupb_fielddef_subdef(lua_State *L) {
const upb_fielddef *f = lupb_fielddef_check(L, 1);
if (!upb_fielddef_hassubdef(f))
@@ -753,6 +759,12 @@ static int lupb_fielddef_setnumber(lua_State *L) {
return 0;
}
+static int lupb_fielddef_setpacked(lua_State *L) {
+ upb_fielddef *f = lupb_fielddef_checkmutable(L, 1);
+ upb_fielddef_setpacked(f, lupb_checkbool(L, 2));
+ return 0;
+}
+
static int lupb_fielddef_setsubdef(lua_State *L) {
upb_fielddef *f = lupb_fielddef_checkmutable(L, 1);
const upb_def *def = NULL;
@@ -817,6 +829,7 @@ static const struct luaL_Reg lupb_fielddef_m[] = {
{"lazy", lupb_fielddef_lazy},
{"name", lupb_fielddef_name},
{"number", lupb_fielddef_number},
+ {"packed", lupb_fielddef_packed},
{"subdef", lupb_fielddef_subdef},
{"subdef_name", lupb_fielddef_subdefname},
{"type", lupb_fielddef_type},
@@ -828,6 +841,7 @@ static const struct luaL_Reg lupb_fielddef_m[] = {
{"set_lazy", lupb_fielddef_setlazy},
{"set_name", lupb_fielddef_setname},
{"set_number", lupb_fielddef_setnumber},
+ {"set_packed", lupb_fielddef_setpacked},
{"set_subdef", lupb_fielddef_setsubdef},
{"set_subdef_name", lupb_fielddef_setsubdefname},
{"set_type", lupb_fielddef_settype},
@@ -926,7 +940,7 @@ static int lupb_msgdef_field(lua_State *L) {
if (type == LUA_TNUMBER) {
f = upb_msgdef_itof(m, lua_tointeger(L, 2));
} else if (type == LUA_TSTRING) {
- f = upb_msgdef_ntof(m, lua_tostring(L, 2));
+ f = upb_msgdef_ntofz(m, lua_tostring(L, 2));
} else {
const char *msg = lua_pushfstring(L, "number or string expected, got %s",
luaL_typename(L, 2));
@@ -1358,8 +1372,9 @@ const upb_msgdef *lupb_msg_checkdef(lua_State *L, int narg) {
static const upb_fielddef *lupb_msg_checkfield(lua_State *L,
const lupb_msgdef *lmd,
int fieldarg) {
- const char *fieldname = luaL_checkstring(L, fieldarg);
- const upb_fielddef *f = upb_msgdef_ntof(lmd->md, fieldname);
+ size_t len;
+ const char *fieldname = luaL_checklstring(L, fieldarg, &len);
+ const upb_fielddef *f = upb_msgdef_ntof(lmd->md, fieldname, len);
if (!f) {
const char *msg = lua_pushfstring(L, "no such field: %s", fieldname);
diff --git a/upb/bindings/ruby/README b/upb/bindings/ruby/README
deleted file mode 100644
index 50fd746..0000000
--- a/upb/bindings/ruby/README
+++ /dev/null
@@ -1,2 +0,0 @@
-This is PROTOTYPE code -- all interfaces are experimental
-and will almost certainly change.
diff --git a/upb/bindings/ruby/README.md b/upb/bindings/ruby/README.md
new file mode 100644
index 0000000..12a7169
--- /dev/null
+++ b/upb/bindings/ruby/README.md
@@ -0,0 +1,30 @@
+
+# Ruby extension
+
+To build, run (from the top upb directory):
+
+ $ make ruby
+ $ sudo make install
+
+To test, run:
+
+ $ make rubytest
+
+The binding currently supports:
+
+ - loading message types from descriptors.
+ - constructing message instances
+ - reading and writing their members
+ - parsing and serializing the messages
+ - all data types (including nested and repeated)
+
+The binding does *not* currently support:
+
+ - defining message types directly in Ruby code.
+ - generating Ruby code for a .proto file.
+ - type-checking for setters
+ - homogeneous / type-checked arrays
+ - default values
+
+Because code generation is not currently implemented, the interface to import
+a specific message type is kind of clunky for the moment.
diff --git a/upb/bindings/ruby/extconf.rb b/upb/bindings/ruby/extconf.rb
index 3637511..b105948 100644
--- a/upb/bindings/ruby/extconf.rb
+++ b/upb/bindings/ruby/extconf.rb
@@ -1,9 +1,13 @@
#!/usr/bin/ruby
require 'mkmf'
+
+# Extra compiler flags may be passed as the first command-line argument.
+# Only append them when one was actually given: ARGV[0] is nil when extconf.rb
+# is run without arguments, and String#+ raises TypeError on nil.
+$CFLAGS += (" " + ARGV[0]) if ARGV[0]
+
find_header("upb/upb.h", "../../..") or raise "Can't find upb headers"
find_library("upb_pic", "upb_msgdef_new", "../../../lib") or raise "Can't find upb lib"
-find_library("upb.pb_pic", "upb_decoder_init", "../../../lib") or raise "Can't find upb.pb lib"
find_library("upb.descriptor_pic", "upb_descreader_init", "../../../lib") or raise "Can't find upb.descriptor lib"
-$CFLAGS += " -Wall"
+find_library("upb.pb_pic", "upb_pbdecoder_init", "../../../lib") or raise "Can't find upb.pb lib"
+
create_makefile("upb")
diff --git a/upb/bindings/ruby/upb.c b/upb/bindings/ruby/upb.c
index 0d25610..2817a15 100644
--- a/upb/bindings/ruby/upb.c
+++ b/upb/bindings/ruby/upb.c
@@ -1,42 +1,41 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2014 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * upb (prototype) extension for Ruby.
+ */
+
+#include "ruby/ruby.h"
+#include "ruby/vm.h"
-#include "ruby.h"
#include "upb/def.h"
#include "upb/handlers.h"
#include "upb/pb/decoder.h"
+#include "upb/pb/encoder.h"
#include "upb/pb/glue.h"
#include "upb/shim/shim.h"
#include "upb/symtab.h"
+// References to global state.
+//
+// Ruby does not have multi-VM support and it is common practice to store
+// references to classes and other per-VM state in global variables.
+static VALUE cSymbolTable;
static VALUE cMessageDef;
static VALUE cMessage;
+static VALUE message_map;
+static upb_inttable objcache;
+static bool objcache_initialized = false;
-// Wrapper around a upb_msgdef.
-typedef struct {
- // The msgdef for this message, and a DecoderMethod to parse protobufs and
- // fill a message.
- //
- // We own refs on both of these.
- const upb_msgdef *md;
- const upb_pbdecodermethod *fill_method;
-
- size_t size;
- uint32_t *field_offsets;
-} rb_msgdef;
-
-// Ruby message object.
-// This will be sized according to what fields are actually present.
-typedef struct {
- union u {
- VALUE rbmsgdef;
- char data[1];
- } data;
-} rb_msg;
+struct rupb_Message;
+struct rupb_MessageDef;
+typedef struct rupb_Message rupb_Message;
+typedef struct rupb_MessageDef rupb_MessageDef;
-#define DEREF(msg, ofs, type) *(type*)(&msg->data.data[ofs])
-
-static void symtab_free(void *md) {
- upb_symtab_unref(md, UPB_UNTRACKED_REF);
-}
+// Accessors for a typed value stored "ofs" bytes into a block of memory.
+// Both expand to an lvalue, so they can be read from or assigned to.  The
+// arguments and the whole expansion are parenthesized so that expression
+// arguments and surrounding operators bind as the caller expects.
+//
+// DEREF_RAW: "ptr" points at the start of the block.
+#define DEREF_RAW(ptr, ofs, type) (*(type*)((char*)(ptr) + (ofs)))
+// DEREF: "msg" points at a struct that has a "data" byte-array member.
+#define DEREF(msg, ofs, type) (*(type*)(&(msg)->data[(ofs)]))
void rupb_checkstatus(upb_status *s) {
if (!upb_ok(s)) {
@@ -44,69 +43,195 @@ void rupb_checkstatus(upb_status *s) {
}
}
-/* handlers *******************************************************************/
+static rupb_MessageDef *msgdef_get(VALUE self);
+static rupb_Message *msg_get(VALUE self);
+static const rupb_MessageDef *get_rbmsgdef(const upb_msgdef *md);
+static const upb_handlers *new_fill_handlers(const rupb_MessageDef *rmd,
+ const void *owner);
+static void putmsg(rupb_Message *msg, const rupb_MessageDef *rmd,
+ upb_sink *sink);
+static VALUE msgdef_getwrapper(const upb_msgdef *md);
+static VALUE new_message_class(VALUE message_def);
+static VALUE get_message_class(VALUE klass, VALUE message);
+static VALUE msg_new(VALUE msgdef);
+
+/* Ruby VALUE <-> C primitive conversions *************************************/
+
+// Ruby VALUE -> C.
+// TODO(haberman): add type/range/precision checks.
+//
+// The unsigned 32-bit and the 64-bit conversions use the "unsigned int" and
+// "long long" macro families instead of NUM2LONG/LONG2NUM: "long" is only 32
+// bits wide on some platforms, where the long-based macros would reject or
+// mangle values that are valid for the field type.
+// NOTE(review): NUM2UINT range-checks against "unsigned int", so values that
+// NUM2LONG used to truncate silently on LP64 now raise -- confirm this is
+// acceptable to callers.
+static float value_to_float(VALUE val) { return NUM2DBL(val); }
+static double value_to_double(VALUE val) { return NUM2DBL(val); }
+static bool value_to_bool(VALUE val) { return RTEST(val); }
+static int32_t value_to_int32(VALUE val) { return NUM2INT(val); }
+static uint32_t value_to_uint32(VALUE val) { return NUM2UINT(val); }
+static int64_t value_to_int64(VALUE val) { return NUM2LL(val); }
+static uint64_t value_to_uint64(VALUE val) { return NUM2ULL(val); }
+
+// C -> Ruby VALUE
+static VALUE float_to_value(float val) { return rb_float_new(val); }
+static VALUE double_to_value(double val) { return rb_float_new(val); }
+static VALUE bool_to_value(bool val) { return val ? Qtrue : Qfalse; }
+static VALUE int32_to_value(int32_t val) { return INT2NUM(val); }
+static VALUE uint32_to_value(uint32_t val) { return UINT2NUM(val); }
+static VALUE int64_to_value(int64_t val) { return LL2NUM(val); }
+static VALUE uint64_to_value(uint64_t val) { return ULL2NUM(val); }
+
+
+/* stringsink *****************************************************************/
+
+// This should probably be factored into a common upb component.
+
+typedef struct {
+ upb_byteshandler handler;
+ upb_bytessink sink;
+ char *ptr;
+ size_t len, size;
+} stringsink;
+
+// Start handler for a stringsink.  Resets the accumulated length to zero (the
+// buffer itself is kept and reused) and returns the sink.
+//
+// "hd" and "size_hint" are not used; they are marked as such so the function
+// stays warning-free, matching stringsink_string().
+static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
+  UPB_UNUSED(hd);
+  UPB_UNUSED(size_hint);
+
+  stringsink *sink = _sink;
+  sink->len = 0;
+  return sink;
+}
-// These are handlers for populating a Ruby protobuf message when parsing.
+// String handler for a stringsink: appends "len" bytes from "ptr" to the
+// sink's buffer, doubling the buffer's capacity until the data fits.
+//
+// Returns "len" on success, or 0 if the buffer could not be grown (in which
+// case the sink is left unchanged).
+static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
+                                size_t len, const upb_bufhandle *handle) {
+  UPB_UNUSED(hd);
+  UPB_UNUSED(handle);
+
+  stringsink *sink = _sink;
+  size_t new_size = sink->size;
+
+  while (sink->len + len > new_size) {
+    new_size *= 2;
+  }
+
+  if (new_size != sink->size) {
+    // Keep the old pointer until realloc() succeeds; assigning the result
+    // directly would leak the buffer and then memcpy() through NULL.
+    char *new_ptr = realloc(sink->ptr, new_size);
+    if (!new_ptr) {
+      // NOTE(review): presumes a short count is how a bytes handler reports
+      // failure to its caller -- confirm against the upb handler contract.
+      return 0;
+    }
+    sink->ptr = new_ptr;
+    sink->size = new_size;
+  }
+
+  memcpy(sink->ptr + sink->len, ptr, len);
+  sink->len += len;
-static size_t strhandler(void *closure, const void *hd, const char *str,
- size_t len, const upb_bufhandle *handle) {
- rb_msg *msg = closure;
- const size_t *ofs = hd;
- DEREF(msg, *ofs, VALUE) = rb_str_new(str, len);
return len;
}
-static const void *newhandlerdata(upb_handlers *h, uint32_t ofs) {
- size_t *hd_ofs = ALLOC(size_t);
- *hd_ofs = ofs;
- upb_handlers_addcleanup(h, hd_ofs, free);
- return hd_ofs;
+// Initializes a stringsink: installs the start/string handlers, points the
+// bytes sink at them with the stringsink itself as closure, and allocates an
+// initial 32-byte buffer.  Must be paired with stringsink_uninit().
+void stringsink_init(stringsink *sink) {
+  upb_byteshandler_init(&sink->handler);
+  upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
+  upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
+
+  upb_bytessink_reset(&sink->sink, &sink->handler, sink);
+
+  sink->size = 32;
+  sink->ptr = malloc(sink->size);
+  // Start out empty.  Without this, "len" is indeterminate until the start
+  // handler runs, so reading the sink before any output was produced would
+  // use an uninitialized length.
+  sink->len = 0;
}
-static void add_handlers_for_message(const void *closure, upb_handlers *h) {
- // XXX: Doesn't support submessages properly yet.
- const rb_msgdef *rmd = closure;
- upb_msg_iter i;
- for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
- upb_fielddef *f = upb_msg_iter_field(&i);
+void stringsink_uninit(stringsink *sink) {
+ free(sink->ptr);
+}
- if (upb_fielddef_isseq(f)) {
- rb_raise(rb_eRuntimeError, "Doesn't support repeated fields yet.");
- }
- size_t ofs = rmd->field_offsets[upb_fielddef_index(f)];
+/* object cache ***************************************************************/
- switch (upb_fielddef_type(f)) {
- case UPB_TYPE_BOOL:
- case UPB_TYPE_INT32:
- case UPB_TYPE_UINT32:
- case UPB_TYPE_ENUM:
- case UPB_TYPE_FLOAT:
- case UPB_TYPE_INT64:
- case UPB_TYPE_UINT64:
- case UPB_TYPE_DOUBLE:
- upb_shim_set(h, f, ofs, -1);
- break;
- case UPB_TYPE_STRING:
- case UPB_TYPE_BYTES: {
- upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
- upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, ofs));
- // XXX: does't currently handle split buffers.
- upb_handlers_setstring(h, f, strhandler, &attr);
- upb_handlerattr_uninit(&attr);
- break;
- }
- case UPB_TYPE_MESSAGE:
- rb_raise(rb_eRuntimeError, "Doesn't support submessages yet.");
- break;
- }
+// The object cache is a singleton mapping of void* -> Ruby Object.
+// It caches Ruby objects that wrap C objects.
+//
+// When we are wrapping C objects it is desirable to give them identity
+// semantics. In other words, if you reach the same C object via two different
+// paths, it is desirable (and sometimes even required) that you get the same
+// wrapper object both times. If we instead just created a new wrapper object
+// every time you ask for one, we could end up with unexpected results like:
+//
+// f1 = msgdef.field("request_id")
+// f2 = msgdef.field("request_id")
+//
+// # equal? tests identity equality. Returns false without a cache.
+// f1.equal?(f2)
+//
+// We do not register the cache with Ruby's GC, so being in this map will not
+// keep the object alive. This is the desired behavior, because it lets objects
+// be freed if they have no references from Ruby. We do require, though, that
+// objects remove themselves from the map when they are freed. In this respect
+// the cache operates like a weak map where the values are weak.
+
+typedef VALUE createfunc(const void *obj);
+
+// Call to initialize the cache.  Should be done once on process startup.
+//
+// Declared with an explicit (void) parameter list: in C an empty "()" leaves
+// the parameters unspecified rather than declaring that there are none.
+static void objcache_init(void) {
+  upb_inttable_init(&objcache, UPB_CTYPE_UINT64);
+  objcache_initialized = true;
+}
+
+// Call to uninitialize the cache.  Should be done once on process shutdown.
+// The cache is expected to be empty by then, since wrappers remove themselves
+// from it when they are freed.
+//
+// NOTE(review): the unused ruby_vm_t* parameter presumably exists so that this
+// function matches a VM at-exit callback signature -- confirm at the
+// registration site.
+static void objcache_uninit(ruby_vm_t *vm) {
+  UPB_UNUSED(vm);
+  assert(objcache_initialized);
+  assert(upb_inttable_count(&objcache) == 0);
+
+  objcache_initialized = false;
+  upb_inttable_uninit(&objcache);
+}
+
+// Looks up the given object in the cache. If the corresponding Ruby wrapper
+// object is found, returns it, otherwise creates the wrapper and returns that.
+static VALUE objcache_getorcreate(const void *obj, createfunc *func) {
+ assert(objcache_initialized);
+
+ upb_value v;
+ if (!upb_inttable_lookupptr(&objcache, obj, &v)) {
+ v = upb_value_uint64(func(obj));
+ upb_inttable_insertptr(&objcache, obj, v);
}
+ return upb_value_getuint64(v);
}
-// Creates upb handlers for populating a message.
-static const upb_handlers *new_fill_handlers(const rb_msgdef *rmd,
- const void *owner) {
- return upb_handlers_newfrozen(rmd->md, owner, add_handlers_for_message, rmd);
+// Removes the given object from the cache. Should only be called by the code
+// that is freeing the wrapper object.
+static void objcache_remove(const void *obj) {
+ assert(objcache_initialized);
+
+ bool removed = upb_inttable_removeptr(&objcache, obj, NULL);
+ UPB_ASSERT_VAR(removed, removed);
+}
+
+/* message layout *************************************************************/
+
+// We lay out Ruby messages using a raw block of C memory. We assign offsets for
+// each member so that instances are laid out like a C struct instead of as
+// instance variables. This saves both memory and CPU.
+
+typedef struct {
+ // The size of the block of memory we should allocate for instances.
+ size_t size;
+
+ // Prototype to memcpy() onto new message instances. Size is "size" above.
+ void *prototype;
+
+ // An offset for each member, indexed by upb_fielddef_index(f).
+ uint32_t *field_offsets;
+} rb_msglayout;
+
+// Reports whether the slot stored for this field holds a Ruby VALUE (i.e. a
+// direct pointer to another Ruby object) rather than the field's value itself.
+static bool is_ruby_value(const upb_fielddef *f) {
+  // Repeated fields are pointers to arrays, submessage fields are pointers to
+  // submessages, and string fields are pointers to string objects.  The checks
+  // run in that order and stop at the first one that matches.
+  return upb_fielddef_isseq(f) ||
+         upb_fielddef_issubmsg(f) ||
+         upb_fielddef_isstring(f);
}
// General alignment rules are that each type needs to be stored at an address
@@ -116,8 +241,12 @@ static size_t align_up(size_t val, size_t align) {
}
// Byte size to store each upb type.
-static size_t rupb_sizeof(upb_fieldtype_t type) {
- switch (type) {
+static size_t rupb_sizeof(const upb_fielddef *f) {
+ if (is_ruby_value(f)) {
+ return sizeof(VALUE);
+ }
+
+ switch (upb_fielddef_type(f)) {
case UPB_TYPE_BOOL:
return 1;
case UPB_TYPE_INT32:
@@ -129,15 +258,228 @@ static size_t rupb_sizeof(upb_fieldtype_t type) {
case UPB_TYPE_UINT64:
case UPB_TYPE_DOUBLE:
return 8;
- case UPB_TYPE_STRING:
- case UPB_TYPE_BYTES:
- case UPB_TYPE_MESSAGE:
- return sizeof(VALUE);
+ default:
+ break;
}
assert(false);
+ return 0;
}
-/* msg ************************************************************************/
+// Calculates offsets for each field and the total size of the data block.
+//
+// This lets us pack protos like structs instead of storing them like
+// dictionaries. This speeds up parsing a lot and also saves memory
+// (unless messages are very sparse).
+static void assign_offsets(rb_msglayout *layout, const upb_msgdef *md) {
+ layout->field_offsets = ALLOC_N(uint32_t, upb_msgdef_numfields(md));
+ size_t ofs = 0;
+ upb_msg_iter i;
+
+ for (upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
+ const upb_fielddef *f = upb_msg_iter_field(&i);
+ size_t field_size = rupb_sizeof(f);
+
+ // Align field properly: each field is aligned to a multiple of its size.
+ //
+ // TODO(haberman): optimize layout? For example we could sort fields
+ // big-to-small.
+ ofs = align_up(ofs, field_size);
+
+ layout->field_offsets[upb_fielddef_index(f)] = ofs;
+ ofs += field_size;
+ }
+
+ layout->size = ofs;
+}
+
+// Builds the layout's prototype: a buffer of layout->size bytes that can be
+// memcpy()'d onto a new instance to initialize it.
+static void make_prototype(rb_msglayout *layout, const upb_msgdef *md) {
+  // Start from an all-zero block, which is the right initial state for most
+  // members; only the Ruby pointers need something else.
+  void *proto = ALLOC_N(char, layout->size);
+  memset(proto, 0, layout->size);
+
+  upb_msg_iter it;
+  upb_msg_begin(&it, md);
+  while (!upb_msg_done(&it)) {
+    const upb_fielddef *field = upb_msg_iter_field(&it);
+    if (is_ruby_value(field)) {
+      // Ruby pointers default to nil.
+      size_t field_ofs = layout->field_offsets[upb_fielddef_index(field)];
+      DEREF_RAW(proto, field_ofs, VALUE) = Qnil;
+    }
+    upb_msg_next(&it);
+  }
+
+  layout->prototype = proto;
+}
+
+
+static void msglayout_init(rb_msglayout *layout, const upb_msgdef *m) {
+ assign_offsets(layout, m);
+ make_prototype(layout, m);
+}
+
+// Releases the buffers owned by a layout.
+//
+// Both buffers were allocated with ALLOC_N() (Ruby's allocator), so they are
+// released with xfree() rather than the C library's free().
+static void msglayout_uninit(rb_msglayout *layout) {
+  xfree(layout->field_offsets);
+  xfree(layout->prototype);
+}
+
+
+/* Upb::MessageDef ************************************************************/
+
+// C representation for Upb::MessageDef.
+//
+// Contains a reference to the underlying upb_msgdef, as well as associated data
+// like a reference to the corresponding Ruby class.
+struct rupb_MessageDef {
+ // We own refs on all of these.
+
+ // The upb_msgdef we are wrapping.
+ const upb_msgdef *md;
+
+ // A DecoderMethod for parsing a protobuf into this type.
+ const upb_pbdecodermethod *fill_method;
+
+ // Handlers for serializing into a protobuf of this type.
+ const upb_handlers *serialize_handlers;
+
+ // The Ruby class for instances of this type.
+ VALUE klass;
+
+ // Layout for messages of this type.
+ rb_msglayout layout;
+};
+
+// Called by the Ruby GC when a Upb::MessageDef is being freed.
+//
+// Removes the wrapper from the object cache, drops the refs it owns on the
+// msgdef, the decoder method (if one was built) and the serialize handlers
+// (if present), then releases the layout and the struct itself.
+static void msgdef_free(void *_rmd) {
+  rupb_MessageDef *rmd = _rmd;
+  objcache_remove(rmd->md);
+  upb_msgdef_unref(rmd->md, &rmd->md);
+  if (rmd->fill_method) {
+    upb_pbdecodermethod_unref(rmd->fill_method, &rmd->fill_method);
+  }
+  if (rmd->serialize_handlers) {
+    upb_handlers_unref(rmd->serialize_handlers, &rmd->serialize_handlers);
+  }
+  msglayout_uninit(&rmd->layout);
+  // The struct was allocated by TypedData_Make_Struct(), i.e. by Ruby's
+  // allocator, so it is released with xfree() rather than free().
+  xfree(rmd);
+}
+
+// Called by the Ruby GC during the "mark" phase to decide what is still alive.
+// We call rb_gc_mark on all Ruby VALUE pointers we reference.
+static void msgdef_mark(void *_rmd) {
+  rupb_MessageDef *rmd = _rmd;
+  rb_gc_mark(rmd->klass);
+
+  // Mark the wrappers of all submessage types.
+  upb_msg_iter i;
+  for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
+    if (!upb_fielddef_issubmsg(f)) {
+      continue;
+    }
+
+    // Only mark wrappers that already exist.  The cache is consulted directly
+    // instead of going through msgdef_getwrapper(), because that would create
+    // a missing wrapper on the spot and a mark function must not allocate
+    // Ruby objects.  A submessage type without a wrapper has nothing to keep
+    // alive.
+    upb_value v;
+    if (upb_inttable_lookupptr(&objcache, upb_fielddef_msgsubdef(f), &v)) {
+      rb_gc_mark(upb_value_getuint64(v));
+    }
+  }
+}
+
+static const rb_data_type_t msgdef_type = {"Upb::MessageDef",
+ {msgdef_mark, msgdef_free, NULL}};
+
+// TODO(haberman): do we need an alloc func? We want to prohibit dup and
+// probably subclassing too.
+
+static rupb_MessageDef *msgdef_get(VALUE self) {
+ rupb_MessageDef *msgdef;
+ TypedData_Get_Struct(self, rupb_MessageDef, &msgdef_type, msgdef);
+ return msgdef;
+}
+
+// Constructs the upb decoder method for parsing messages of this type.
+const upb_pbdecodermethod *new_fillmsg_decodermethod(const rupb_MessageDef *rmd,
+ const void *owner) {
+ const upb_handlers *fill_handlers = new_fill_handlers(rmd, &fill_handlers);
+ upb_pbdecodermethodopts opts;
+ upb_pbdecodermethodopts_init(&opts, fill_handlers);
+
+ const upb_pbdecodermethod *ret = upb_pbdecodermethod_new(&opts, owner);
+ upb_handlers_unref(fill_handlers, &fill_handlers);
+ return ret;
+}
+
+// Constructs a new Ruby wrapper object around the given msgdef.
+//
+// Takes a ref on the msgdef (owned by the wrapper), computes the message
+// layout, creates the Ruby class for instances and builds the serialize
+// handlers.  The decoder method is left NULL here and is built lazily by
+// msgdef_decodermethod().
+static VALUE make_msgdef(const void *_md) {
+  const upb_msgdef *md = _md;
+  rupb_MessageDef *rmd;
+  VALUE ret =
+      TypedData_Make_Struct(cMessageDef, rupb_MessageDef, &msgdef_type, rmd);
+
+  upb_msgdef_ref(md, &rmd->md);
+
+  rmd->md = md;
+  rmd->fill_method = NULL;
+
+  // OPT: most of these things could be built lazily, when they are first
+  // needed.
+  msglayout_init(&rmd->layout, md);
+
+  rmd->klass = new_message_class(ret);
+  rmd->serialize_handlers =
+      upb_pb_encoder_newhandlers(md, &rmd->serialize_handlers);
+
+  return ret;
+}
+
+// Accessor to get a decoder method for this message type.
+// Constructs the decoder method lazily.
+static const upb_pbdecodermethod *msgdef_decodermethod(rupb_MessageDef *rmd) {
+ if (!rmd->fill_method) {
+ rmd->fill_method = new_fillmsg_decodermethod(rmd, &rmd->fill_method);
+ }
+
+ return rmd->fill_method;
+}
+
+static VALUE msgdef_getwrapper(const upb_msgdef *md) {
+ return objcache_getorcreate(md, make_msgdef);
+}
+
+static const rupb_MessageDef *get_rbmsgdef(const upb_msgdef *md) {
+ return msgdef_get(msgdef_getwrapper(md));
+}
+
+
+/* Upb::Message ***************************************************************/
+
+// Code to implement the Upb::Message object.
+//
+// A unique Ruby class is generated for each message type, but all message types
+// share Upb::Message as their base class. Upb::Message contains all of the
+// actual functionality; the only reason the derived class exists at all is
+// for convenience. It lets Ruby users do things like:
+//
+// message = MyMessage.new
+// if message.kind_of?(MyMessage)
+//
+// ... and other similar things that Ruby users expect they can do.
+
+// C representation of Upb::Message.
+//
+// Represents a message instance, laid out like a C struct in a type-specific
+// layout.
+//
+// This will be sized according to what fields are actually present.
+struct rupb_Message {
+ VALUE rbmsgdef;
+ char data[];
+};
+
+// Returns the size of a message instance.
+size_t msg_size(const rupb_MessageDef *rmd) {
+ return sizeof(rupb_Message) + rmd->layout.size;
+}
static void msg_free(void *msg) {
free(msg);
@@ -145,103 +487,170 @@ static void msg_free(void *msg) {
// Invoked by the Ruby GC whenever it is doing a mark-and-sweep.
static void msg_mark(void *p) {
- rb_msg *msg = p;
- rb_msgdef *rmd;
- Data_Get_Struct(msg->data.rbmsgdef, rb_msgdef, rmd);
+ rupb_Message *msg = p;
+ rupb_MessageDef *rmd = msgdef_get(msg->rbmsgdef);
// Mark the msgdef to keep it alive.
- rb_gc_mark(msg->data.rbmsgdef);
+ rb_gc_mark(msg->rbmsgdef);
// We need to mark all references to other Ruby values: strings, arrays, and
- // submessages that we point to. Only strings are implemented so far.
+ // submessages that we point to.
upb_msg_iter i;
for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
- if (upb_fielddef_isstring(f)) {
- size_t ofs = rmd->field_offsets[upb_fielddef_index(f)];
+ if (is_ruby_value(f)) {
+ size_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)];
rb_gc_mark(DEREF(msg, ofs, VALUE));
}
}
}
-static VALUE msg_new(VALUE msgdef) {
- const rb_msgdef *rmd;
- Data_Get_Struct(msgdef, rb_msgdef, rmd);
+static const rb_data_type_t msg_type = {"Upb::Message",
+ {msg_mark, msg_free, NULL}};
+
+static rupb_Message *msg_get(VALUE self) {
+ rupb_Message *msg;
+ TypedData_Get_Struct(self, rupb_Message, &msg_type, msg);
+ return msg;
+}
+
+// Instance variable name that we use to store a reference from the Ruby class
+// for a message and its Upb::MessageDef.
+//
+// We avoid prefixing this by "@" to make it inaccessible by Ruby.
+static const char *kMessageDefMemberName = "msgdef";
+
+static VALUE msg_getmsgdef(VALUE klass) {
+ VALUE msgdef = rb_iv_get(klass, kMessageDefMemberName);
- rb_msg *msg = (rb_msg*)ALLOC_N(char, rmd->size);
- memset(msg, 0, rmd->size);
- msg->data.rbmsgdef = msgdef;
+ if (msgdef == Qnil) {
+ // TODO(haberman): If we want to allow subclassing, we might want to walk up
+ // the hierarchy looking for this member.
+ rb_raise(rb_eRuntimeError,
+ "Can't call on Upb::Message directly, only subclasses");
+ }
+
+ return msgdef;
+}
- VALUE ret = Data_Wrap_Struct(cMessage, msg_mark, msg_free, msg);
+// Called by the Ruby VM when it wants to create a new message instance.
+static VALUE msg_alloc(VALUE klass) {
+ VALUE msgdef = msg_getmsgdef(klass);
+ const rupb_MessageDef *rmd = msgdef_get(msgdef);
+
+ rupb_Message *msg = (rupb_Message*)ALLOC_N(char, msg_size(rmd));
+ msg->rbmsgdef = msgdef;
+ memcpy(&msg->data, rmd->layout.prototype, rmd->layout.size);
+
+ VALUE ret = TypedData_Wrap_Struct(klass, &msg_type, msg);
return ret;
}
-static const upb_fielddef *lookup_field(rb_msg *msg, const char *field,
- size_t *ofs) {
- const rb_msgdef *rmd;
- Data_Get_Struct(msg->data.rbmsgdef, rb_msgdef, rmd);
- const upb_fielddef *f = upb_msgdef_ntof(rmd->md, field);
+// Creates a new Ruby class for the given Upb::MessageDef. The new class
+// derives from Upb::Message but also stores a reference to the Upb::MessageDef.
+static VALUE new_message_class(VALUE message_def) {
+ msgdef_get(message_def); // Check type.
+ VALUE klass = rb_class_new(cMessage);
+ rb_iv_set(klass, kMessageDefMemberName, message_def);
+
+ // This shouldn't be necessary because we should inherit the alloc func from
+ // the base class of Message. For some reason this is not working properly
+ // and we are having to define it manually.
+ rb_define_alloc_func(klass, msg_alloc);
+
+ return klass;
+}
+
+// Call to create a new Message instance.
+static VALUE msg_new(VALUE msgdef) {
+ return rb_class_new_instance(0, NULL, get_message_class(Qnil, msgdef));
+}
+
+// Looks up the field whose name is the first "len" bytes of "field".  On
+// success returns the upb_fielddef and stores the field's offset in *ofs.
+// Otherwise raises a Ruby ArgumentError.
+static const upb_fielddef *lookup_field(rupb_Message *msg, const char *field,
+                                        size_t len, size_t *ofs) {
+  const rupb_MessageDef *rmd = msgdef_get(msg->rbmsgdef);
+  const upb_fielddef *f = upb_msgdef_ntof(rmd->md, field, len);
+
if (!f) {
- rb_raise(rb_eArgError, "No such field: %s", field);
+    // The name is delimited by "len", not by a NUL byte (the setter passes
+    // "name=" with a length that excludes the "="), so bound what is printed.
+    rb_raise(rb_eArgError, "Message %s does not contain field %.*s",
+             upb_msgdef_fullname(rmd->md), (int)len, field);
}
- *ofs = rmd->field_offsets[upb_fielddef_index(f)];
+
+  *ofs = rmd->layout.field_offsets[upb_fielddef_index(f)];
return f;
}
-static VALUE msg_setter(rb_msg *msg, VALUE field, VALUE val) {
+// Sets the given field to the given value.
+static void setprimitive(rupb_Message *m, size_t ofs, const upb_fielddef *f,
+ VALUE val) {
+ switch (upb_fielddef_type(f)) {
+ case UPB_TYPE_FLOAT: DEREF(m, ofs, float) = value_to_float(val); break;
+ case UPB_TYPE_DOUBLE: DEREF(m, ofs, double) = value_to_double(val); break;
+ case UPB_TYPE_BOOL: DEREF(m, ofs, bool) = value_to_bool(val); break;
+ case UPB_TYPE_ENUM:
+ case UPB_TYPE_INT32: DEREF(m, ofs, int32_t) = value_to_int32(val); break;
+ case UPB_TYPE_UINT32: DEREF(m, ofs, uint32_t) = value_to_uint32(val); break;
+ case UPB_TYPE_INT64: DEREF(m, ofs, int64_t) = value_to_int64(val); break;
+ case UPB_TYPE_UINT64: DEREF(m, ofs, uint64_t) = value_to_uint64(val); break;
+ default: rb_bug("Unexpected type");
+ }
+}
+
+// Returns the Ruby VALUE for the given field.
+static VALUE getprimitive(rupb_Message *m, size_t ofs, const upb_fielddef *f) {
+ switch (upb_fielddef_type(f)) {
+ case UPB_TYPE_FLOAT: return float_to_value(DEREF(m, ofs, float));
+ case UPB_TYPE_DOUBLE: return double_to_value(DEREF(m, ofs, double));
+ case UPB_TYPE_BOOL: return bool_to_value(DEREF(m, ofs, bool));
+ case UPB_TYPE_ENUM:
+ case UPB_TYPE_INT32: return int32_to_value(DEREF(m, ofs, int32_t));
+ case UPB_TYPE_UINT32: return uint32_to_value(DEREF(m, ofs, uint32_t));
+ case UPB_TYPE_INT64: return int64_to_value(DEREF(m, ofs, int64_t));
+ case UPB_TYPE_UINT64: return uint64_to_value(DEREF(m, ofs, uint64_t));
+ default: rb_bug("Unexpected type");
+ }
+}
+
+static VALUE msg_setter(rupb_Message *msg, VALUE field, VALUE val) {
size_t ofs;
- char *fieldp = RSTRING_PTR(field);
- size_t field_last = RSTRING_LEN(field) - 1;
// fieldp is a string like "id=". But we want to look up "id".
- // We take the liberty of temporarily setting the "=" to NULL.
- assert(fieldp[field_last] == '=');
- fieldp[field_last] = '\0';
- const upb_fielddef *f = lookup_field(msg, fieldp, &ofs);
- fieldp[field_last] = '=';
+ const upb_fielddef *f =
+ lookup_field(msg, RSTRING_PTR(field), RSTRING_LEN(field) - 1, &ofs);
// Possibly introduce stricter type checking.
- switch (upb_fielddef_type(f)) {
- case UPB_TYPE_FLOAT: DEREF(msg, ofs, float) = NUM2DBL(val);
- case UPB_TYPE_DOUBLE: DEREF(msg, ofs, double) = NUM2DBL(val);
- case UPB_TYPE_BOOL: DEREF(msg, ofs, bool) = RTEST(val);
- case UPB_TYPE_STRING:
- case UPB_TYPE_BYTES: DEREF(msg, ofs, VALUE) = val;
- case UPB_TYPE_MESSAGE: return Qnil;
- case UPB_TYPE_ENUM:
- case UPB_TYPE_INT32: DEREF(msg, ofs, int32_t) = NUM2INT(val);
- case UPB_TYPE_UINT32: DEREF(msg, ofs, uint32_t) = NUM2LONG(val);
- case UPB_TYPE_INT64: DEREF(msg, ofs, int64_t) = NUM2LONG(val);
- case UPB_TYPE_UINT64: DEREF(msg, ofs, uint64_t) = NUM2ULL(val);
+ if (is_ruby_value(f)) {
+ DEREF(msg, ofs, VALUE) = val;
+ } else {
+ setprimitive(msg, ofs, f, val);
}
return val;
}
-static VALUE msg_getter(rb_msg *msg, VALUE field) {
+static VALUE msg_getter(rupb_Message *msg, VALUE field) {
size_t ofs;
- const upb_fielddef *f = lookup_field(msg, RSTRING_PTR(field), &ofs);
+ const upb_fielddef *f =
+ lookup_field(msg, RSTRING_PTR(field), RSTRING_LEN(field), &ofs);
- switch (upb_fielddef_type(f)) {
- case UPB_TYPE_FLOAT: return rb_float_new(DEREF(msg, ofs, float));
- case UPB_TYPE_DOUBLE: return rb_float_new(DEREF(msg, ofs, double));
- case UPB_TYPE_BOOL: return DEREF(msg, ofs, bool) ? Qtrue : Qfalse;
- case UPB_TYPE_STRING:
- case UPB_TYPE_BYTES: return DEREF(msg, ofs, VALUE);
- case UPB_TYPE_MESSAGE: return Qnil;
- case UPB_TYPE_ENUM:
- case UPB_TYPE_INT32: return INT2NUM(DEREF(msg, ofs, int32_t));
- case UPB_TYPE_UINT32: return LONG2NUM(DEREF(msg, ofs, uint32_t));
- case UPB_TYPE_INT64: return LONG2NUM(DEREF(msg, ofs, int64_t));
- case UPB_TYPE_UINT64: return ULL2NUM(DEREF(msg, ofs, uint64_t));
+ if (is_ruby_value(f)) {
+ return DEREF(msg, ofs, VALUE);
+ } else {
+ return getprimitive(msg, ofs, f);
}
-
- rb_bug("Unexpected type");
}
+// This is the Message object's "method_missing" method, so it receives calls
+// for any method whose name was not recognized. We use it to implement getters
+// and setters for every field
+//
+// call-seq:
+// message.field -> current value of "field"
+// message.field = new_value
static VALUE msg_accessor(int argc, VALUE *argv, VALUE obj) {
- rb_msg *msg;
- Data_Get_Struct(obj, rb_msg, msg);
+ rupb_Message *msg = msg_get(obj);
// method_missing protocol: (method [, arg1, arg2, ...])
assert(argc >= 1 && SYMBOL_P(argv[0]));
@@ -270,72 +679,106 @@ static VALUE msg_accessor(int argc, VALUE *argv, VALUE obj) {
}
}
-/* msgdef *********************************************************************/
-
-static void msgdef_free(void *_rmd) {
- rb_msgdef *rmd = _rmd;
- upb_msgdef_unref(rmd->md, &rmd->md);
- if (rmd->fill_method) {
- upb_pbdecodermethod_unref(rmd->fill_method, &rmd->fill_method);
- }
- free(rmd->field_offsets);
+// Called when Ruby wants to turn this value into a string.
+// TODO(haberman): implement.
+static VALUE msg_tostring(VALUE self) {
+ return rb_str_new2("tostring!");
}
-const upb_pbdecodermethod *new_fillmsg_decodermethod(const rb_msgdef *rmd,
- const void *owner) {
- const upb_handlers *fill_handlers = new_fill_handlers(rmd, &fill_handlers);
- upb_pbdecodermethodopts opts;
- upb_pbdecodermethodopts_init(&opts, fill_handlers);
+// call-seq:
+// MessageClass.parse(binary_protobuf) -> message instance
+//
+// Parses a binary protobuf according to this message class and returns a new
+// message instance of this class type.
+static VALUE msg_parse(VALUE klass, VALUE binary_protobuf) {
+ Check_Type(binary_protobuf, T_STRING);
+ rupb_MessageDef *rmd = msgdef_get(msg_getmsgdef(klass));
- const upb_pbdecodermethod *ret = upb_pbdecodermethod_new(&opts, owner);
- upb_handlers_unref(fill_handlers, &fill_handlers);
- return ret;
+ VALUE msg = rb_class_new_instance(0, NULL, klass);
+ rupb_Message *msgp = msg_get(msg);
+
+ const upb_pbdecodermethod *method = msgdef_decodermethod(rmd);
+ const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
+ upb_pbdecoder decoder;
+ upb_sink sink;
+ upb_status status = UPB_STATUS_INIT;
+
+ upb_pbdecoder_init(&decoder, method, &status);
+ upb_sink_reset(&sink, h, msgp);
+ upb_pbdecoder_resetoutput(&decoder, &sink);
+ upb_bufsrc_putbuf(RSTRING_PTR(binary_protobuf),
+ RSTRING_LEN(binary_protobuf),
+ upb_pbdecoder_input(&decoder));
+
+ // TODO(haberman): make uninit optional if custom allocator for parsing
+ // returns GC-rooted memory. That will make decoding longjmp-safe (required
+ // if parsing triggers any VM errors like OOM or errors in user handlers).
+ upb_pbdecoder_uninit(&decoder);
+ rupb_checkstatus(&status);
+
+ return msg;
}
-// Calculates offsets for each field.
+// call-seq:
+// Message.serialize(message instance) -> serialized string
//
-// This lets us pack protos like structs instead of storing them like
-// dictionaries. This speeds up a parsing a lot and also saves memory
-// (unless messages are very sparse).
-static void assign_offsets(rb_msgdef *rmd) {
- size_t ofs = sizeof(rb_msg); // Msg starts with predeclared members.
- upb_msg_iter i;
- for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
- upb_fielddef *f = upb_msg_iter_field(&i);
- size_t field_size = rupb_sizeof(upb_fielddef_type(f));
- ofs = align_up(ofs, field_size); // Align field properly.
- rmd->field_offsets[upb_fielddef_index(f)] = ofs;
- ofs += field_size;
- }
- rmd->size = ofs;
-}
+// Serializes the given message instance to a string.
+static VALUE msg_serialize(VALUE klass, VALUE message) {
+ rupb_Message *msg = msg_get(message);
+ const rupb_MessageDef *rmd = msgdef_get(msg->rbmsgdef);
-// Constructs a new Ruby wrapper object around the given msgdef.
-static VALUE make_msgdef(const upb_msgdef *md) {
- rb_msgdef *rmd;
- VALUE ret = Data_Make_Struct(cMessageDef, rb_msgdef, NULL, msgdef_free, rmd);
+ stringsink sink;
+ stringsink_init(&sink);
- upb_msgdef_ref(md, &rmd->md);
+ upb_pb_encoder encoder;
+ upb_pb_encoder_init(&encoder, rmd->serialize_handlers);
+ upb_pb_encoder_resetoutput(&encoder, &sink.sink);
- rmd->md = md;
- rmd->field_offsets = ALLOC_N(uint32_t, upb_msgdef_numfields(md));
- rmd->fill_method = NULL;
+ putmsg(msg, rmd, upb_pb_encoder_input(&encoder));
- assign_offsets(rmd);
+ VALUE ret = rb_str_new(sink.ptr, sink.len);
- rmd->fill_method = new_fillmsg_decodermethod(rmd, &rmd->fill_method);
+ upb_pb_encoder_uninit(&encoder);
+ stringsink_uninit(&sink);
return ret;
}
-// Loads a descriptor and constructs a MessageDef to the named message.
-static VALUE msgdef_load(VALUE klass, VALUE descriptor, VALUE message_name) {
+
+/* Upb::SymbolTable ***********************************************************/
+
+// Ruby wrapper around a SymbolTable. Allows loading of descriptors and turning
+// them into MessageDef objects.
+
+void symtab_free(void *s) {
+ upb_symtab_unref(s, UPB_UNTRACKED_REF);
+}
+
+static const rb_data_type_t symtab_type = {"Upb::SymbolTable",
+ {NULL, symtab_free, NULL}};
+
+// Called by the Ruby VM to allocate a SymbolTable object.
+static VALUE symtab_alloc(VALUE klass) {
upb_symtab *symtab = upb_symtab_new(UPB_UNTRACKED_REF);
+ VALUE ret = TypedData_Wrap_Struct(klass, &symtab_type, symtab);
- // Wrap the symtab in a Ruby object so it gets GC'd.
- // In a real wrapper we would wrap this object more fully (ie. expose its
- // methods to Ruby callers).
- Data_Wrap_Struct(rb_cObject, NULL, symtab_free, symtab);
+ return ret;
+}
+
+static upb_symtab *symtab_get(VALUE self) {
+ upb_symtab *symtab;
+ TypedData_Get_Struct(self, upb_symtab, &symtab_type, symtab);
+ return symtab;
+}
+
+// call-seq:
+// symtab.load_descriptor(descriptor)
+//
+// Parses a FileDescriptorSet from the given string and adds the defs to the
+// SymbolTable. Raises if there was an error.
+static VALUE symtab_load_descriptor(VALUE self, VALUE descriptor) {
+ upb_symtab *symtab = symtab_get(self);
+ Check_Type(descriptor, T_STRING);
upb_status status = UPB_STATUS_INIT;
upb_load_descriptor_into_symtab(
@@ -346,51 +789,377 @@ static VALUE msgdef_load(VALUE klass, VALUE descriptor, VALUE message_name) {
"Error loading descriptor: %s", upb_status_errmsg(&status));
}
- const char *name = RSTRING_PTR(message_name);
- const upb_msgdef *m = upb_symtab_lookupmsg(symtab, name);
+ return Qnil;
+}
+
+// call-seq:
+// symtab.lookup(name)
+//
+// Returns the MessageDef for this name; raises RuntimeError if none is found.
+// TODO(haberman): only support messages right now, not enums.
+static VALUE symtab_lookup(VALUE self, VALUE name) {
+ upb_symtab *symtab = symtab_get(self);
+ Check_Type(name, T_STRING);
+
+ const char *cname = RSTRING_PTR(name);
+ const upb_msgdef *m = upb_symtab_lookupmsg(symtab, cname);
if (!m) {
- rb_raise(rb_eRuntimeError, "Message name '%s' not found", name);
+ rb_raise(rb_eRuntimeError, "Message name '%s' not found", cname);
}
- return make_msgdef(m);
+ return msgdef_getwrapper(m);
}
-static VALUE msgdef_parse(VALUE self, VALUE binary_protobuf) {
- const rb_msgdef *rmd;
- Data_Get_Struct(self, rb_msgdef, rmd);
- VALUE msg = msg_new(self);
- rb_msg *msgp;
- Data_Get_Struct(msg, rb_msg, msgp);
+/* handlers *******************************************************************/
- const upb_handlers *h = upb_pbdecodermethod_desthandlers(rmd->fill_method);
- upb_pbdecoder decoder;
- upb_sink sink;
- upb_status status = UPB_STATUS_INIT;
+// These are handlers for populating a Ruby protobuf message (rupb_Message) when
+// parsing.
- upb_pbdecoder_init(&decoder, rmd->fill_method, &status);
- upb_sink_reset(&sink, h, msgp);
- upb_pbdecoder_resetoutput(&decoder, &sink);
- upb_bufsrc_putbuf(RSTRING_PTR(binary_protobuf),
- RSTRING_LEN(binary_protobuf),
- upb_pbdecoder_input(&decoder));
- // TODO(haberman): make uninit optional if custom allocator for parsing
- // returns GC-rooted memory. That will make decoding longjmp-safe (required
- // if parsing triggers any VM errors like OOM or errors in user handlers).
- upb_pbdecoder_uninit(&decoder);
- rupb_checkstatus(&status);
+// Creates a handlerdata that simply contains the offset for this field.
+//
+// The offset is stored in a heap-allocated size_t and a cleanup function is
+// registered on |h| for it.  Returns the pointer to pass as handler data.
+static const void *newhandlerdata(upb_handlers *h, uint32_t ofs) {
+  size_t *hd_ofs = ALLOC(size_t);
+  *hd_ofs = ofs;
+  // ALLOC() allocates through Ruby's xmalloc, so the matching deallocator is
+  // xfree rather than libc free.
+  upb_handlers_addcleanup(h, hd_ofs, xfree);
+  return hd_ofs;
+}
- return msg;
+typedef struct {
+ size_t ofs;
+ const upb_msgdef *md;
+} submsg_handlerdata_t;
+
+// Creates a handlerdata that contains offset and submessage type information.
+//
+// |ofs| is stored as-is and the submessage def is taken from |f|.  A cleanup
+// function is registered on |h| for the allocated struct.
+static const void *newsubmsghandlerdata(upb_handlers *h, uint32_t ofs,
+                                        const upb_fielddef *f) {
+  submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t);
+  hd->ofs = ofs;
+  hd->md = upb_fielddef_msgsubdef(f);
+  // ALLOC() allocates through Ruby's xmalloc, so the matching deallocator is
+  // xfree rather than libc free.
+  upb_handlers_addcleanup(h, hd, xfree);
+  return hd;
+}
+
+// A handler that starts a repeated field. Gets or creates a Ruby array for the
+// field.
+static void *startseq_handler(void *closure, const void *hd) {
+ rupb_Message *msg = closure;
+ const size_t *ofs = hd;
+
+ if (DEREF(msg, *ofs, VALUE) == Qnil) {
+ DEREF(msg, *ofs, VALUE) = rb_ary_new();
+ }
+
+ return (void*)DEREF(msg, *ofs, VALUE);
+}
+
+// Handlers that append primitive values to a repeated field (a regular Ruby
+// array for now).
+#define DEFINE_APPEND_HANDLER(type, ctype) \
+ static bool append##type##_handler(void *closure, const void *hd, \
+ ctype val) { \
+ VALUE ary = (VALUE)closure; \
+ rb_ary_push(ary, type##_to_value(val)); \
+ return true; \
+ }
+
+DEFINE_APPEND_HANDLER(bool, bool)
+DEFINE_APPEND_HANDLER(int32, int32_t)
+DEFINE_APPEND_HANDLER(uint32, uint32_t)
+DEFINE_APPEND_HANDLER(float, float)
+DEFINE_APPEND_HANDLER(int64, int64_t)
+DEFINE_APPEND_HANDLER(uint64, uint64_t)
+DEFINE_APPEND_HANDLER(double, double)
+
+// Appends a string to a repeated field (a regular Ruby array for now).
+static size_t appendstr_handler(void *closure, const void *hd, const char *str,
+ size_t len, const upb_bufhandle *handle) {
+ VALUE ary = (VALUE)closure;
+ rb_ary_push(ary, rb_str_new(str, len));
+ return len;
+}
+
+// Sets a non-repeated string field in a message.
+static size_t str_handler(void *closure, const void *hd, const char *str,
+ size_t len, const upb_bufhandle *handle) {
+ rupb_Message *msg = closure;
+ const size_t *ofs = hd;
+ DEREF(msg, *ofs, VALUE) = rb_str_new(str, len);
+ return len;
+}
+
+// Appends a submessage to a repeated field (a regular Ruby array for now).
+static void *appendsubmsg_handler(void *closure, const void *hd) {
+ VALUE ary = (VALUE)closure;
+ const submsg_handlerdata_t *submsgdata = hd;
+ VALUE submsg = msg_new(msgdef_getwrapper(submsgdata->md));
+ rb_ary_push(ary, submsg);
+ return msg_get(submsg);
+}
+
+// Sets a non-repeated submessage field in a message.
+static void *submsg_handler(void *closure, const void *hd) {
+ rupb_Message *msg = closure;
+ const submsg_handlerdata_t *submsgdata = hd;
+
+ if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) {
+ DEREF(msg, submsgdata->ofs, VALUE) = msg_new(msgdef_getwrapper(submsgdata->md));
+ }
+
+ VALUE submsg = DEREF(msg, submsgdata->ofs, VALUE);
+ return msg_get(submsg);
+}
+
+// Registers the parse handlers for every field of the message type that |h|
+// is bound to.  |closure| is unused.
+//
+// Repeated fields get a startseq handler plus per-element "append" handlers.
+// Non-repeated fields get handlers that store into the message at the field's
+// offset.
+static void add_handlers_for_message(const void *closure, upb_handlers *h) {
+  const rupb_MessageDef *rmd = get_rbmsgdef(upb_handlers_msgdef(h));
+  upb_msg_iter i;
+
+  for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
+    const upb_fielddef *f = upb_msg_iter_field(&i);
+    size_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)];
+
+    if (upb_fielddef_isseq(f)) {
+      upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+      upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, ofs));
+      upb_handlers_setstartseq(h, f, startseq_handler, &attr);
+      upb_handlerattr_uninit(&attr);
+
+      switch (upb_fielddef_type(f)) {
+
+#define SET_HANDLER(utype, ltype) \
+  case utype: \
+    upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
+    break;
+
+        SET_HANDLER(UPB_TYPE_BOOL, bool);
+        SET_HANDLER(UPB_TYPE_INT32, int32);
+        SET_HANDLER(UPB_TYPE_UINT32, uint32);
+        SET_HANDLER(UPB_TYPE_ENUM, int32);
+        SET_HANDLER(UPB_TYPE_FLOAT, float);
+        SET_HANDLER(UPB_TYPE_INT64, int64);
+        SET_HANDLER(UPB_TYPE_UINT64, uint64);
+        SET_HANDLER(UPB_TYPE_DOUBLE, double);
+
+#undef SET_HANDLER
+
+        case UPB_TYPE_STRING:
+        case UPB_TYPE_BYTES:
+          // XXX: doesn't currently handle split buffers.
+          upb_handlers_setstring(h, f, appendstr_handler, NULL);
+          break;
+        case UPB_TYPE_MESSAGE: {
+          upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+          upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f));
+          upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
+          upb_handlerattr_uninit(&attr);
+          break;
+        }
+      }
+
+      // The append handlers above are everything a repeated field needs.
+      // Do not fall through to the singular-field handlers below: they would
+      // register handlers for the same field a second time, and they treat
+      // the closure as a rupb_Message rather than the Ruby array.
+      continue;
+    }
+
+    switch (upb_fielddef_type(f)) {
+      case UPB_TYPE_BOOL:
+      case UPB_TYPE_INT32:
+      case UPB_TYPE_UINT32:
+      case UPB_TYPE_ENUM:
+      case UPB_TYPE_FLOAT:
+      case UPB_TYPE_INT64:
+      case UPB_TYPE_UINT64:
+      case UPB_TYPE_DOUBLE:
+        // The shim writes directly at the given offset (instead of using
+        // DEREF()) so we need to add the msg overhead.
+        upb_shim_set(h, f, ofs + sizeof(rupb_Message), -1);
+        break;
+      case UPB_TYPE_STRING:
+      case UPB_TYPE_BYTES: {
+        upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+        upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, ofs));
+        // XXX: doesn't currently handle split buffers.
+        upb_handlers_setstring(h, f, str_handler, &attr);
+        upb_handlerattr_uninit(&attr);
+        break;
+      }
+      case UPB_TYPE_MESSAGE: {
+        upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+        upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, ofs, f));
+        upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
+        upb_handlerattr_uninit(&attr);
+        break;
+      }
+    }
+  }
+}
+
+// Creates upb handlers for populating a message.
+static const upb_handlers *new_fill_handlers(const rupb_MessageDef *rmd,
+ const void *owner) {
+ return upb_handlers_newfrozen(rmd->md, owner, add_handlers_for_message, NULL);
+}
+
+
+/* msgvisitor *****************************************************************/
+
+// This is code to push the contents of a Ruby message (rupb_Message) to a upb
+// sink.
+
+static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
+ upb_selector_t ret;
+ bool ok = upb_handlers_getselector(f, type, &ret);
+ UPB_ASSERT_VAR(ok, ok);
+ return ret;
+}
+
+static void putstr(VALUE str, const upb_fielddef *f, upb_sink *sink) {
+ if (str == Qnil) return;
+
+ assert(BUILTIN_TYPE(str) == RUBY_T_STRING);
+ upb_sink subsink;
+
+ upb_sink_startstr(sink, getsel(f, UPB_HANDLER_STARTSTR), RSTRING_LEN(str),
+ &subsink);
+ upb_sink_putstring(&subsink, getsel(f, UPB_HANDLER_STRING), RSTRING_PTR(str),
+ RSTRING_LEN(str), NULL);
+ upb_sink_endstr(sink, getsel(f, UPB_HANDLER_ENDSTR));
+}
+
+static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink) {
+ if (submsg == Qnil) return;
+
+ upb_sink subsink;
+ const rupb_MessageDef *sub_rmd = get_rbmsgdef(upb_fielddef_msgsubdef(f));
+
+ upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &subsink);
+ putmsg(msg_get(submsg), sub_rmd, &subsink);
+ upb_sink_endsubmsg(sink, getsel(f, UPB_HANDLER_ENDSUBMSG));
+}
+
+static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink) {
+ if (ary == Qnil) return;
+
+ assert(BUILTIN_TYPE(ary) == RUBY_T_ARRAY);
+ upb_sink subsink;
+
+ upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
+
+ upb_fieldtype_t type = upb_fielddef_type(f);
+ upb_selector_t sel = 0;
+ if (upb_fielddef_isprimitive(f)) {
+ sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
+ }
+
+ int i;
+ for (i = 0; i < RARRAY_LEN(ary); i++) {
+ VALUE val = rb_ary_entry(ary, i);
+ switch (type) {
+
+#define T(upbtypeconst, upbtype, ctype) \
+ case upbtypeconst: \
+ upb_sink_put##upbtype(&subsink, sel, value_to_##upbtype(val)); \
+ break;
+
+ T(UPB_TYPE_FLOAT, float, float)
+ T(UPB_TYPE_DOUBLE, double, double)
+ T(UPB_TYPE_BOOL, bool, bool)
+ case UPB_TYPE_ENUM:
+ T(UPB_TYPE_INT32, int32, int32_t)
+ T(UPB_TYPE_UINT32, uint32, uint32_t)
+ T(UPB_TYPE_INT64, int64, int64_t)
+ T(UPB_TYPE_UINT64, uint64, uint64_t)
+
+ case UPB_TYPE_STRING:
+ case UPB_TYPE_BYTES:
+ putstr(val, f, &subsink);
+ break;
+ case UPB_TYPE_MESSAGE:
+ putsubmsg(val, f, &subsink);
+ break;
+
+#undef T
+
+ }
+ }
+ upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
+}
+
+// Pushes the contents of |msg| (whose layout is described by |rmd|) to |sink|
+// as a startmsg / per-field events / endmsg sequence.
+//
+// Repeated fields go through putary(), strings through putstr(), submessages
+// through putsubmsg(); primitive fields are read from the message at their
+// offset and emitted with the matching upb_sink_put*() call.
+static void putmsg(rupb_Message *msg, const rupb_MessageDef *rmd,
+                   upb_sink *sink) {
+  upb_sink_startmsg(sink);
+
+  upb_msg_iter i;
+  for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
+    uint32_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)];
+
+    if (upb_fielddef_isseq(f)) {
+      VALUE ary = DEREF(msg, ofs, VALUE);
+      if (ary != Qnil) {
+        putary(ary, f, sink);
+      }
+    } else if (upb_fielddef_isstring(f)) {
+      putstr(DEREF(msg, ofs, VALUE), f, sink);
+    } else if (upb_fielddef_issubmsg(f)) {
+      putsubmsg(DEREF(msg, ofs, VALUE), f, sink);
+    } else {
+      upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
+
+#define T(upbtypeconst, upbtype, ctype) \
+  case upbtypeconst: \
+    upb_sink_put##upbtype(sink, sel, DEREF(msg, ofs, ctype)); \
+    break;
+
+      switch (upb_fielddef_type(f)) {
+        T(UPB_TYPE_FLOAT, float, float)
+        T(UPB_TYPE_DOUBLE, double, double)
+        T(UPB_TYPE_BOOL, bool, bool)
+        case UPB_TYPE_ENUM:
+        T(UPB_TYPE_INT32, int32, int32_t)
+        T(UPB_TYPE_UINT32, uint32, uint32_t)
+        T(UPB_TYPE_INT64, int64, int64_t)
+        T(UPB_TYPE_UINT64, uint64, uint64_t)
+
+        // Strings and submessages were dispatched by the branches above, so
+        // reaching these cases means the field classification is inconsistent.
+        case UPB_TYPE_STRING:
+        case UPB_TYPE_BYTES:
+        case UPB_TYPE_MESSAGE: rb_raise(rb_eRuntimeError, "Internal error.");
+      }
+
+#undef T
+
+    }
+  }
+
+  // Initialize the status before handing it to the sink, as the other status
+  // objects in this file are; previously it was passed uninitialized.
+  // NOTE(review): the resulting status is still not inspected here.
+  upb_status status = UPB_STATUS_INIT;
+  upb_sink_endmsg(sink, &status);
+}
+
+
+/* top level ******************************************************************/
+
+static VALUE get_message_class(VALUE klass, VALUE message) {
+ rupb_MessageDef *rmd = msgdef_get(message);
+ return rmd->klass;
}
void Init_upb() {
VALUE upb = rb_define_module("Upb");
+ rb_define_singleton_method(upb, "get_message_class", get_message_class, 1);
+ rb_gc_register_address(&message_map);
+
+ cSymbolTable = rb_define_class_under(upb, "SymbolTable", rb_cObject);
+ rb_define_alloc_func(cSymbolTable, symtab_alloc);
+ rb_define_method(cSymbolTable, "load_descriptor", symtab_load_descriptor, 1);
+ rb_define_method(cSymbolTable, "lookup", symtab_lookup, 1);
cMessageDef = rb_define_class_under(upb, "MessageDef", rb_cObject);
- rb_define_singleton_method(cMessageDef, "load", msgdef_load, 2);
- rb_define_method(cMessageDef, "parse", msgdef_parse, 1);
cMessage = rb_define_class_under(upb, "Message", rb_cObject);
+ rb_define_alloc_func(cMessage, msg_alloc);
rb_define_method(cMessage, "method_missing", msg_accessor, -1);
+ rb_define_method(cMessage, "to_s", msg_tostring, 0);
+ rb_define_singleton_method(cMessage, "parse", msg_parse, 1);
+ rb_define_singleton_method(cMessage, "serialize", msg_serialize, 1);
+
+ objcache_init();
+
+ // This causes atexit crashes for unknown reasons. :(
+ // ruby_vm_at_exit(objcache_uninit);
}
diff --git a/upb/def.c b/upb/def.c
index b1598d8..fde2ee8 100644
--- a/upb/def.c
+++ b/upb/def.c
@@ -628,6 +628,7 @@ upb_fielddef *upb_fielddef_new(const void *owner) {
f->tagdelim = false;
f->is_extension_ = false;
f->lazy_ = false;
+ f->packed_ = true;
// For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
// with all integer types and is in some since more "default" since the most
@@ -735,6 +736,10 @@ bool upb_fielddef_lazy(const upb_fielddef *f) {
return f->lazy_;
}
+bool upb_fielddef_packed(const upb_fielddef *f) {
+ return f->packed_;
+}
+
const char *upb_fielddef_name(const upb_fielddef *f) {
return upb_def_fullname(UPB_UPCAST(f));
}
@@ -1030,6 +1035,11 @@ void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
f->lazy_ = lazy;
}
+void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
+ assert(!upb_fielddef_isfrozen(f));
+ f->packed_ = packed;
+}
+
void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
assert(!upb_fielddef_isfrozen(f));
assert(upb_fielddef_checklabel(label));
@@ -1341,7 +1351,7 @@ bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
upb_status_seterrmsg(s, "field name or number were not set");
return false;
} else if(upb_msgdef_itof(m, upb_fielddef_number(f)) ||
- upb_msgdef_ntof(m, upb_fielddef_name(f))) {
+ upb_msgdef_ntofz(m, upb_fielddef_name(f))) {
upb_status_seterrmsg(s, "duplicate field name or number");
return false;
}
@@ -1365,20 +1375,13 @@ const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
upb_value_getptr(val) : NULL;
}
-const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name) {
+const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
+ size_t len) {
upb_value val;
- return upb_strtable_lookup(&m->ntof, name, &val) ?
+ return upb_strtable_lookup2(&m->ntof, name, len, &val) ?
upb_value_getptr(val) : NULL;
}
-upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i) {
- return (upb_fielddef*)upb_msgdef_itof(m, i);
-}
-
-upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m, const char *name) {
- return (upb_fielddef*)upb_msgdef_ntof(m, name);
-}
-
int upb_msgdef_numfields(const upb_msgdef *m) {
return upb_strtable_count(&m->ntof);
}
diff --git a/upb/def.h b/upb/def.h
index 7a9a655..2699fbf 100644
--- a/upb/def.h
+++ b/upb/def.h
@@ -324,6 +324,13 @@ UPB_DEFINE_DEF(upb::FieldDef, fielddef, FIELD,
// contain both regular FieldOptions like "lazy" *and* custom options).
bool lazy() const;
+ // For non-string, non-submessage fields, this indicates whether binary
+ // protobufs are encoded in packed or non-packed format.
+ //
+ // TODO(haberman): see note above about putting options like this into a
+ // FieldOptions container.
+ bool packed() const;
+
// An integer that can be used as an index into an array of fields for
// whatever message this field belongs to. Guaranteed to be less than
// f->containing_type()->field_count(). May only be accessed once the def has
@@ -430,11 +437,14 @@ UPB_DEFINE_DEF(upb::FieldDef, fielddef, FIELD,
bool set_containing_type_name(const char *name, Status* status);
bool set_containing_type_name(const std::string& name, Status* status);
- // When we freeze, we ensure that this can only be true for length-delimited
- // message fields. Prior to freezing this can be true or false with no
- // restrictions.
+ // Defaults to false. When we freeze, we ensure that this can only be true
+ // for length-delimited message fields. Prior to freezing this can be true or
+ // false with no restrictions.
void set_lazy(bool lazy);
+ // Defaults to true. Sets whether this field is encoded in packed format.
+ void set_packed(bool packed);
+
// "type" or "descriptor_type" MUST be set explicitly before the fielddef is
// finalized. These setters require that the enum value is valid; if the
// value did not come directly from an enum constant, the caller should
@@ -515,6 +525,7 @@ UPB_DEFINE_STRUCT(upb_fielddef, upb_def,
bool type_is_set_; // False until type is explicitly set.
bool is_extension_;
bool lazy_;
+ bool packed_;
upb_intfmt_t intfmt;
bool tagdelim;
upb_fieldtype_t type_;
@@ -525,13 +536,13 @@ UPB_DEFINE_STRUCT(upb_fielddef, upb_def,
));
#define UPB_FIELDDEF_INIT(label, type, intfmt, tagdelim, is_extension, lazy, \
- name, num, msgdef, subdef, selector_base, index, \
- defaultval, refs, ref2s) \
+ packed, name, num, msgdef, subdef, selector_base, \
+ index, defaultval, refs, ref2s) \
{ \
UPB_DEF_INIT(name, UPB_DEF_FIELD, refs, ref2s), defaultval, {msgdef}, \
{subdef}, false, false, \
type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES, true, is_extension, \
- lazy, intfmt, tagdelim, type, label, num, selector_base, index \
+ lazy, packed, intfmt, tagdelim, type, label, num, selector_base, index \
}
UPB_BEGIN_EXTERN_C // {
@@ -561,6 +572,7 @@ uint32_t upb_fielddef_number(const upb_fielddef *f);
const char *upb_fielddef_name(const upb_fielddef *f);
bool upb_fielddef_isextension(const upb_fielddef *f);
bool upb_fielddef_lazy(const upb_fielddef *f);
+bool upb_fielddef_packed(const upb_fielddef *f);
const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f);
upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f);
const char *upb_fielddef_containingtypename(upb_fielddef *f);
@@ -596,6 +608,7 @@ bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
upb_status *s);
void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension);
void upb_fielddef_setlazy(upb_fielddef *f, bool lazy);
+void upb_fielddef_setpacked(upb_fielddef *f, bool packed);
void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt);
void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim);
void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t val);
@@ -668,9 +681,26 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE(
// These return NULL if the field is not found.
FieldDef* FindFieldByNumber(uint32_t number);
- FieldDef* FindFieldByName(const char *name);
+ FieldDef* FindFieldByName(const char *name, size_t len);
const FieldDef* FindFieldByNumber(uint32_t number) const;
- const FieldDef* FindFieldByName(const char* name) const;
+ const FieldDef* FindFieldByName(const char* name, size_t len) const;
+
+
+ FieldDef* FindFieldByName(const char *name) {
+ return FindFieldByName(name, strlen(name));
+ }
+ const FieldDef* FindFieldByName(const char *name) const {
+ return FindFieldByName(name, strlen(name));
+ }
+
+ template <class T>
+ FieldDef* FindFieldByName(const T& str) {
+ return FindFieldByName(str.c_str(), str.size());
+ }
+ template <class T>
+ const FieldDef* FindFieldByName(const T& str) const {
+ return FindFieldByName(str.c_str(), str.size());
+ }
// Returns a new msgdef that is a copy of the given msgdef (and a copy of all
// the fields) but with any references to submessages broken and replaced
@@ -759,12 +789,30 @@ bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname, upb_status *s);
upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner);
bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
upb_status *s);
+
+// Field lookup in a couple of different variations:
+// - itof = int to field
+// - ntof = name to field
+// - ntofz = name to field, null-terminated string.
const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i);
-const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name);
-upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i);
-upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m, const char *name);
+const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
+ size_t len);
int upb_msgdef_numfields(const upb_msgdef *m);
+UPB_INLINE const upb_fielddef *upb_msgdef_ntofz(const upb_msgdef *m,
+ const char *name) {
+ return upb_msgdef_ntof(m, name, strlen(name));
+}
+
+UPB_INLINE upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i) {
+ return (upb_fielddef*)upb_msgdef_itof(m, i);
+}
+
+UPB_INLINE upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m,
+ const char *name, size_t len) {
+ return (upb_fielddef *)upb_msgdef_ntof(m, name, len);
+}
+
// upb_msg_iter i;
// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
// upb_fielddef *f = upb_msg_iter_field(&i);
@@ -1027,6 +1075,12 @@ inline bool FieldDef::lazy() const {
inline void FieldDef::set_lazy(bool lazy) {
upb_fielddef_setlazy(this, lazy);
}
+inline bool FieldDef::packed() const {
+ return upb_fielddef_packed(this);
+}
+inline void FieldDef::set_packed(bool packed) {
+ upb_fielddef_setpacked(this, packed);
+}
inline const MessageDef* FieldDef::containing_type() const {
return upb_fielddef_containingtype(this);
}
@@ -1189,14 +1243,15 @@ inline bool MessageDef::AddField(const reffed_ptr<FieldDef>& f, Status* s) {
inline FieldDef* MessageDef::FindFieldByNumber(uint32_t number) {
return upb_msgdef_itof_mutable(this, number);
}
-inline FieldDef* MessageDef::FindFieldByName(const char* name) {
- return upb_msgdef_ntof_mutable(this, name);
+inline FieldDef* MessageDef::FindFieldByName(const char* name, size_t len) {
+ return upb_msgdef_ntof_mutable(this, name, len);
}
inline const FieldDef* MessageDef::FindFieldByNumber(uint32_t number) const {
return upb_msgdef_itof(this, number);
}
-inline const FieldDef* MessageDef::FindFieldByName(const char* name) const {
- return upb_msgdef_ntof(this, name);
+inline const FieldDef *MessageDef::FindFieldByName(const char *name,
+ size_t len) const {
+ return upb_msgdef_ntof(this, name, len);
}
inline MessageDef* MessageDef::Dup(const void *owner) const {
return upb_msgdef_dup(this, owner);
diff --git a/upb/descriptor/descriptor.upb.c b/upb/descriptor/descriptor.upb.c
index 31503b1..3678db1 100755
--- a/upb/descriptor/descriptor.upb.c
+++ b/upb/descriptor/descriptor.upb.c
@@ -40,87 +40,87 @@ static const upb_msgdef msgs[20] = {
};
static const upb_fielddef fields[81] = {
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "aggregate_value", 8, &msgs[18], NULL, 15, 6, {0},&reftables[40], &reftables[41]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "allow_alias", 2, &msgs[3], NULL, 6, 1, {0},&reftables[42], &reftables[43]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "cc_generic_services", 16, &msgs[10], NULL, 17, 6, {0},&reftables[44], &reftables[45]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, "ctype", 1, &msgs[7], UPB_UPCAST(&enums[2]), 6, 1, {0},&reftables[46], &reftables[47]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "default_value", 7, &msgs[6], NULL, 16, 7, {0},&reftables[48], &reftables[49]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, "dependency", 3, &msgs[8], NULL, 30, 8, {0},&reftables[50], &reftables[51]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "deprecated", 3, &msgs[7], NULL, 8, 3, {0},&reftables[52], &reftables[53]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, "double_value", 6, &msgs[18], NULL, 11, 4, {0},&reftables[54], &reftables[55]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[56], &reftables[57]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "enum_type", 4, &msgs[0], UPB_UPCAST(&msgs[2]), 16, 2, {0},&reftables[58], &reftables[59]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "enum_type", 5, &msgs[8], UPB_UPCAST(&msgs[2]), 13, 1, {0},&reftables[60], &reftables[61]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "experimental_map_key", 9, &msgs[7], NULL, 10, 5, {0},&reftables[62], &reftables[63]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "extendee", 2, &msgs[6], NULL, 7, 2, {0},&reftables[64], &reftables[65]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "extension", 7, &msgs[8], UPB_UPCAST(&msgs[6]), 19, 3, {0},&reftables[66], &reftables[67]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "extension", 6, &msgs[0], UPB_UPCAST(&msgs[6]), 22, 4, {0},&reftables[68], &reftables[69]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "extension_range", 5, &msgs[0], UPB_UPCAST(&msgs[1]), 19, 3, {0},&reftables[70], &reftables[71]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "field", 2, &msgs[0], UPB_UPCAST(&msgs[6]), 10, 0, {0},&reftables[72], &reftables[73]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "file", 1, &msgs[9], UPB_UPCAST(&msgs[8]), 5, 0, {0},&reftables[74], &reftables[75]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "go_package", 11, &msgs[10], NULL, 14, 5, {0},&reftables[76], &reftables[77]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "identifier_value", 3, &msgs[18], NULL, 6, 1, {0},&reftables[78], &reftables[79]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "input_type", 2, &msgs[12], NULL, 7, 2, {0},&reftables[80], &reftables[81]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, "is_extension", 2, &msgs[19], NULL, 5, 1, {0},&reftables[82], &reftables[83]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "java_generate_equals_and_hash", 20, &msgs[10], NULL, 20, 9, {0},&reftables[84], &reftables[85]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "java_generic_services", 17, &msgs[10], NULL, 18, 7, {0},&reftables[86], &reftables[87]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "java_multiple_files", 10, &msgs[10], NULL, 13, 4, {0},&reftables[88], &reftables[89]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "java_outer_classname", 8, &msgs[10], NULL, 9, 2, {0},&reftables[90], &reftables[91]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "java_package", 1, &msgs[10], NULL, 6, 1, {0},&reftables[92], &reftables[93]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, "label", 4, &msgs[6], UPB_UPCAST(&enums[0]), 11, 4, {0},&reftables[94], &reftables[95]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "lazy", 5, &msgs[7], NULL, 9, 4, {0},&reftables[96], &reftables[97]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "leading_comments", 3, &msgs[17], NULL, 8, 2, {0},&reftables[98], &reftables[99]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "location", 1, &msgs[16], UPB_UPCAST(&msgs[17]), 5, 0, {0},&reftables[100], &reftables[101]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "message_set_wire_format", 1, &msgs[11], NULL, 6, 1, {0},&reftables[102], &reftables[103]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "message_type", 4, &msgs[8], UPB_UPCAST(&msgs[0]), 10, 0, {0},&reftables[104], &reftables[105]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "method", 2, &msgs[14], UPB_UPCAST(&msgs[12]), 6, 0, {0},&reftables[106], &reftables[107]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[8], NULL, 22, 6, {0},&reftables[108], &reftables[109]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[14], NULL, 8, 2, {0},&reftables[110], &reftables[111]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "name", 2, &msgs[18], UPB_UPCAST(&msgs[19]), 5, 0, {0},&reftables[112], &reftables[113]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[4], NULL, 4, 1, {0},&reftables[114], &reftables[115]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[0], NULL, 24, 6, {0},&reftables[116], &reftables[117]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[12], NULL, 4, 1, {0},&reftables[118], &reftables[119]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[2], NULL, 8, 2, {0},&reftables[120], &reftables[121]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[6], NULL, 4, 1, {0},&reftables[122], &reftables[123]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, "name_part", 1, &msgs[19], NULL, 2, 0, {0},&reftables[124], &reftables[125]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, "negative_int_value", 5, &msgs[18], NULL, 10, 3, {0},&reftables[126], &reftables[127]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "nested_type", 3, &msgs[0], UPB_UPCAST(&msgs[0]), 13, 1, {0},&reftables[128], &reftables[129]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[11], NULL, 7, 2, {0},&reftables[130], &reftables[131]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "number", 3, &msgs[6], NULL, 10, 3, {0},&reftables[132], &reftables[133]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "number", 2, &msgs[4], NULL, 7, 2, {0},&reftables[134], &reftables[135]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, "optimize_for", 9, &msgs[10], UPB_UPCAST(&enums[3]), 12, 3, {0},&reftables[136], &reftables[137]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 7, &msgs[0], UPB_UPCAST(&msgs[11]), 23, 5, {0},&reftables[138], &reftables[139]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 3, &msgs[2], UPB_UPCAST(&msgs[3]), 7, 1, {0},&reftables[140], &reftables[141]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 8, &msgs[6], UPB_UPCAST(&msgs[7]), 3, 0, {0},&reftables[142], &reftables[143]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 3, &msgs[4], UPB_UPCAST(&msgs[5]), 3, 0, {0},&reftables[144], &reftables[145]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 8, &msgs[8], UPB_UPCAST(&msgs[10]), 20, 4, {0},&reftables[146], &reftables[147]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 3, &msgs[14], UPB_UPCAST(&msgs[15]), 7, 1, {0},&reftables[148], &reftables[149]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 4, &msgs[12], UPB_UPCAST(&msgs[13]), 3, 0, {0},&reftables[150], &reftables[151]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "output_type", 3, &msgs[12], NULL, 10, 3, {0},&reftables[152], &reftables[153]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "package", 2, &msgs[8], NULL, 25, 7, {0},&reftables[154], &reftables[155]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "packed", 2, &msgs[7], NULL, 7, 2, {0},&reftables[156], &reftables[157]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "path", 1, &msgs[17], NULL, 4, 0, {0},&reftables[158], &reftables[159]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, "positive_int_value", 4, &msgs[18], NULL, 9, 2, {0},&reftables[160], &reftables[161]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "public_dependency", 10, &msgs[8], NULL, 35, 9, {0},&reftables[162], &reftables[163]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "py_generic_services", 18, &msgs[10], NULL, 19, 8, {0},&reftables[164], &reftables[165]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "service", 6, &msgs[8], UPB_UPCAST(&msgs[14]), 16, 2, {0},&reftables[166], &reftables[167]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "source_code_info", 9, &msgs[8], UPB_UPCAST(&msgs[16]), 21, 5, {0},&reftables[168], &reftables[169]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "span", 2, &msgs[17], NULL, 7, 1, {0},&reftables[170], &reftables[171]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[172], &reftables[173]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, "string_value", 7, &msgs[18], NULL, 12, 5, {0},&reftables[174], &reftables[175]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "trailing_comments", 4, &msgs[17], NULL, 11, 3, {0},&reftables[176], &reftables[177]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, "type", 5, &msgs[6], UPB_UPCAST(&enums[1]), 12, 5, {0},&reftables[178], &reftables[179]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "type_name", 6, &msgs[6], NULL, 13, 6, {0},&reftables[180], &reftables[181]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[5], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[182], &reftables[183]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[15], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[184], &reftables[185]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[3], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[186], &reftables[187]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[13], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[188], &reftables[189]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[10], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[190], &reftables[191]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[11], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[192], &reftables[193]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[7], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[194], &reftables[195]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "value", 2, &msgs[2], UPB_UPCAST(&msgs[4]), 6, 0, {0},&reftables[196], &reftables[197]),
- UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "weak", 10, &msgs[7], NULL, 13, 6, {0},&reftables[198], &reftables[199]),
- UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "weak_dependency", 11, &msgs[8], NULL, 38, 10, {0},&reftables[200], &reftables[201]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[18], NULL, 15, 6, {0},&reftables[40], &reftables[41]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[3], NULL, 6, 1, {0},&reftables[42], &reftables[43]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[10], NULL, 17, 6, {0},&reftables[44], &reftables[45]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[7], UPB_UPCAST(&enums[2]), 6, 1, {0},&reftables[46], &reftables[47]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[6], NULL, 16, 7, {0},&reftables[48], &reftables[49]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[8], NULL, 30, 8, {0},&reftables[50], &reftables[51]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[7], NULL, 8, 3, {0},&reftables[52], &reftables[53]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[18], NULL, 11, 4, {0},&reftables[54], &reftables[55]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[56], &reftables[57]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], UPB_UPCAST(&msgs[2]), 16, 2, {0},&reftables[58], &reftables[59]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[8], UPB_UPCAST(&msgs[2]), 13, 1, {0},&reftables[60], &reftables[61]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "experimental_map_key", 9, &msgs[7], NULL, 10, 5, {0},&reftables[62], &reftables[63]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[6], NULL, 7, 2, {0},&reftables[64], &reftables[65]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[8], UPB_UPCAST(&msgs[6]), 19, 3, {0},&reftables[66], &reftables[67]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], UPB_UPCAST(&msgs[6]), 22, 4, {0},&reftables[68], &reftables[69]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], UPB_UPCAST(&msgs[1]), 19, 3, {0},&reftables[70], &reftables[71]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], UPB_UPCAST(&msgs[6]), 10, 0, {0},&reftables[72], &reftables[73]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[9], UPB_UPCAST(&msgs[8]), 5, 0, {0},&reftables[74], &reftables[75]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[10], NULL, 14, 5, {0},&reftables[76], &reftables[77]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[18], NULL, 6, 1, {0},&reftables[78], &reftables[79]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[12], NULL, 7, 2, {0},&reftables[80], &reftables[81]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[19], NULL, 5, 1, {0},&reftables[82], &reftables[83]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[10], NULL, 20, 9, {0},&reftables[84], &reftables[85]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[10], NULL, 18, 7, {0},&reftables[86], &reftables[87]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[10], NULL, 13, 4, {0},&reftables[88], &reftables[89]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[10], NULL, 9, 2, {0},&reftables[90], &reftables[91]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[10], NULL, 6, 1, {0},&reftables[92], &reftables[93]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[6], UPB_UPCAST(&enums[0]), 11, 4, {0},&reftables[94], &reftables[95]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[7], NULL, 9, 4, {0},&reftables[96], &reftables[97]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[17], NULL, 8, 2, {0},&reftables[98], &reftables[99]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[16], UPB_UPCAST(&msgs[17]), 5, 0, {0},&reftables[100], &reftables[101]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[11], NULL, 6, 1, {0},&reftables[102], &reftables[103]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[8], UPB_UPCAST(&msgs[0]), 10, 0, {0},&reftables[104], &reftables[105]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[14], UPB_UPCAST(&msgs[12]), 6, 0, {0},&reftables[106], &reftables[107]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[8], NULL, 22, 6, {0},&reftables[108], &reftables[109]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[14], NULL, 8, 2, {0},&reftables[110], &reftables[111]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[18], UPB_UPCAST(&msgs[19]), 5, 0, {0},&reftables[112], &reftables[113]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[4], NULL, 4, 1, {0},&reftables[114], &reftables[115]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 24, 6, {0},&reftables[116], &reftables[117]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[12], NULL, 4, 1, {0},&reftables[118], &reftables[119]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[2], NULL, 8, 2, {0},&reftables[120], &reftables[121]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[6], NULL, 4, 1, {0},&reftables[122], &reftables[123]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[19], NULL, 2, 0, {0},&reftables[124], &reftables[125]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[18], NULL, 10, 3, {0},&reftables[126], &reftables[127]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], UPB_UPCAST(&msgs[0]), 13, 1, {0},&reftables[128], &reftables[129]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[11], NULL, 7, 2, {0},&reftables[130], &reftables[131]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[6], NULL, 10, 3, {0},&reftables[132], &reftables[133]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[4], NULL, 7, 2, {0},&reftables[134], &reftables[135]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[10], UPB_UPCAST(&enums[3]), 12, 3, {0},&reftables[136], &reftables[137]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], UPB_UPCAST(&msgs[11]), 23, 5, {0},&reftables[138], &reftables[139]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[2], UPB_UPCAST(&msgs[3]), 7, 1, {0},&reftables[140], &reftables[141]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[6], UPB_UPCAST(&msgs[7]), 3, 0, {0},&reftables[142], &reftables[143]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[4], UPB_UPCAST(&msgs[5]), 3, 0, {0},&reftables[144], &reftables[145]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[8], UPB_UPCAST(&msgs[10]), 20, 4, {0},&reftables[146], &reftables[147]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[14], UPB_UPCAST(&msgs[15]), 7, 1, {0},&reftables[148], &reftables[149]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[12], UPB_UPCAST(&msgs[13]), 3, 0, {0},&reftables[150], &reftables[151]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[12], NULL, 10, 3, {0},&reftables[152], &reftables[153]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[8], NULL, 25, 7, {0},&reftables[154], &reftables[155]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[7], NULL, 7, 2, {0},&reftables[156], &reftables[157]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[17], NULL, 4, 0, {0},&reftables[158], &reftables[159]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[18], NULL, 9, 2, {0},&reftables[160], &reftables[161]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[8], NULL, 35, 9, {0},&reftables[162], &reftables[163]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[10], NULL, 19, 8, {0},&reftables[164], &reftables[165]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[8], UPB_UPCAST(&msgs[14]), 16, 2, {0},&reftables[166], &reftables[167]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[8], UPB_UPCAST(&msgs[16]), 21, 5, {0},&reftables[168], &reftables[169]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[17], NULL, 7, 1, {0},&reftables[170], &reftables[171]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[172], &reftables[173]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[18], NULL, 12, 5, {0},&reftables[174], &reftables[175]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[17], NULL, 11, 3, {0},&reftables[176], &reftables[177]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[6], UPB_UPCAST(&enums[1]), 12, 5, {0},&reftables[178], &reftables[179]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[6], NULL, 13, 6, {0},&reftables[180], &reftables[181]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[5], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[182], &reftables[183]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[15], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[184], &reftables[185]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[3], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[186], &reftables[187]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[13], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[188], &reftables[189]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[10], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[190], &reftables[191]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[192], &reftables[193]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[7], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[194], &reftables[195]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[2], UPB_UPCAST(&msgs[4]), 6, 0, {0},&reftables[196], &reftables[197]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[7], NULL, 13, 6, {0},&reftables[198], &reftables[199]),
+ UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[8], NULL, 38, 10, {0},&reftables[200], &reftables[201]),
};
static const upb_enumdef enums[4] = {
diff --git a/upb/descriptor/reader.c b/upb/descriptor/reader.c
index fdfa4e3..1baad81 100644
--- a/upb/descriptor/reader.c
+++ b/upb/descriptor/reader.c
@@ -272,6 +272,9 @@ static bool field_startmsg(void *closure, const void *hd) {
r->f = upb_fielddef_new(&r->defs);
free(r->default_string);
r->default_string = NULL;
+
+ // fielddefs default to packed, but descriptors default to non-packed.
+ upb_fielddef_setpacked(r->f, false);
return true;
}
@@ -378,6 +381,13 @@ static bool field_onlazy(void *closure, const void *hd, bool val) {
return true;
}
+static bool field_onpacked(void *closure, const void *hd, bool val) {  // Bool handler registered for FieldOptions.packed in reghandlers().
+ UPB_UNUSED(hd);  // Handler data is not needed here.
+ upb_descreader *r = closure;  // The closure is the descriptor reader.
+ upb_fielddef_setpacked(r->f, val);  // Apply the parsed option to r->f, the reader's current fielddef.
+ return true;
+}
+
static bool field_ontype(void *closure, const void *hd, int32_t val) {
UPB_UNUSED(hd);
upb_descreader *r = closure;
@@ -552,6 +562,7 @@ static void reghandlers(const void *closure, upb_handlers *h) {
&field_ondefaultval, NULL);
} else if (m == D(FieldOptions)) {
upb_handlers_setbool(h, D(FieldOptions_lazy), &field_onlazy, NULL);
+ upb_handlers_setbool(h, D(FieldOptions_packed), &field_onpacked, NULL);
}
}
diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c
index 8452bea..64689f6 100644
--- a/upb/pb/compile_decoder.c
+++ b/upb/pb/compile_decoder.c
@@ -149,7 +149,7 @@ const upb_pbdecodermethod *upb_pbdecodermethod_new(
}
-/* compiler *******************************************************************/
+/* bytecode compiler **********************************************************/
// Data used only at compilation time.
typedef struct {
@@ -575,8 +575,8 @@ static void putsel(compiler *c, opcode op, upb_selector_t sel,
// Puts an opcode to call a callback, but only if a callback actually exists for
// this field and handler type.
-static void putcb(compiler *c, opcode op, const upb_handlers *h,
- const upb_fielddef *f, upb_handlertype_t type) {
+static void maybeput(compiler *c, opcode op, const upb_handlers *h,
+ const upb_fielddef *f, upb_handlertype_t type) {
putsel(c, op, getsel(f, type), h);
}
@@ -589,40 +589,165 @@ static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
}
+
+/* bytecode compiler code generation ******************************************/
+
+// Symbolic names for our local labels.
+#define LABEL_LOOPSTART 1 // Top of a repeated field loop.
+#define LABEL_LOOPBREAK 2 // To jump out of a repeated loop
+#define LABEL_FIELD 3 // Jump backward to find the most recent field.
+#define LABEL_ENDMSG 4 // To reach the OP_ENDMSG instr for this msg.
+
+// Generates bytecode to parse a single non-lazy message field.
+static void generate_msgfield(compiler *c, const upb_fielddef *f,
+ upb_pbdecodermethod *method) {
+ const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
+ const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
+
+ if (!sub_m) {
+ // Don't emit any code for this field at all; it will be parsed as an
+ // unknown field.
+ return;
+ }
+
+ label(c, LABEL_FIELD);
+
+ int wire_type =
+ (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
+ ? UPB_WIRE_TYPE_DELIMITED
+ : UPB_WIRE_TYPE_START_GROUP;
+
+ if (upb_fielddef_isseq(f)) {
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ putchecktag(c, f, wire_type, LABEL_DISPATCH);
+ dispatchtarget(c, method, f, wire_type);
+ putop(c, OP_PUSHTAGDELIM, 0);
+ putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
+ label(c, LABEL_LOOPSTART);
+ putpush(c, f);
+ putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
+ putop(c, OP_CALL, sub_m);
+ putop(c, OP_POP);
+ maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
+ if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
+ putop(c, OP_SETDELIM);
+ }
+ putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
+ putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
+ putop(c, OP_BRANCH, -LABEL_LOOPSTART);
+ label(c, LABEL_LOOPBREAK);
+ putop(c, OP_POP);
+ maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
+ } else {
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ putchecktag(c, f, wire_type, LABEL_DISPATCH);
+ dispatchtarget(c, method, f, wire_type);
+ putpush(c, f);
+ putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
+ putop(c, OP_CALL, sub_m);
+ putop(c, OP_POP);
+ maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
+ if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
+ putop(c, OP_SETDELIM);
+ }
+ }
+}
+
+// Generates bytecode to parse a single string or lazy submessage field.
+static void generate_delimfield(compiler *c, const upb_fielddef *f,
+ upb_pbdecodermethod *method) {
+ const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
+
+ label(c, LABEL_FIELD);
+ if (upb_fielddef_isseq(f)) {
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
+ dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
+ putop(c, OP_PUSHTAGDELIM, 0);
+ putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
+ label(c, LABEL_LOOPSTART);
+ putop(c, OP_PUSHLENDELIM);
+ putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
+ // Need to emit even if no handler to skip past the string.
+ putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
+ putop(c, OP_POP);
+ maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
+ putop(c, OP_SETDELIM);
+ putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
+ putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
+ putop(c, OP_BRANCH, -LABEL_LOOPSTART);
+ label(c, LABEL_LOOPBREAK);
+ putop(c, OP_POP);
+ maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
+ } else {
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
+ dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
+ putop(c, OP_PUSHLENDELIM);
+ putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
+ putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
+ putop(c, OP_POP);
+ maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
+ putop(c, OP_SETDELIM);
+ }
+}
+
+// Generates bytecode to parse a single primitive field.
+static void generate_primitivefield(compiler *c, const upb_fielddef *f,
+ upb_pbdecodermethod *method) {
+ label(c, LABEL_FIELD);
+
+ const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
+ upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
+
+ // From a decoding perspective, ENUM is the same as INT32.
+ if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
+ descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
+
+ opcode parse_type = (opcode)descriptor_type;
+
+ // TODO(haberman): generate packed or non-packed first depending on "packed"
+ // setting in the fielddef. This will favor (in speed) whichever was
+ // specified.
+
+ assert((int)parse_type >= 0 && parse_type <= OP_MAX);
+ upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
+ int wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
+ if (upb_fielddef_isseq(f)) {
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
+ dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
+ putop(c, OP_PUSHLENDELIM);
+ putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Packed
+ label(c, LABEL_LOOPSTART);
+ putop(c, parse_type, sel);
+ putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
+ putop(c, OP_BRANCH, -LABEL_LOOPSTART);
+ dispatchtarget(c, method, f, wire_type);
+ putop(c, OP_PUSHTAGDELIM, 0);
+ putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Non-packed
+ label(c, LABEL_LOOPSTART);
+ putop(c, parse_type, sel);
+ putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
+ putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
+ putop(c, OP_BRANCH, -LABEL_LOOPSTART);
+ label(c, LABEL_LOOPBREAK);
+ putop(c, OP_POP); // Packed and non-packed join.
+ maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
+ putop(c, OP_SETDELIM); // Could remove for non-packed by dup ENDSEQ.
+ } else {
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ putchecktag(c, f, wire_type, LABEL_DISPATCH);
+ dispatchtarget(c, method, f, wire_type);
+ putop(c, parse_type, sel);
+ }
+}
+
// Adds bytecode for parsing the given message to the given decoderplan,
// while adding all dispatch targets to this message's dispatch table.
static void compile_method(compiler *c, upb_pbdecodermethod *method) {
assert(method);
- // Symbolic names for our local labels.
- const int LABEL_LOOPSTART = 1; // Top of a repeated field loop.
- const int LABEL_LOOPBREAK = 2; // To jump out of a repeated loop
- const int LABEL_FIELD = 3; // Jump backward to find the most recent field.
- const int LABEL_ENDMSG = 4; // To reach the OP_ENDMSG instr for this msg.
-
- // Index is descriptor type.
- static const uint8_t native_wire_types[] = {
- UPB_WIRE_TYPE_END_GROUP, // ENDGROUP
- UPB_WIRE_TYPE_64BIT, // DOUBLE
- UPB_WIRE_TYPE_32BIT, // FLOAT
- UPB_WIRE_TYPE_VARINT, // INT64
- UPB_WIRE_TYPE_VARINT, // UINT64
- UPB_WIRE_TYPE_VARINT, // INT32
- UPB_WIRE_TYPE_64BIT, // FIXED64
- UPB_WIRE_TYPE_32BIT, // FIXED32
- UPB_WIRE_TYPE_VARINT, // BOOL
- UPB_WIRE_TYPE_DELIMITED, // STRING
- UPB_WIRE_TYPE_START_GROUP, // GROUP
- UPB_WIRE_TYPE_DELIMITED, // MESSAGE
- UPB_WIRE_TYPE_DELIMITED, // BYTES
- UPB_WIRE_TYPE_VARINT, // UINT32
- UPB_WIRE_TYPE_VARINT, // ENUM
- UPB_WIRE_TYPE_32BIT, // SFIXED32
- UPB_WIRE_TYPE_64BIT, // SFIXED64
- UPB_WIRE_TYPE_VARINT, // SINT32
- UPB_WIRE_TYPE_VARINT, // SINT64
- };
-
// Clear all entries in the dispatch table.
upb_inttable_uninit(&method->dispatch);
upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
@@ -637,128 +762,15 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
upb_msg_iter i;
for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
- upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
upb_fieldtype_t type = upb_fielddef_type(f);
- // From a decoding perspective, ENUM is the same as INT32.
- if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
- descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
-
if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
- const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
- if (!sub_m) {
- // Don't emit any code for this field at all; it will be parsed as an
- // unknown field.
- continue;
- }
-
- label(c, LABEL_FIELD);
-
- int wire_type = (descriptor_type == UPB_DESCRIPTOR_TYPE_MESSAGE)
- ? UPB_WIRE_TYPE_DELIMITED
- : UPB_WIRE_TYPE_START_GROUP;
- if (upb_fielddef_isseq(f)) {
- putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
- putchecktag(c, f, wire_type, LABEL_DISPATCH);
- dispatchtarget(c, method, f, wire_type);
- putop(c, OP_PUSHTAGDELIM, 0);
- putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
- label(c, LABEL_LOOPSTART);
- putpush(c, f);
- putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
- putop(c, OP_CALL, sub_m);
- putop(c, OP_POP);
- putcb(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
- if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
- putop(c, OP_SETDELIM);
- }
- putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
- putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
- putop(c, OP_BRANCH, -LABEL_LOOPSTART);
- label(c, LABEL_LOOPBREAK);
- putop(c, OP_POP);
- putcb(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
- } else {
- putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
- putchecktag(c, f, wire_type, LABEL_DISPATCH);
- dispatchtarget(c, method, f, wire_type);
- putpush(c, f);
- putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
- putop(c, OP_CALL, sub_m);
- putop(c, OP_POP);
- putcb(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
- if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
- putop(c, OP_SETDELIM);
- }
- }
+ generate_msgfield(c, f, method);
} else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
type == UPB_TYPE_MESSAGE) {
- label(c, LABEL_FIELD);
- if (upb_fielddef_isseq(f)) {
- putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
- putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
- dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
- putop(c, OP_PUSHTAGDELIM, 0);
- putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
- label(c, LABEL_LOOPSTART);
- putop(c, OP_PUSHLENDELIM);
- putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
- // Need to emit even if no handler to skip past the string.
- putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
- putop(c, OP_POP);
- putcb(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
- putop(c, OP_SETDELIM);
- putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
- putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
- putop(c, OP_BRANCH, -LABEL_LOOPSTART);
- label(c, LABEL_LOOPBREAK);
- putop(c, OP_POP);
- putcb(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
- } else {
- putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
- putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
- dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
- putop(c, OP_PUSHLENDELIM);
- putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
- putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
- putop(c, OP_POP);
- putcb(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
- putop(c, OP_SETDELIM);
- }
+ generate_delimfield(c, f, method);
} else {
- label(c, LABEL_FIELD);
- opcode parse_type = (opcode)descriptor_type;
- assert((int)parse_type >= 0 && parse_type <= OP_MAX);
- upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
- int wire_type = native_wire_types[upb_fielddef_descriptortype(f)];
- if (upb_fielddef_isseq(f)) {
- putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
- putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
- dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
- putop(c, OP_PUSHLENDELIM);
- putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Packed
- label(c, LABEL_LOOPSTART);
- putop(c, parse_type, sel);
- putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
- putop(c, OP_BRANCH, -LABEL_LOOPSTART);
- dispatchtarget(c, method, f, wire_type);
- putop(c, OP_PUSHTAGDELIM, 0);
- putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Non-packed
- label(c, LABEL_LOOPSTART);
- putop(c, parse_type, sel);
- putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
- putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
- putop(c, OP_BRANCH, -LABEL_LOOPSTART);
- label(c, LABEL_LOOPBREAK);
- putop(c, OP_POP); // Packed and non-packed join.
- putcb(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
- putop(c, OP_SETDELIM); // Could remove for non-packed by dup ENDSEQ.
- } else {
- putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
- putchecktag(c, f, wire_type, LABEL_DISPATCH);
- dispatchtarget(c, method, f, wire_type);
- putop(c, parse_type, sel);
- }
+ generate_primitivefield(c, f, method);
}
}
diff --git a/upb/pb/compile_decoder_x64.c b/upb/pb/compile_decoder_x64.c
index 44c4419..b4086c7 100644
--- a/upb/pb/compile_decoder_x64.c
+++ b/upb/pb/compile_decoder_x64.c
@@ -23,7 +23,7 @@
//
// Note: this mode requires that we can shell out to gcc.
//
-// 2. Run the test once locally. This will load the JIT code by building a
+// 2. Run the test locally. This will load the JIT code by building a
// .so (/tmp/upb-jit-code.so) and using dlopen, so more of the tooling will
// work properly (like GDB).
//
diff --git a/upb/pb/compile_decoder_x64.dasc b/upb/pb/compile_decoder_x64.dasc
index 180017f..a87b376 100644
--- a/upb/pb/compile_decoder_x64.dasc
+++ b/upb/pb/compile_decoder_x64.dasc
@@ -61,17 +61,21 @@
| add DELIMEND, DECODER->buf
|.endmacro
|
-| // OPT: use "call rel32" where possible.
+| // Calls an external C function at address "addr".
|.macro callp, addr
-|| {
-|| //int64_t ofs = (int64_t)addr - (int64_t)upb_status_init;
-|| //if (ofs > (1 << 30) || ofs < -(1 << 30)) {
| mov64 rax, (uintptr_t)addr
+|
+| // Stack must be 16-byte aligned (x86-64 ABI requires this).
+| //
+| // OPT: possibly remove this by statically ensuring correct alignment.
+| //
+| // OPT: use "call rel32" where possible.
+| push r12
+| mov r12, rsp
+| and rsp, 0xfffffffffffffff0UL // Align stack.
| call rax
-|| //} else {
-| // call &addr
-|| //}
-|| }
+| mov rsp, r12
+| pop r12
|.endmacro
|
|.macro ld64, val
@@ -208,12 +212,6 @@ static void emit_static_asm(jitcompiler *jc) {
| push r12
| push rbx
|
- | // Align stack.
- | // Since the JIT can call other functions (the JIT'ted code is not a leaf
- | // function) we must respect alignment rules. All x86-64 systems require
- | // 16-byte stack alignment.
- | sub rsp, 8
- |
| mov rbx, ARG2_64 // Preserve JIT method.
|
| mov DECODER, rdi
@@ -234,7 +232,6 @@ static void emit_static_asm(jitcompiler *jc) {
| mov rax, DECODER->size_param
| mov qword DECODER->call_len, 0
|1:
- | add rsp, 8 // Counter previous alignment.
| pop rbx
| pop r12
| pop r13
@@ -270,7 +267,6 @@ static void emit_static_asm(jitcompiler *jc) {
| // Must NOT do this before the memcpy(), otherwise memcpy() will
| // clobber the stack we are trying to save!
| mov rsp, DECODER->saved_rsp
- | add rsp, 8 // Counter previous alignment.
| pop rbx
| pop r12
| pop r13
diff --git a/upb/pb/encoder.c b/upb/pb/encoder.c
index 975f3ab..4681c20 100644
--- a/upb/pb/encoder.c
+++ b/upb/pb/encoder.c
@@ -1,421 +1,496 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2009 Google Inc. See LICENSE for details.
+ * Copyright (c) 2014 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Since we are implementing pure handlers (i.e. without any out-of-band access
+ * to pre-computed lengths), we have to buffer all submessages before we can
+ * emit even their first byte.
+ *
+ * Not knowing the size of submessages also means we can't write a perfect
+ * zero-copy implementation, even with buffering. Lengths are stored as
+ * varints, which means that we don't know how many bytes to reserve for the
+ * length until we know what the length is.
+ *
+ * This leaves us with three main choices:
+ *
+ * 1. buffer all submessage data in a temporary buffer, then copy it exactly
+ * once into the output buffer.
+ *
+ * 2. attempt to buffer data directly into the output buffer, estimating how
+ * many bytes each length will take. When our guesses are wrong, use
+ * memmove() to grow or shrink the allotted space.
+ *
+ * 3. buffer directly into the output buffer, allocating a max length
+ * ahead-of-time for each submessage length. If we overallocated, we waste
+ * space, but no memcpy() or memmove() is required. This approach requires
+ * defining a maximum size for submessages and rejecting submessages that
+ * exceed that size.
+ *
+ * (2) and (3) have the potential to have better performance, but they are more
+ * complicated and subtle to implement:
+ *
+ * (3) requires making an arbitrary choice of the maximum message size; it
+ * wastes space when submessages are shorter than this and fails
+ * completely when they are longer. This makes it more finicky and
+ * requires configuration based on the input. It also makes it impossible
+ * to perfectly match the output of reference encoders that always use the
+ * optimal amount of space for each length.
+ *
+ * (2) requires guessing the size upfront, and if multiple lengths are
+ * guessed wrong the minimum required number of memmove() operations may
+ * be complicated to compute correctly. Implemented properly, it may have
+ * a useful amortized or average cost, but more investigation is required
+ * to determine this and what the optimal algorithm is to achieve it.
+ *
+ * (1) makes you always pay for exactly one copy, but its implementation is
+ * the simplest and its performance is predictable.
+ *
+ * So for now, we implement (1) only. If we wish to optimize later, we should
+ * be able to do it without affecting users.
+ *
+ * The strategy is to buffer the segments of data that do *not* depend on
+ * unknown lengths in one buffer, and keep a separate buffer of segment pointers
+ * and lengths. When the top-level submessage ends, we can go beginning to end,
+ * alternating the writing of lengths with memcpy() of the rest of the data.
+ * At the top level though, no buffering is required.
*/
#include "upb/pb/encoder.h"
+#include "upb/pb/varint.int.h"
#include <stdlib.h>
-#include "upb/descriptor.h"
-
-/* Functions for calculating sizes of wire values. ****************************/
-
-static size_t upb_v_uint64_t_size(uint64_t val) {
-#ifdef __GNUC__
- int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0.
-#else
- int high_bit = 0;
- uint64_t tmp = val;
- while(tmp >>= 1) high_bit++;
-#endif
- return val == 0 ? 1 : high_bit / 7 + 1;
-}
-static size_t upb_v_int32_t_size(int32_t val) {
- // v_uint32's are sign-extended to maintain wire compatibility with int64s.
- return upb_v_uint64_t_size((int64_t)val);
+/* low-level buffering ********************************************************/
+
+// Low-level functions for interacting with the output buffer.
+
+// TODO(haberman): handle pushback
+static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
+ size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
+ UPB_ASSERT_VAR(n, n == len);
}
-static size_t upb_v_uint32_t_size(uint32_t val) {
- return upb_v_uint64_t_size(val);
+
+static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
+ return &e->segbuf[*e->top];
}
-static size_t upb_f_uint64_t_size(uint64_t val) {
- (void)val; // Length is independent of value.
- return sizeof(uint64_t);
+
+// Call to ensure that at least "bytes" bytes are available for writing at
+// e->ptr. Returns false if the bytes could not be allocated.
+static bool reserve(upb_pb_encoder *e, size_t bytes) {
+ if ((e->limit - e->ptr) < bytes) {
+ size_t needed = bytes + (e->ptr - e->buf);
+ size_t old_size = e->limit - e->buf;
+ size_t new_size = old_size;
+ while (new_size < needed) {
+ new_size *= 2;
+ }
+
+ char *realloc_from = (e->buf == e->initbuf) ? NULL : e->buf;
+ char *new_buf = realloc(realloc_from, new_size);
+
+ if (new_buf == NULL) {
+ return false;
+ }
+
+ if (realloc_from == NULL) {
+ memcpy(new_buf, e->initbuf, old_size);
+ }
+
+ e->ptr = new_buf + (e->ptr - e->buf);
+ e->runbegin = new_buf + (e->runbegin - e->buf);
+ e->limit = new_buf + new_size;
+ e->buf = new_buf;
+ }
+
+ return true;
}
-static size_t upb_f_uint32_t_size(uint32_t val) {
- (void)val; // Length is independent of value.
- return sizeof(uint32_t);
+
+// Call when "bytes" bytes have been written at e->ptr. The caller *must* have
+// previously called reserve() with at least this many bytes.
+static void advance(upb_pb_encoder *e, size_t bytes) {
+ assert((e->limit - e->ptr) >= bytes);
+ e->ptr += bytes;
}
+// Call when all of the bytes for a handler have been written. Flushes the
+// bytes if possible and necessary, returning false if this failed.
+static bool commit(upb_pb_encoder *e) {
+ if (!e->top) {
+ // We aren't inside a delimited region. Flush our accumulated bytes to
+ // the output.
+ //
+ // TODO(haberman): in the future we may want to delay flushing for
+ // efficiency reasons.
+ putbuf(e, e->buf, e->ptr - e->buf);
+ e->ptr = e->buf;
+ }
-/* Functions to write wire values. ********************************************/
+ return true;
+}
-// Since we know in advance the longest that the value could be, we always make
-// sure that our buffer is long enough. This saves us from having to perform
-// bounds checks.
+// Writes the given bytes to the buffer, handling reserve/advance.
+static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
+ if (!reserve(e, len)) {
+ return false;
+ }
-// Puts a varint (wire type: UPB_WIRE_TYPE_VARINT).
-static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val)
-{
- do {
- uint8_t byte = val & 0x7f;
- val >>= 7;
- if(val) byte |= 0x80;
- *buf++ = byte;
- } while(val);
- return buf;
+ memcpy(e->ptr, data, len);
+ advance(e, len);
+ return true;
}
-// Puts an unsigned 32-bit varint, verbatim. Never uses the high 64 bits.
-static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val)
-{
- return upb_put_v_uint64_t(buf, val);
+// Finish the current run by adding the run totals to the segment and message
+// length.
+static void accumulate(upb_pb_encoder *e) {
+ assert(e->ptr >= e->runbegin);
+ size_t run_len = e->ptr - e->runbegin;
+ e->segptr->seglen += run_len;
+ top(e)->msglen += run_len;
+ e->runbegin = e->ptr;
}
-// Puts a signed 32-bit varint, first sign-extending to 64-bits. We do this to
-// maintain wire-compatibility with 64-bit signed integers.
-static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val)
-{
- return upb_put_v_uint64_t(buf, (int64_t)val);
+// Call to indicate the start of a delimited region for which the full length is
+// not yet known. All data will be buffered until the length is known.
+// Delimited regions may be nested; their lengths will all be tracked properly.
+static bool start_delim(upb_pb_encoder *e) {
+ if (e->top) {
+ // We are already buffering, advance to the next segment and push it on the
+ // stack.
+ accumulate(e);
+
+ if (++e->top == e->stacklimit) {
+ // TODO(haberman): grow stack?
+ return false;
+ }
+
+ if (++e->segptr == e->seglimit) {
+ upb_pb_encoder_segment *realloc_from =
+ (e->segbuf == e->seginitbuf) ? NULL : e->segbuf;
+ size_t old_size =
+ (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
+ size_t new_size = old_size * 2;
+ upb_pb_encoder_segment *new_buf = realloc(realloc_from, new_size);
+
+ if (new_buf == NULL) {
+ return false;
+ }
+
+ if (realloc_from == NULL) {
+ memcpy(new_buf, e->seginitbuf, old_size);
+ }
+
+ e->segptr = new_buf + (e->segptr - e->segbuf);
+ e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
+ e->segbuf = new_buf;
+ }
+ } else {
+ // We were previously at the top level, start buffering.
+ e->segptr = e->segbuf;
+ e->top = e->stack;
+ e->runbegin = e->ptr;
+ }
+
+ *e->top = e->segptr - e->segbuf;
+ e->segptr->seglen = 0;
+ e->segptr->msglen = 0;
+
+ return true;
}
-static void upb_put32(uint8_t *buf, uint32_t val) {
- buf[0] = val & 0xff;
- buf[1] = (val >> 8) & 0xff;
- buf[2] = (val >> 16) & 0xff;
- buf[3] = (val >> 24);
+// Call to indicate the end of a delimited region. We now know the length of
+// the delimited region. If we are not nested inside any other delimited
+// regions, we can now emit all of the buffered data we accumulated.
+static bool end_delim(upb_pb_encoder *e) {
+ accumulate(e);
+ size_t msglen = top(e)->msglen;
+
+ if (e->top == e->stack) {
+ // All lengths are now available, emit all buffered data.
+ char buf[UPB_PB_VARINT_MAX_LEN];
+ upb_pb_encoder_segment *s;
+ const char *ptr = e->buf;
+ for (s = e->segbuf; s <= e->segptr; s++) {
+ size_t lenbytes = upb_vencode64(s->msglen, buf);
+ putbuf(e, buf, lenbytes);
+ putbuf(e, ptr, s->seglen);
+ ptr += s->seglen;
+ }
+
+ e->ptr = e->buf;
+ e->top = NULL;
+ } else {
+ // Need to keep buffering; propagate length info into enclosing submessages.
+ --e->top;
+ top(e)->msglen += msglen + upb_varint_size(msglen);
+ }
+
+ return true;
}
-// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
-static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val)
-{
- uint8_t *uint32_end = buf + sizeof(uint32_t);
-#if UPB_UNALIGNED_READS_OK
- *(uint32_t*)buf = val;
-#else
- upb_put32(buf, val);
-#endif
- return uint32_end;
+
+/* tag_t **********************************************************************/
+
+// A precomputed (pre-encoded) tag and length.
+
+typedef struct {
+ uint8_t bytes;
+ char tag[7];
+} tag_t;
+
+// Allocates a new tag for this field, and sets it in these handlerattr.
+static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
+ upb_handlerattr *attr) {
+ uint32_t n = upb_fielddef_number(f);
+
+ tag_t *tag = malloc(sizeof(tag_t));
+ tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
+
+ upb_handlerattr_init(attr);
+ upb_handlerattr_sethandlerdata(attr, tag);
+ upb_handlers_addcleanup(h, tag, free);
}
-// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
-static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val)
-{
- uint8_t *uint64_end = buf + sizeof(uint64_t);
-#if UPB_UNALIGNED_READS_OK
- *(uint64_t*)buf = val;
-#else
- upb_put32(buf, (uint32_t)val);
- upb_put32(buf, (uint32_t)(val >> 32));
-#endif
- return uint64_end;
+static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
+ return encode_bytes(e, tag->tag, tag->bytes);
}
-/* Functions to write and calculate sizes for .proto values. ******************/
-// Performs zig-zag encoding, which is used by sint32 and sint64.
-static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); }
-static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }
+/* encoding of wire types *****************************************************/
-/* Use macros to define a set of two functions for each .proto type:
- *
- * // Converts and writes a .proto value into buf. "end" indicates the end
- * // of the current available buffer (if the buffer does not contain enough
- * // space UPB_STATUS_NEED_MORE_DATA is returned). On success, *outbuf will
- * // point one past the data that was written.
- * uint8_t *upb_put_INT32(uint8_t *buf, int32_t val);
- *
- * // Returns the number of bytes required to encode val.
- * size_t upb_get_INT32_size(int32_t val);
- *
- * // Given a .proto value s (source) convert it to a wire value.
- * uint32_t upb_vtowv_INT32(int32_t s);
- */
+static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
+ // TODO(haberman): byte-swap for big endian.
+ return encode_bytes(e, &val, sizeof(uint64_t));
+}
-#define VTOWV(type, wire_t, val_t) \
- static wire_t upb_vtowv_ ## type(val_t s)
+static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
+ // TODO(haberman): byte-swap for big endian.
+ return encode_bytes(e, &val, sizeof(uint32_t));
+}
-#define PUT(type, v_or_f, wire_t, val_t, member_name) \
- static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \
- wire_t tmp = upb_vtowv_ ## type(val); \
- return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \
+static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
+ if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
+ return false;
}
-#define T(type, v_or_f, wire_t, val_t, member_name) \
- static size_t upb_get_ ## type ## _size(val_t val) { \
- return upb_ ## v_or_f ## _ ## wire_t ## _size(val); \
- } \
- VTOWV(type, wire_t, val_t); /* prototype for PUT below */ \
- PUT(type, v_or_f, wire_t, val_t, member_name) \
- VTOWV(type, wire_t, val_t)
-
-T(INT32, v, int32_t, int32_t, int32) { return (uint32_t)s; }
-T(INT64, v, uint64_t, int64_t, int64) { return (uint64_t)s; }
-T(UINT32, v, uint32_t, uint32_t, uint32) { return s; }
-T(UINT64, v, uint64_t, uint64_t, uint64) { return s; }
-T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzenc_32(s); }
-T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzenc_64(s); }
-T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; }
-T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; }
-T(SFIXED32, f, uint32_t, int32_t, int32) { return (uint32_t)s; }
-T(SFIXED64, f, uint64_t, int64_t, int64) { return (uint64_t)s; }
-T(BOOL, v, uint32_t, bool, _bool) { return (uint32_t)s; }
-T(ENUM, v, uint32_t, int32_t, int32) { return (uint32_t)s; }
-T(DOUBLE, f, uint64_t, double, _double) {
- upb_value v;
- v._double = s;
- return v.uint64;
+ advance(e, upb_vencode64(val, e->ptr));
+ return true;
}
-T(FLOAT, f, uint32_t, float, _float) {
- upb_value v;
- v._float = s;
- return v.uint32;
+
+static uint64_t dbl2uint64(double d) {
+ uint64_t ret;
+ memcpy(&ret, &d, sizeof(uint64_t));
+ return ret;
}
-#undef VTOWV
-#undef PUT
-#undef T
-static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
-{
-#define CASE(t, member_name) \
- case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name);
- switch(ft) {
- CASE(DOUBLE, _double)
- CASE(FLOAT, _float)
- CASE(INT32, int32)
- CASE(INT64, int64)
- CASE(UINT32, uint32)
- CASE(UINT64, uint64)
- CASE(SINT32, int32)
- CASE(SINT64, int64)
- CASE(FIXED32, uint32)
- CASE(FIXED64, uint64)
- CASE(SFIXED32, int32)
- CASE(SFIXED64, int64)
- CASE(BOOL, _bool)
- CASE(ENUM, int32)
- default: assert(false); return buf;
+static uint32_t flt2uint32(float d) {
+ uint32_t ret;
+ memcpy(&ret, &d, sizeof(uint32_t));
+ return ret;
+}
+
+
+/* encoding of proto types ****************************************************/
+
+static bool startmsg(void *c, const void *hd) {
+ upb_pb_encoder *e = c;
+ UPB_UNUSED(hd);
+ if (e->depth++ == 0) {
+ upb_bytessink_start(e->output_, 0, &e->subc);
}
-#undef CASE
+ return true;
}
-static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v)
-{
-#define CASE(t, member_name) \
- case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name);
- switch(ft) {
- CASE(DOUBLE, _double)
- CASE(FLOAT, _float)
- CASE(INT32, int32)
- CASE(INT64, int64)
- CASE(UINT32, uint32)
- CASE(UINT64, uint64)
- CASE(SINT32, int32)
- CASE(SINT64, int64)
- CASE(FIXED32, uint32)
- CASE(FIXED64, uint64)
- CASE(SFIXED32, int32)
- CASE(SFIXED64, int64)
- CASE(BOOL, _bool)
- CASE(ENUM, int32)
- default: assert(false); return 0;
+static bool endmsg(void *c, const void *hd, upb_status *status) {
+ upb_pb_encoder *e = c;
+ UPB_UNUSED(hd);
+ UPB_UNUSED(status);
+ if (--e->depth == 0) {
+ upb_bytessink_end(e->output_);
}
-#undef CASE
+ return true;
}
-static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num,
- upb_wire_type_t wt)
-{
- return upb_put_UINT32(buf, wt | (num << 3));
+static void *encode_startdelimfield(void *c, const void *hd) {
+ bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
+ return ok ? c : UPB_BREAK;
}
-static uint32_t _upb_get_tag_size(upb_field_number_t num)
-{
- return upb_get_UINT32_size(num << 3);
+static bool encode_enddelimfield(void *c, const void *hd) {
+ UPB_UNUSED(hd);
+ return end_delim(c);
}
+static void *encode_startgroup(void *c, const void *hd) {
+ return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
+}
-/* upb_sizebuilder ************************************************************/
+static bool encode_endgroup(void *c, const void *hd) {
+ return encode_tag(c, hd) && commit(c);
+}
-struct upb_sizebuilder {
- // Accumulating size for the current level.
- uint32_t size;
+static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
+ UPB_UNUSED(size_hint);
+ return encode_startdelimfield(c, hd);
+}
- // Stack of sizes for our current nesting.
- uint32_t stack[UPB_MAX_NESTING], *top;
+static size_t encode_strbuf(void *c, const void *hd, const char *buf,
+ size_t len, const upb_bufhandle *h) {
+ UPB_UNUSED(hd);
+ UPB_UNUSED(h);
+ return encode_bytes(c, buf, len) ? len : 0;
+}
- // Vector of sizes.
- uint32_t *sizes;
- int sizes_len;
- int sizes_size;
+#define T(type, ctype, convert, encode) \
+ static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
+ return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e); \
+ } \
+ static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
+ UPB_UNUSED(hd); \
+ return encode(e, (convert)(val)); \
+ }
- upb_status status;
-};
+// Instantiate encode_scalar_<type> / encode_packed_<type> for every primitive
+// type: T(name, C type, conversion applied to the value, encoder to call).
+// No trailing ';' -- each T() expands to complete function definitions, and a
+// stray ';' at file scope is an empty declaration that ISO C does not permit.
+T(double, double, dbl2uint64, encode_fixed64)
+T(float, float, flt2uint32, encode_fixed32)
+T(int64, int64_t, uint64_t, encode_varint)
+T(int32, int32_t, uint32_t, encode_varint)
+T(fixed64, uint64_t, uint64_t, encode_fixed64)
+T(fixed32, uint32_t, uint32_t, encode_fixed32)
+T(bool, bool, bool, encode_varint)
+T(uint32, uint32_t, uint32_t, encode_varint)
+T(uint64, uint64_t, uint64_t, encode_varint)
+T(enum, int32_t, uint32_t, encode_varint)
+T(sfixed32, int32_t, uint32_t, encode_fixed32)
+T(sfixed64, int64_t, uint64_t, encode_fixed64)
+T(sint32, int32_t, upb_zzenc_32, encode_varint)
+T(sint64, int64_t, upb_zzenc_64, encode_varint)
-// upb_sink callbacks.
-static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f,
- upb_value val,
- upb_status *status)
-{
- (void)status;
- upb_sizebuilder *sb = (upb_sizebuilder*)sink;
- uint32_t size = 0;
- size += _upb_get_tag_size(f->number);
- size += _upb_get_value_size(f->type, val);
- sb->size += size;
- return UPB_SINK_CONTINUE;
-}
+#undef T
-static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f,
- upb_strptr str,
- int32_t start, uint32_t end,
- upb_status *status)
-{
- (void)status;
- (void)str; // String data itself is not used.
- upb_sizebuilder *sb = (upb_sizebuilder*)sink;
- if(start >= 0) {
- uint32_t size = 0;
- size += _upb_get_tag_size(f->number);
- size += upb_get_UINT32_size(end - start);
- sb->size += size;
- }
- return UPB_SINK_CONTINUE;
-}
-static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f,
- upb_status *status)
-{
- (void)status;
- (void)f; // Unused (we calculate tag size and delimiter in endcb).
- upb_sizebuilder *sb = (upb_sizebuilder*)sink;
- if(f->type == UPB_TYPE(MESSAGE)) {
- *sb->top = sb->size;
- sb->top++;
- sb->size = 0;
- } else {
- assert(f->type == UPB_TYPE(GROUP));
- sb->size += _upb_get_tag_size(f->number);
- }
- return UPB_SINK_CONTINUE;
-}
+/* code to build the handlers *************************************************/
+
+static void newhandlers_callback(const void *closure, upb_handlers *h) {
+ UPB_UNUSED(closure);
-static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f,
- upb_status *status)
-{
- (void)status;
- upb_sizebuilder *sb = (upb_sizebuilder*)sink;
- if(f->type == UPB_TYPE(MESSAGE)) {
- sb->top--;
- if(sb->sizes_len == sb->sizes_size) {
- sb->sizes_size *= 2;
- sb->sizes = realloc(sb->sizes, sb->sizes_size * sizeof(*sb->sizes));
+ upb_handlers_setstartmsg(h, startmsg, NULL);
+ upb_handlers_setendmsg(h, endmsg, NULL);
+
+ const upb_msgdef *m = upb_handlers_msgdef(h);
+ upb_msg_iter i;
+ for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+ const upb_fielddef *f = upb_msg_iter_field(&i);
+ bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
+ upb_fielddef_packed(f);
+ upb_handlerattr attr;
+ upb_wiretype_t wt =
+ packed ? UPB_WIRE_TYPE_DELIMITED
+ : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
+
+ // Pre-encode the tag for this field.
+ new_tag(h, f, wt, &attr);
+
+ if (packed) {
+ upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
+ upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
}
- uint32_t child_size = sb->size;
- uint32_t parent_size = *sb->top;
- sb->sizes[sb->sizes_len++] = child_size;
- // The size according to the parent includes the tag size and delimiter of
- // the submessage.
- parent_size += upb_get_UINT32_size(child_size);
- parent_size += _upb_get_tag_size(f->number);
- // Include size accumulated in parent before child began.
- sb->size = child_size + parent_size;
- } else {
- assert(f->type == UPB_TYPE(GROUP));
- // As an optimization, we could just add this number twice in startcb, to
- // avoid having to recalculate it.
- sb->size += _upb_get_tag_size(f->number);
+
+#define T(upper, lower, upbtype) \
+ case UPB_DESCRIPTOR_TYPE_##upper: \
+ if (packed) { \
+ upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
+ } else { \
+ upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
+ } \
+ break;
+
+ switch (upb_fielddef_descriptortype(f)) {
+ T(DOUBLE, double, double);
+ T(FLOAT, float, float);
+ T(INT64, int64, int64);
+ T(INT32, int32, int32);
+ T(FIXED64, fixed64, uint64);
+ T(FIXED32, fixed32, uint32);
+ T(BOOL, bool, bool);
+ T(UINT32, uint32, uint32);
+ T(UINT64, uint64, uint64);
+ T(ENUM, enum, int32);
+ T(SFIXED32, sfixed32, int32);
+ T(SFIXED64, sfixed64, int64);
+ T(SINT32, sint32, int32);
+ T(SINT64, sint64, int64);
+ case UPB_DESCRIPTOR_TYPE_STRING:
+ case UPB_DESCRIPTOR_TYPE_BYTES:
+ upb_handlers_setstartstr(h, f, encode_startstr, &attr);
+ upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
+ upb_handlers_setstring(h, f, encode_strbuf, &attr);
+ break;
+ case UPB_DESCRIPTOR_TYPE_MESSAGE:
+ upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
+ upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
+ break;
+ case UPB_DESCRIPTOR_TYPE_GROUP: {
+ // Endgroup takes a different tag (wire_type = END_GROUP).
+ upb_handlerattr attr2;
+ new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
+
+ upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
+ upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);
+
+ upb_handlerattr_uninit(&attr2);
+ break;
+ }
+ }
+
+#undef T
+
+ upb_handlerattr_uninit(&attr);
}
- return UPB_SINK_CONTINUE;
}
-upb_sink_callbacks _upb_sizebuilder_sink_vtbl = {
- _upb_sizebuilder_valuecb,
- _upb_sizebuilder_strcb,
- _upb_sizebuilder_startcb,
- _upb_sizebuilder_endcb
-};
-
-
-/* upb_sink callbacks *********************************************************/
-
-struct upb_encoder {
- upb_sink base;
- //upb_bytesink *bytesink;
- uint32_t *sizes;
- int size_offset;
-};
-
-
-// Within one callback we may need to encode up to two separate values.
-#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2)
-
-static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf,
- size_t len, upb_status *status)
-{
- // TODO: conjure a upb_strptr that points to buf.
- //upb_strptr ptr;
- (void)s;
- (void)buf;
- (void)status;
- size_t written = 5;// = upb_bytesink_onbytes(s->bytesink, ptr);
- if(written < len) {
- // TODO: mark to skip "written" bytes next time.
- return UPB_SINK_STOP;
- } else {
- return UPB_SINK_CONTINUE;
- }
+
+/* public API *****************************************************************/
+
+const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
+ const void *owner) {
+ return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
}
-static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f,
- upb_value val, upb_status *status)
-{
- upb_encoder *s = (upb_encoder*)sink;
- uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
- upb_wire_type_t wt = upb_types[f->type].expected_wire_type;
- // TODO: handle packed encoding.
- ptr = _upb_put_tag(ptr, f->number, wt);
- ptr = upb_encode_value(ptr, f->type, val);
- return _upb_encoder_push_buf(s, buf, ptr - buf, status);
+#define ARRAYSIZE(x) (sizeof(x) / sizeof(x[0]))
+
+void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h) {
+ e->output_ = NULL;
+ e->subc = NULL;
+ e->buf = e->initbuf;
+ e->ptr = e->buf;
+ e->limit = e->buf + ARRAYSIZE(e->initbuf);
+ e->segbuf = e->seginitbuf;
+ e->seglimit = e->segbuf + ARRAYSIZE(e->seginitbuf);
+ e->stacklimit = e->stack + ARRAYSIZE(e->stack);
+ upb_sink_reset(&e->input_, h, e);
}
-static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f,
- upb_strptr str,
- int32_t start, uint32_t end,
- upb_status *status)
-{
- upb_encoder *s = (upb_encoder*)sink;
- uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
- if(start >= 0) {
- ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED);
- ptr = upb_put_UINT32(ptr, end - start);
+void upb_pb_encoder_uninit(upb_pb_encoder *e) {
+ if (e->buf != e->initbuf) {
+ free(e->buf);
}
- // TODO: properly handle partially consumed strings and partially supplied
- // strings.
- _upb_encoder_push_buf(s, buf, ptr - buf, status);
- return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status);
-}
-static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f,
- upb_status *status)
-{
- upb_encoder *s = (upb_encoder*)sink;
- uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
- if(f->type == UPB_TYPE(GROUP)) {
- ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_START_GROUP);
- } else {
- ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED);
- ptr = upb_put_UINT32(ptr, s->sizes[--s->size_offset]);
+ if (e->segbuf != e->seginitbuf) {
+ free(e->segbuf);
}
- return _upb_encoder_push_buf(s, buf, ptr - buf, status);
}
-static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f,
- upb_status *status)
-{
- upb_encoder *s = (upb_encoder*)sink;
- uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
- if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE;
- ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_END_GROUP);
- return _upb_encoder_push_buf(s, buf, ptr - buf, status);
+void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output) {
+ upb_pb_encoder_reset(e);
+ e->output_ = output;
+ e->subc = output->closure;
}
-upb_sink_callbacks _upb_encoder_sink_vtbl = {
- _upb_encoder_valuecb,
- _upb_encoder_strcb,
- _upb_encoder_startcb,
- _upb_encoder_endcb
-};
+void upb_pb_encoder_reset(upb_pb_encoder *e) {
+ e->segptr = NULL;
+ e->top = NULL;
+ e->depth = 0;
+}
+upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
diff --git a/upb/pb/encoder.h b/upb/pb/encoder.h
index 563b78d..2df5797 100644
--- a/upb/pb/encoder.h
+++ b/upb/pb/encoder.h
@@ -7,52 +7,155 @@
* Implements a set of upb_handlers that write protobuf data to the binary wire
* format.
*
- * For messages that have any submessages, the encoder needs a buffer
- * containing the submessage sizes, so they can be properly written at the
- * front of each message. Note that groups do *not* have this requirement.
+ * This encoder implementation does not have any access to any out-of-band or
+ * precomputed lengths for submessages, so it must buffer submessages internally
+ * before it can emit the first byte.
*/
#ifndef UPB_ENCODER_H_
#define UPB_ENCODER_H_
-#include "upb/upb.h"
-#include "upb/bytestream.h"
+#include "upb/sink.h"
#ifdef __cplusplus
-extern "C" {
+namespace upb {
+namespace pb {
+class Encoder;
+} // namespace pb
+} // namespace upb
#endif
-/* upb_encoder ****************************************************************/
+UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder);
-// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol
-// buffer binary wire format.
-struct upb_encoder;
-typedef struct upb_encoder upb_encoder;
+#define UPB_PBENCODER_MAX_NESTING 100
-upb_encoder *upb_encoder_new(upb_msgdef *md);
-void upb_encoder_free(upb_encoder *e);
+/* upb::pb::Encoder ***********************************************************/
-// Resets the given upb_encoder such that is is ready to begin encoding,
-// outputting data to "bytesink" (which must live until the encoder is
-// reset or destroyed).
-void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink);
+// The output buffer is divided into segments; a segment is a string of data
+// that is "ready to go" -- it does not need any varint lengths inserted into
+// the middle. The seams between segments are where varints will be inserted
+// once they are known.
+//
+// We also use the concept of a "run", which is a range of encoded bytes that
+// occur at a single submessage level. Every segment contains one or more runs.
+//
+// A segment can span messages. Consider:
+//
+// .--Submessage lengths---------.
+// | | |
+// | V V
+// V | |--------------- | |-----------------
+// Submessages: | |-----------------------------------------------
+// Top-level msg: ------------------------------------------------------------
+//
+// Segments: ----- ------------------- -----------------
+// Runs: *---- *--------------*--- *----------------
+// (* marks the start)
+//
+// Note that the top-level message is not in any segment because it does not
+// have any length preceding it.
+//
+// A segment is only interrupted when another length needs to be inserted. So
+// observe how the second segment spans both the inner submessage and part of
+// the next enclosing message.
+typedef struct {
+ UPB_PRIVATE_FOR_CPP
+ uint32_t msglen; // The length to varint-encode before this segment.
+ uint32_t seglen; // Length of the segment.
+} upb_pb_encoder_segment;
-// Returns the upb_sink to which data can be written. The sink is invalidated
-// when the encoder is reset or destroyed. Note that if the client wants to
-// encode any length-delimited submessages it must first call
-// upb_encoder_buildsizes() below.
-upb_sink *upb_encoder_sink(upb_encoder *e);
+UPB_DEFINE_CLASS0(upb::pb::Encoder,
+ public:
+ Encoder(const upb::Handlers* handlers);
+ ~Encoder();
-// Call prior to pushing any data with embedded submessages. "src" must yield
-// exactly the same data as what will next be encoded, but in reverse order.
-// The encoder iterates over this data in order to determine the sizes of the
-// submessages. If any errors are returned by the upb_src, the status will
-// be saved in *status. If the client is sure that the upb_src will not throw
-// any errors, "status" may be NULL.
-void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status);
+ static reffed_ptr<const Handlers> NewHandlers(const upb::MessageDef* msg);
+
+ // Resets the state of the encoder, so that it will expect to begin a new
+ // message.
+ void Reset();
+
+ // Resets the output pointer which will serve as our closure.
+ void ResetOutput(BytesSink* output);
+
+ // The input to the encoder.
+ Sink* input();
+
+ private:
+ UPB_DISALLOW_COPY_AND_ASSIGN(Encoder);
+,
+UPB_DEFINE_STRUCT0(upb_pb_encoder, UPB_QUOTE(
+ // Our input and output.
+ upb_sink input_;
+ upb_bytessink *output_;
+
+ // The "subclosure" -- used as the inner closure as part of the bytessink
+ // protocol.
+ void *subc;
+
+ // The output buffer and limit, and our current write position. "buf"
+ // initially points to "initbuf", but is dynamically allocated if we need to
+ // grow beyond the initial size.
+ char *buf, *ptr, *limit;
+
+ // The beginning of the current run, or undefined if we are at the top level.
+ char *runbegin;
+
+ // The list of segments we are accumulating.
+ upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
+
+ // The stack of enclosing submessages. Each entry in the stack points to the
+ // segment where this submessage's length is being accumulated.
+ int stack[UPB_PBENCODER_MAX_NESTING], *top, *stacklimit;
+
+ // Depth of startmsg/endmsg calls.
+ int depth;
+
+ // Initial buffers for the output buffer and segment buffer. If we outgrow
+ // these we will dynamically allocate bigger ones.
+ char initbuf[256];
+ upb_pb_encoder_segment seginitbuf[32];
+)));
+
+UPB_BEGIN_EXTERN_C
+
+const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
+ const void *owner);
+void upb_pb_encoder_reset(upb_pb_encoder *e);
+upb_sink *upb_pb_encoder_input(upb_pb_encoder *p);
+void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h);
+void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output);
+void upb_pb_encoder_uninit(upb_pb_encoder *e);
+
+UPB_END_EXTERN_C
#ifdef __cplusplus
-} /* extern "C" */
+
+namespace upb {
+namespace pb {
+inline Encoder::Encoder(const upb::Handlers* handlers) {
+ upb_pb_encoder_init(this, handlers);
+}
+inline Encoder::~Encoder() {
+ upb_pb_encoder_uninit(this);
+}
+inline void Encoder::Reset() {
+ upb_pb_encoder_reset(this);
+}
+inline void Encoder::ResetOutput(BytesSink* output) {
+ upb_pb_encoder_resetoutput(this, output);
+}
+inline Sink* Encoder::input() {
+ return upb_pb_encoder_input(this);
+}
+inline reffed_ptr<const Handlers> Encoder::NewHandlers(
+ const upb::MessageDef *md) {
+ const Handlers* h = upb_pb_encoder_newhandlers(md, &h);
+ return reffed_ptr<const Handlers>(h, &h);
+}
+} // namespace pb
+} // namespace upb
+
#endif
#endif /* UPB_ENCODER_H_ */
diff --git a/upb/pb/varint.c b/upb/pb/varint.c
index ccd752d..365deb4 100644
--- a/upb/pb/varint.c
+++ b/upb/pb/varint.c
@@ -7,6 +7,29 @@
#include "upb/pb/varint.int.h"
+// Index is descriptor type.
+const uint8_t upb_pb_native_wire_types[] = {
+ UPB_WIRE_TYPE_END_GROUP, // ENDGROUP
+ UPB_WIRE_TYPE_64BIT, // DOUBLE
+ UPB_WIRE_TYPE_32BIT, // FLOAT
+ UPB_WIRE_TYPE_VARINT, // INT64
+ UPB_WIRE_TYPE_VARINT, // UINT64
+ UPB_WIRE_TYPE_VARINT, // INT32
+ UPB_WIRE_TYPE_64BIT, // FIXED64
+ UPB_WIRE_TYPE_32BIT, // FIXED32
+ UPB_WIRE_TYPE_VARINT, // BOOL
+ UPB_WIRE_TYPE_DELIMITED, // STRING
+ UPB_WIRE_TYPE_START_GROUP, // GROUP
+ UPB_WIRE_TYPE_DELIMITED, // MESSAGE
+ UPB_WIRE_TYPE_DELIMITED, // BYTES
+ UPB_WIRE_TYPE_VARINT, // UINT32
+ UPB_WIRE_TYPE_VARINT, // ENUM
+ UPB_WIRE_TYPE_32BIT, // SFIXED32
+ UPB_WIRE_TYPE_64BIT, // SFIXED64
+ UPB_WIRE_TYPE_VARINT, // SINT32
+ UPB_WIRE_TYPE_VARINT, // SINT64
+};
+
// A basic branch-based decoder, uses 32-bit values to get good performance
// on 32-bit architectures (but performs well on 64-bits also).
// This scheme comes from the original Google Protobuf implementation (proto2).
diff --git a/upb/pb/varint.int.h b/upb/pb/varint.int.h
index d92fef9..8498acd 100644
--- a/upb/pb/varint.int.h
+++ b/upb/pb/varint.int.h
@@ -37,6 +37,10 @@ typedef enum {
// wiki document about this).
#define UPB_PB_VARINT_MAX_LEN 10
+// Array of the "native" (i.e. non-packed-repeated) wire type for each
+// descriptor type (upb_descriptortype_t); the descriptor type is the index.
+extern const uint8_t upb_pb_native_wire_types[];
+
/* Zig-zag encoding/decoding **************************************************/
UPB_INLINE int32_t upb_zzdec_32(uint32_t n) {
@@ -129,6 +133,11 @@ UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) {
return i;
}
+// Returns the size upb_vencode64() reports for "val" (i.e. its encoded varint
+// length).  The value is encoded into a throwaway scratch buffer of
+// UPB_PB_VARINT_MAX_LEN bytes; the encoded bytes themselves are discarded.
+UPB_INLINE size_t upb_varint_size(uint64_t val) {
+ char buf[UPB_PB_VARINT_MAX_LEN];
+ return upb_vencode64(val, buf);
+}
+
// Encodes a 32-bit varint, *not* sign-extended.
UPB_INLINE uint64_t upb_vencode32(uint32_t val) {
char buf[UPB_PB_VARINT_MAX_LEN];
diff --git a/upb/table.c b/upb/table.c
index 3fd4b0f..63bb068 100644
--- a/upb/table.c
+++ b/upb/table.c
@@ -42,14 +42,36 @@ char *upb_strdup(const char *s) {
return p;
}
-static upb_tabkey strkey(const char *str) {
- upb_tabkey k;
- k.str = (char*)str;
+// A type to represent the lookup key of either a strtable or an inttable.
+// This is like upb_tabkey, but can carry a size also to allow lookups of
+// non-NULL-terminated strings (we don't store string lengths in the table).
+typedef struct {
+ upb_tabkey key;
+ uint32_t len; // For string keys only.
+} lookupkey_t;
+
+static lookupkey_t strkey(const char *str) {
+ lookupkey_t k;
+ k.key.str = (char*)str;
+ k.len = strlen(str);
return k;
}
-typedef const upb_tabent *hashfunc_t(const upb_table *t, upb_tabkey key);
-typedef bool eqlfunc_t(upb_tabkey k1, upb_tabkey k2);
+static lookupkey_t strkey2(const char *str, size_t len) {
+ lookupkey_t k;
+ k.key.str = (char*)str;
+ k.len = len;
+ return k;
+}
+
+// Builds the lookup key for an inttable entry.  "len" is only meaningful for
+// string keys, but it is zeroed here so the struct is never returned (and
+// copied by value) with an indeterminate member.
+static lookupkey_t intkey(uintptr_t key) {
+ lookupkey_t k;
+ k.key = upb_intkey(key);
+ k.len = 0;
+ return k;
+}
+
+typedef uint32_t hashfunc_t(upb_tabkey key);
+typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
/* Base table (shared code) ***************************************************/
@@ -85,10 +107,14 @@ static upb_tabent *emptyent(upb_table *t) {
while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
}
-static const upb_tabent *findentry(const upb_table *t, upb_tabkey key,
- hashfunc_t *hash, eqlfunc_t *eql) {
+static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
+ return (upb_tabent*)upb_getentry(t, hash);
+}
+
+static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
+ uint32_t hash, eqlfunc_t *eql) {
if (t->size_lg2 == 0) return NULL;
- const upb_tabent *e = hash(t, key);
+ const upb_tabent *e = upb_getentry(t, hash);
if (upb_tabent_isempty(e)) return NULL;
while (1) {
if (eql(e->key, key)) return e;
@@ -96,8 +122,13 @@ static const upb_tabent *findentry(const upb_table *t, upb_tabkey key,
}
}
-static bool lookup(const upb_table *t, upb_tabkey key, upb_value *v,
- hashfunc_t *hash, eqlfunc_t *eql) {
+static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
+ uint32_t hash, eqlfunc_t *eql) {
+ return (upb_tabent*)findentry(t, key, hash, eql);
+}
+
+static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
+ uint32_t hash, eqlfunc_t *eql) {
const upb_tabent *e = findentry(t, key, hash, eql);
if (e) {
if (v) {
@@ -110,13 +141,13 @@ static bool lookup(const upb_table *t, upb_tabkey key, upb_value *v,
}
// The given key must not already exist in the table.
-static void insert(upb_table *t, upb_tabkey key, upb_value val,
- hashfunc_t *hash, eqlfunc_t *eql) {
+static void insert(upb_table *t, lookupkey_t key, upb_value val,
+ uint32_t hash, hashfunc_t *hashfunc, eqlfunc_t *eql) {
UPB_UNUSED(eql);
assert(findentry(t, key, hash, eql) == NULL);
assert(val.ctype == t->ctype);
t->count++;
- upb_tabent *mainpos_e = (upb_tabent*)hash(t, key);
+ upb_tabent *mainpos_e = getentry_mutable(t, hash);
upb_tabent *our_e = mainpos_e;
if (upb_tabent_isempty(mainpos_e)) {
// Our main position is empty; use it.
@@ -125,7 +156,7 @@ static void insert(upb_table *t, upb_tabkey key, upb_value val,
// Collision.
upb_tabent *new_e = emptyent(t);
// Head of collider's chain.
- upb_tabent *chain = (upb_tabent*)hash(t, mainpos_e->key);
+ upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
if (chain == mainpos_e) {
// Existing ent is in its main posisiton (it has the same hash as us, and
// is the head of our chain). Insert to new ent and append to this chain.
@@ -146,14 +177,14 @@ static void insert(upb_table *t, upb_tabkey key, upb_value val,
our_e->next = NULL;
}
}
- our_e->key = key;
+ our_e->key = key.key;
our_e->val = val.val;
assert(findentry(t, key, hash, eql) == our_e);
}
-static bool rm(upb_table *t, upb_tabkey key, upb_value *val,
- upb_tabkey *removed, hashfunc_t *hash, eqlfunc_t *eql) {
- upb_tabent *chain = (upb_tabent*)hash(t, key);
+static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
+ upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
+ upb_tabent *chain = getentry_mutable(t, hash);
if (upb_tabent_isempty(chain)) return false;
if (eql(chain->key, key)) {
// Element to remove is at the head of its chain.
@@ -210,13 +241,12 @@ static size_t begin(const upb_table *t) {
// A simple "subclass" of upb_table that only adds a hash function for strings.
-static const upb_tabent *strhash(const upb_table *t, upb_tabkey key) {
- // Could avoid the strlen() by using a hash function that terminates on NULL.
- return t->entries + (MurmurHash2(key.str, strlen(key.str), 0) & t->mask);
+static uint32_t strhash(upb_tabkey key) {
+ return MurmurHash2(key.str, strlen(key.str), 0);
}
-static bool streql(upb_tabkey k1, upb_tabkey k2) {
- return strcmp(k1.str, k2.str) == 0;
+// Returns true iff the table's NUL-terminated key k1 is exactly the k2.len
+// bytes starting at k2.key.str.  The stored key's length is compared first:
+// strncmp() stops at the first NUL, so a lookup key containing an embedded NUL
+// could otherwise "match" a shorter stored key and make a follow-up
+// k1.str[k2.len] probe read past the end of that key's allocation.
+static bool streql(upb_tabkey k1, lookupkey_t k2) {
+ return strlen(k1.str) == k2.len && memcmp(k1.str, k2.key.str, k2.len) == 0;
}
bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
@@ -252,17 +282,23 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
}
}
if ((k = upb_strdup(k)) == NULL) return false;
- insert(&t->t, strkey(k), v, &strhash, &streql);
+
+ lookupkey_t key = strkey(k);
+ uint32_t hash = MurmurHash2(key.key.str, key.len, 0);
+ insert(&t->t, strkey(k), v, hash, &strhash, &streql);
return true;
}
-bool upb_strtable_lookup(const upb_strtable *t, const char *key, upb_value *v) {
- return lookup(&t->t, strkey(key), v, &strhash, &streql);
+bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
+ upb_value *v) {
+ uint32_t hash = MurmurHash2(key, len, 0);
+ return lookup(&t->t, strkey2(key, len), v, hash, &streql);
}
bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) {
+ uint32_t hash = MurmurHash2(key, strlen(key), 0);
upb_tabkey tabkey;
- if (rm(&t->t, strkey(key), val, &tabkey, &strhash, &streql)) {
+ if (rm(&t->t, strkey(key), val, &tabkey, hash, &streql)) {
free((void*)tabkey.str);
return true;
} else {
@@ -317,8 +353,10 @@ bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
// For inttables we use a hybrid structure where small keys are kept in an
// array and large keys are put in the hash table.
-static bool inteql(upb_tabkey k1, upb_tabkey k2) {
- return k1.num == k2.num;
+static uint32_t inthash(upb_tabkey key) { return upb_inthash(key.num); }
+
+static bool inteql(upb_tabkey k1, lookupkey_t k2) {
+ return k1.num == k2.key.num;
}
static _upb_value *mutable_array(upb_inttable *t) {
@@ -330,7 +368,7 @@ static _upb_value *inttable_val(upb_inttable *t, uintptr_t key) {
return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
} else {
upb_tabent *e =
- (upb_tabent*)findentry(&t->t, upb_intkey(key), &upb_inthash, &inteql);
+ findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
return e ? &e->val : NULL;
}
}
@@ -402,7 +440,8 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
const upb_tabent *e = &t->t.entries[i];
upb_value v;
_upb_value_setval(&v, e->val, t->t.ctype);
- insert(&new_table, e->key, v, &upb_inthash, &inteql);
+ uint32_t hash = upb_inthash(e->key.num);
+ insert(&new_table, intkey(e->key.num), v, hash, &inthash, &inteql);
}
assert(t->t.count == new_table.count);
@@ -410,7 +449,7 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
uninit(&t->t);
t->t = new_table;
}
- insert(&t->t, upb_intkey(key), val, &upb_inthash, &inteql);
+ insert(&t->t, intkey(key), val, upb_inthash(key), &inthash, &inteql);
}
check(t);
return true;
@@ -446,7 +485,8 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
}
} else {
upb_tabkey removed;
- success = rm(&t->t, upb_intkey(key), val, &removed, &upb_inthash, &inteql);
+ uint32_t hash = upb_inthash(key);
+ success = rm(&t->t, intkey(key), val, &removed, hash, &inteql);
}
check(t);
return success;
diff --git a/upb/table.int.h b/upb/table.int.h
index 5e023c9..56891d7 100644
--- a/upb/table.int.h
+++ b/upb/table.int.h
@@ -25,6 +25,7 @@
#include <assert.h>
#include <stdint.h>
+#include <string.h>
#include "upb.h"
#ifdef __cplusplus
@@ -219,20 +220,27 @@ UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) {
return e->key.num == 0;
}
+// Used by some of the unit tests for generic hashing functionality.
+uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed);
+
UPB_INLINE upb_tabkey upb_intkey(uintptr_t key) {
- upb_tabkey k = {key}; return k;
+ upb_tabkey k;
+ k.num = key;
+ return k;
+}
+
+UPB_INLINE uint32_t upb_inthash(uintptr_t key) {
+ return (uint32_t)key;
}
-UPB_INLINE const upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) {
- return t->entries + ((uint32_t)key.num & t->mask);
+// Returns the main-position slot for "hash", i.e. entry (hash & t->mask).
+// Declared UPB_INLINE like the other helpers defined in this header, so that
+// translation units which include it without calling this function do not get
+// an unused-static-function warning.
+UPB_INLINE const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) {
+ return t->entries + (hash & t->mask);
}
UPB_INLINE bool upb_arrhas(_upb_value v) {
return v.uint64 != (uint64_t)UPB_ARRAY_EMPTYVAL;
}
-uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
-
// Initialize and uninitialize a table, respectively. If memory allocation
// failed, false is returned that the table is uninitialized.
bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype);
@@ -259,7 +267,14 @@ bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val);
// Looks up key in this table, returning "true" if the key was found.
// If v is non-NULL, copies the value for this key into *v.
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v);
-bool upb_strtable_lookup(const upb_strtable *t, const char *key, upb_value *v);
+bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
+ upb_value *v);
+
+// For NULL-terminated strings.
+UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key,
+ upb_value *v) {
+ return upb_strtable_lookup2(t, key, strlen(key), v);
+}
// Removes an item from the table. Returns true if the remove was successful,
// and stores the removed item in *val if non-NULL.
@@ -302,7 +317,7 @@ UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key,
} else {
const upb_tabent *e;
if (t->t.entries == NULL) return false;
- for (e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) {
+ for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) {
if ((uint32_t)e->key.num == key) {
_upb_value_setval(v, e->val, t->t.ctype);
return true;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback