From 3d0c7c45da5b72a88bfb03dc5ce3384b7f01cef6 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Tue, 18 Nov 2014 15:21:50 -0800 Subject: Sync to Google-internal development. --- Makefile | 59 +- tests/bindings/googlepb/test_vs_proto2.cc | 14 +- tests/bindings/ruby/upb.rb | 62 ++ tests/pb/test_decoder.cc | 18 + tools/dump_cinit.lua | 10 +- upb/bindings/googlepb/bridge.cc | 1 + upb/bindings/googlepb/proto1.cc | 27 + upb/bindings/googlepb/proto2.cc | 220 +++++- upb/bindings/lua/upb.c | 21 +- upb/bindings/ruby/README | 2 - upb/bindings/ruby/README.md | 30 + upb/bindings/ruby/extconf.rb | 8 +- upb/bindings/ruby/upb.c | 1209 +++++++++++++++++++++++------ upb/def.c | 25 +- upb/def.h | 85 +- upb/descriptor/descriptor.upb.c | 162 ++-- upb/descriptor/reader.c | 11 + upb/pb/compile_decoder.c | 308 ++++---- upb/pb/compile_decoder_x64.c | 2 +- upb/pb/compile_decoder_x64.dasc | 28 +- upb/pb/encoder.c | 769 +++++++++--------- upb/pb/encoder.h | 163 +++- upb/pb/varint.c | 23 + upb/pb/varint.int.h | 9 + upb/table.c | 106 ++- upb/table.int.h | 29 +- 26 files changed, 2416 insertions(+), 985 deletions(-) create mode 100644 tests/bindings/ruby/upb.rb delete mode 100644 upb/bindings/ruby/README create mode 100644 upb/bindings/ruby/README.md diff --git a/Makefile b/Makefile index 9c5bef0..0d5ce56 100644 --- a/Makefile +++ b/Makefile @@ -44,8 +44,8 @@ CXX=c++ CFLAGS=-std=c99 CXXFLAGS=-Wno-unused-private-field INCLUDE=-I. -CPPFLAGS=$(INCLUDE) -DNDEBUG -Wall -Wextra -Wno-sign-compare $(USER_CPPFLAGS) -LDLIBS=-lpthread upb/libupb.a +WARNFLAGS=-Wall -Wextra -Wno-sign-compare +CPPFLAGS=$(INCLUDE) -DNDEBUG $(USER_CPPFLAGS) LUA=lua # 5.1 and 5.2 should both be supported ifneq ($(WITH_JIT), no) @@ -91,6 +91,7 @@ clean_leave_profile: @rm -rf tools/upbc deps @rm -rf upb/bindings/python/build @rm -f upb/bindings/ruby/Makefile + @rm -f upb/bindings/ruby/upb.o @rm -f upb/bindings/ruby/upb.so @rm -f upb/bindings/ruby/mkmf.log @find . 
| grep dSYM | xargs rm -rf @@ -109,7 +110,7 @@ clean: clean_leave_profile # lib/lib%.a: $(call make_objs,o) # gcc -c -o $@ $^ # -# SECONDEXPANSION: flips on a bit essentially that allows this "seconary +# SECONDEXPANSION: flips on a bit essentially that allows this "secondary # expansion": it must appear before anything that uses make_objs. .SECONDEXPANSION: to_srcs = $(subst .,_,$(1)_SRCS) @@ -130,15 +131,16 @@ upb_SRCS = \ upb/upb.c \ upb_descriptor_SRCS = \ - upb/descriptor/reader.c \ upb/descriptor/descriptor.upb.c \ + upb/descriptor/reader.c \ upb_pb_SRCS = \ - upb/pb/decoder.c \ upb/pb/compile_decoder.c \ + upb/pb/decoder.c \ + upb/pb/encoder.c \ upb/pb/glue.c \ - upb/pb/varint.c \ upb/pb/textprinter.c \ + upb/pb/varint.c \ # If the JIT is enabled we include its source. # If Lua is present we can use DynASM to regenerate the .h file. @@ -146,7 +148,6 @@ ifdef USE_JIT upb_pb_SRCS += upb/pb/compile_decoder_x64.c obj/pb/compile_decoder_x64.o obj/pb/compile_decoder_x64.lo: upb/pb/compile_decoder_x64.h obj/pb/compile_decoder_x64.o: CFLAGS=-std=gnu99 -obj/pb/compile_decoder_x64.o: OPT=-Os upb/pb/compile_decoder_x64.h: upb/pb/compile_decoder_x64.dasc $(E) DYNASM $< @@ -164,6 +165,11 @@ lib/libupb.a : OPT = -Os lib/libupb.descriptor.a : OPT = -Os obj/pb/compile_decoder.o : OPT = -Os obj/pb/compile_decoder_64.o : OPT = -Os + +ifdef USE_JIT +obj/pb/compile_decoder_x64.o: OPT=-Os +endif + endif $(UPB_PICLIBS): lib/lib%_pic.a: $(call make_objs,lo) @@ -177,32 +183,32 @@ $(UPB_LIBS): lib/lib%.a: $(call make_objs,o) obj/%.o: upb/%.c | $$(@D)/. $(E) CC $< - $(Q) $(CC) $(OPT) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< + $(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< obj/%.o: upb/%.cc | $$(@D)/. $(E) CXX $< - $(Q) $(CXX) $(OPT) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< + $(Q) $(CXX) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< obj/%.lo: upb/%.c | $$(@D)/. 
$(E) 'CC -fPIC' $< - $(Q) $(CC) $(OPT) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< -fPIC + $(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< -fPIC obj/%.lo: upb/%.cc | $$(@D)/. $(E) CXX $< - $(Q) $(CXX) $(OPT) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< -fPIC + $(Q) $(CXX) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< -fPIC # Note: mkdir -p is technically susceptible to races when used with make -j. %/.: $(Q) mkdir -p $@ # Regenerating the auto-generated files in upb/. -upb/descriptor.pb: upb/descriptor.proto +upb/descriptor/descriptor.pb: upb/descriptor/descriptor.proto @# TODO: replace with upbc - protoc upb/descriptor.proto -oupb/descriptor.pb + protoc upb/descriptor/descriptor.proto -oupb/descriptor/descriptor.pb -descriptorgen: upb/descriptor.pb tools/upbc +descriptorgen: upb/descriptor/descriptor.pb tools/upbc @# Regenerate descriptor_const.h - ./tools/upbc -o upb/descriptor upb/descriptor.pb + ./tools/upbc -o upb/descriptor/descriptor upb/descriptor/descriptor.pb tools/upbc: tools/upbc.c $(LIBUPB) $(E) CC $< @@ -232,15 +238,15 @@ tests: $(TESTS) tests/testmain.o: tests/testmain.cc $(E) CXX $< - $(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $< + $(Q) $(CXX) $(OPT) $(WARNFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $< $(C_TESTS): % : %.c tests/testmain.o $$(LIBS) $(E) CC $< - $(Q) $(CC) $(CPPFLAGS) $(CFLAGS) -o $@ tests/testmain.o $< $(LIBS) + $(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -o $@ tests/testmain.o $< $(LIBS) $(CC_TESTS): % : %.cc tests/testmain.o $$(LIBS) $(E) CXX $< - $(Q) $(CXX) $(CPPFLAGS) $(CXXFLAGS) -Wno-deprecated -o $@ tests/testmain.o $< $(LIBS) + $(Q) $(CXX) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CXXFLAGS) -Wno-deprecated -o $@ tests/testmain.o $< $(LIBS) # Several of these tests don't actually test these libs, but use them # incidentally to load a descriptor @@ -337,7 +343,7 @@ GOOGLEPB_TEST_DEPS = \ tests/bindings/googlepb/test_vs_proto2.googlemessage1: $(GOOGLEPB_TEST_DEPS) \ benchmarks/googlemessage1.h $(E) CXX $< 
'(benchmarks::SpeedMessage1)' - $(Q) $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< \ + $(Q) $(CXX) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< \ -DMESSAGE_CIDENT="benchmarks::SpeedMessage1" \ -DMESSAGE_DATA_HFILE=\"benchmarks/googlemessage1.h\" \ benchmarks/google_messages.pb.cc tests/testmain.o -lprotobuf -lpthread \ @@ -346,7 +352,7 @@ tests/bindings/googlepb/test_vs_proto2.googlemessage1: $(GOOGLEPB_TEST_DEPS) \ tests/bindings/googlepb/test_vs_proto2.googlemessage2: $(GOOGLEPB_TEST_DEPS) \ benchmarks/googlemessage2.h $(E) CXX $< '(benchmarks::SpeedMessage2)' - $(Q) $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< \ + $(Q) $(CXX) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< \ -DMESSAGE_CIDENT="benchmarks::SpeedMessage2" \ -DMESSAGE_DATA_HFILE=\"benchmarks/googlemessage2.h\" \ benchmarks/google_messages.pb.cc tests/testmain.o -lprotobuf -lpthread \ @@ -401,7 +407,7 @@ LUA_LIB_DEPS = \ upb/bindings/lua/upb.so: upb/bindings/lua/upb.c upb/bindings/lua/upb.lua.h $(LUA_LIB_DEPS) $(E) CC upb/bindings/lua/upb.c - $(Q) $(CC) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< $(LUA_LDFLAGS) $(LUA_LIB_DEPS) + $(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< $(LUA_LDFLAGS) $(LUA_LIB_DEPS) # TODO: the dependency between upb/pb.so and upb.so is expressed at the # .so level, which means that the OS will try to load upb.so when upb/pb.so @@ -414,7 +420,7 @@ upb/bindings/lua/upb.so: upb/bindings/lua/upb.c upb/bindings/lua/upb.lua.h $(LUA # be expressed at the .so level. 
upb/bindings/lua/upb/pb.so: upb/bindings/lua/upb/pb.c upb/bindings/lua/upb.so $(E) CC upb/bindings/lua/upb.pb.c - $(Q) $(CC) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< upb/bindings/lua/upb.so $(LUA_LDFLAGS) + $(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< upb/bindings/lua/upb.so $(LUA_LDFLAGS) # Python extension ############################################################# @@ -434,9 +440,16 @@ pythontest: $(PYTHONEXT) RUBY=ruby RUBYEXT=upb/bindings/ruby/upb.so ruby: $(RUBYEXT) + +# We pass our important flags to Ruby, but leave the warning flags out. +# Some uses of the Ruby/C API trigger the warnings we normally use, so +# we let Ruby decide the set of warning options to use. upb/bindings/ruby/Makefile: upb/bindings/ruby/extconf.rb lib/libupb_pic.a lib/libupb.pb_pic.a lib/libupb.descriptor_pic.a $(E) RUBY upb/bindings/ruby/extconf.rb - $(Q) cd upb/bindings/ruby && ruby extconf.rb + $(Q) cd upb/bindings/ruby && ruby extconf.rb "$(OPT) $(CPPFLAGS) $(CFLAGS)" $(RUBYEXT): upb/bindings/ruby/upb.c upb/bindings/ruby/Makefile $(E) CC upb/bindings/ruby/upb.c $(Q) cd upb/bindings/ruby && make + +rubytest: $(RUBYEXT) upb/descriptor/descriptor.pb + RUBYLIB="upb/bindings/ruby" ruby tests/bindings/ruby/upb.rb diff --git a/tests/bindings/googlepb/test_vs_proto2.cc b/tests/bindings/googlepb/test_vs_proto2.cc index 2d53f80..15a5388 100644 --- a/tests/bindings/googlepb/test_vs_proto2.cc +++ b/tests/bindings/googlepb/test_vs_proto2.cc @@ -27,8 +27,14 @@ #include "upb/pb/glue.h" #include "upb/pb/varint.int.h" -static const char message_data[] = { -#include MESSAGE_DATA_HFILE +// Pull in string data from benchmarks/google_message{1,2}.dat +// (the .h files are generated with xxd). 
+const char message1_data[] = { +#include "benchmarks/google_message1.h" +}; + +const char message2_data[] = { +#include "benchmarks/google_message2.h" }; void compare_metadata(const google::protobuf::Descriptor* d, @@ -117,8 +123,8 @@ extern "C" { int run_tests(int argc, char *argv[]) { UPB_UNUSED(argc); UPB_UNUSED(argv); - size_t len = sizeof(message_data); - const char *str = message_data; + size_t len = sizeof(MESSAGE_DATA_IDENT); + const char *str = MESSAGE_DATA_IDENT; MESSAGE_CIDENT msg1; MESSAGE_CIDENT msg2; diff --git a/tests/bindings/ruby/upb.rb b/tests/bindings/ruby/upb.rb new file mode 100644 index 0000000..3e06c17 --- /dev/null +++ b/tests/bindings/ruby/upb.rb @@ -0,0 +1,62 @@ +#!/usr/bin/ruby +# +# Tests for Ruby upb extension. + +require 'test/unit' +require 'set' +require 'upb' + +def get_descriptor + File.open("upb/descriptor/descriptor.pb").read +end + +def load_descriptor + symtab = Upb::SymbolTable.new + symtab.load_descriptor(get_descriptor()) + return symtab +end + +def get_message_class(name) + return Upb.get_message_class(load_descriptor().lookup(name)) +end + +class TestRubyExtension < Test::Unit::TestCase + def test_parsedescriptor + msgdef = load_descriptor.lookup("google.protobuf.FileDescriptorSet") + assert_instance_of(Upb::MessageDef, msgdef) + + file_descriptor_set = Upb.get_message_class(msgdef) + msg = file_descriptor_set.parse(get_descriptor()) + + # A couple message types we know should exist. 
+ names = Set.new(["DescriptorProto", "FieldDescriptorProto"]) + + msg.file.each { |file| + file.message_type.each { |message_type| + names.delete(message_type.name) + } + } + + assert_equal(0, names.size) + end + + def test_parseserialize + field_descriptor_proto = get_message_class("google.protobuf.FieldDescriptorProto") + field_options = get_message_class("google.protobuf.FieldOptions") + + field = field_descriptor_proto.new + + field.name = "MyName" + field.number = 5 + field.options = field_options.new + field.options.packed = true + + serialized = Upb::Message.serialize(field) + + field2 = field_descriptor_proto.parse(serialized) + + assert_equal("MyName", field2.name) + assert_equal(5, field2.number) + assert_equal(true, field2.options.packed) + end +end diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc index 2d94d82..d976a54 100644 --- a/tests/pb/test_decoder.cc +++ b/tests/pb/test_decoder.cc @@ -207,8 +207,16 @@ void indentbuf(string *buf, int depth) { buf->append(2 * depth, ' '); } +void check_stack_alignment() { +#ifdef UPB_USE_JIT_X64 + void *rsp = __builtin_frame_address(0); + ASSERT(((uintptr_t)rsp % 16) == 0); +#endif +} + #define NUMERIC_VALUE_HANDLER(member, ctype, fmt) \ bool value_##member(int* depth, const uint32_t* num, ctype val) { \ + check_stack_alignment(); \ indentbuf(&output, *depth); \ appendf(&output, "%" PRIu32 ":%" fmt "\n", *num, val); \ return true; \ @@ -222,12 +230,14 @@ NUMERIC_VALUE_HANDLER(float, float, "g") NUMERIC_VALUE_HANDLER(double, double, "g") bool value_bool(int* depth, const uint32_t* num, bool val) { + check_stack_alignment(); indentbuf(&output, *depth); appendf(&output, "%" PRIu32 ":%s\n", *num, val ? 
"true" : "false"); return true; } int* startstr(int* depth, const uint32_t* num, size_t size_hint) { + check_stack_alignment(); indentbuf(&output, *depth); appendf(&output, "%" PRIu32 ":(%zu)\"", *num, size_hint); return depth + 1; @@ -237,6 +247,7 @@ size_t value_string(int* depth, const uint32_t* num, const char* buf, size_t n, const upb::BufferHandle* handle) { UPB_UNUSED(num); UPB_UNUSED(depth); + check_stack_alignment(); output.append(buf, n); ASSERT(handle == &global_handle); return n; @@ -245,11 +256,13 @@ size_t value_string(int* depth, const uint32_t* num, const char* buf, bool endstr(int* depth, const uint32_t* num) { UPB_UNUSED(depth); UPB_UNUSED(num); + check_stack_alignment(); output.append("\"\n"); return true; } int* startsubmsg(int* depth, const uint32_t* num) { + check_stack_alignment(); indentbuf(&output, *depth); appendf(&output, "%" PRIu32 ":{\n", *num); return depth + 1; @@ -257,12 +270,14 @@ int* startsubmsg(int* depth, const uint32_t* num) { bool endsubmsg(int* depth, const uint32_t* num) { UPB_UNUSED(num); + check_stack_alignment(); indentbuf(&output, *depth); output.append("}\n"); return true; } int* startseq(int* depth, const uint32_t* num) { + check_stack_alignment(); indentbuf(&output, *depth); appendf(&output, "%" PRIu32 ":[\n", *num); return depth + 1; @@ -270,12 +285,14 @@ int* startseq(int* depth, const uint32_t* num) { bool endseq(int* depth, const uint32_t* num) { UPB_UNUSED(num); + check_stack_alignment(); indentbuf(&output, *depth); output.append("]\n"); return true; } bool startmsg(int* depth) { + check_stack_alignment(); indentbuf(&output, *depth); output.append("<\n"); return true; @@ -283,6 +300,7 @@ bool startmsg(int* depth) { bool endmsg(int* depth, upb_status* status) { UPB_UNUSED(status); + check_stack_alignment(); indentbuf(&output, *depth); output.append(">\n"); return true; diff --git a/tools/dump_cinit.lua b/tools/dump_cinit.lua index 2988082..13e1f52 100644 --- a/tools/dump_cinit.lua +++ b/tools/dump_cinit.lua @@ 
-453,14 +453,14 @@ local function dump_defs_c(symtab, basename, namespace, append) intfmt = "0" end -- UPB_FIELDDEF_INIT(label, type, intfmt, tagdelim, is_extension, lazy, - -- name, num, msgdef, subdef, selector_base, index, - -- default_value) - append(' UPB_FIELDDEF_INIT(%s, %s, %s, %s, %s, %s, "%s", %d, %s, %s, ' .. - '%d, %d, {0},' .. -- TODO: support default value + -- packed, name, num, msgdef, subdef, selector_base, + -- index, -- default_value) + append(' UPB_FIELDDEF_INIT(%s, %s, %s, %s, %s, %s, %s, "%s", %d, %s, ' .. + '%s, %d, %d, {0},' .. -- TODO: support default value '&reftables[%d], &reftables[%d]),\n', const(f, "label"), const(f, "type"), intfmt, boolstr(f:istagdelim()), boolstr(f:is_extension()), - boolstr(f:lazy()), f:name(), f:number(), + boolstr(f:lazy()), boolstr(f:packed()), f:name(), f:number(), linktab:addr(f:containing_type()), subdef, f:_selector_base(), f:index(), reftable, reftable + 1 diff --git a/upb/bindings/googlepb/bridge.cc b/upb/bindings/googlepb/bridge.cc index a666ff6..6ae8868 100644 --- a/upb/bindings/googlepb/bridge.cc +++ b/upb/bindings/googlepb/bridge.cc @@ -115,6 +115,7 @@ reffed_ptr DefBuilder::NewFieldDef(const goog::FieldDescriptor* f, upb_f->set_number(f->number(), &status); upb_f->set_label(FieldDef::ConvertLabel(f->label())); upb_f->set_descriptor_type(FieldDef::ConvertDescriptorType(f->type())); + upb_f->set_packed(f->options().packed()); #ifdef UPB_GOOGLE3 upb_f->set_lazy(f->options().lazy()); #endif diff --git a/upb/bindings/googlepb/proto1.cc b/upb/bindings/googlepb/proto1.cc index 0b46fed..68b572c 100644 --- a/upb/bindings/googlepb/proto1.cc +++ b/upb/bindings/googlepb/proto1.cc @@ -30,6 +30,10 @@ #undef private #undef protected +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS +namespace proto2 { class Arena; } +#endif + #include "upb/def.h" #include "upb/handlers.h" #include "upb/shim/shim.h" @@ -448,12 +452,35 @@ class P2R_Handlers { class RepeatedMessageTypeHandler { public: typedef proto2::Message Type; +#ifndef 
GOOGLE_PROTOBUF_HAS_ARENAS // AddAllocated() calls this, but only if other objects are sitting // around waiting for reuse, which we will not do. static void Delete(Type* t) { UPB_UNUSED(t); assert(false); } +#else + static ::proto2::Arena* GetArena(Type* t) { + return t->GetArena(); + } + static void* GetMaybeArenaPointer(Type* t) { + return t->GetMaybeArenaPointer(); + } + static inline Type* NewFromPrototype( + const Type* prototype, ::proto2::Arena* arena = NULL) { + return prototype->New(arena); + } + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t, ::proto2::Arena* arena) { + UPB_UNUSED(t); + UPB_UNUSED(arena); + assert(false); + } + static void Merge(const Type& from, Type* to) { + to->MergeFrom(from); + } +#endif }; // Closure is a RepeatedPtrField*, but we access it through diff --git a/upb/bindings/googlepb/proto2.cc b/upb/bindings/googlepb/proto2.cc index 657f802..498ae2d 100644 --- a/upb/bindings/googlepb/proto2.cc +++ b/upb/bindings/googlepb/proto2.cc @@ -261,11 +261,64 @@ case goog::FieldDescriptor::cpptype: \ return r->offsets_[index]; } - class FieldOffset { + // Base class that provides access to elements of the message as a whole, such + // as the unknown-field set, and is inherited by context classes for specific + // field handlers. 
+ class FieldDataBase { + public: + FieldDataBase(const goog::internal::GeneratedMessageReflection* r) + : unknown_fields_offset_(r->unknown_fields_offset_) +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + , arena_offset_(r->arena_offset_) +#endif // GOOGLE_PROTOBUF_HAS_ARENAS + {} + +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + goog::Arena* GetArena(const goog::Message& message) const { + if (unknown_fields_offset_ == + goog::internal::GeneratedMessageReflection:: + kUnknownFieldSetInMetadata) { + const goog::internal::InternalMetadataWithArena* metadata = + GetConstPointer( + &message, arena_offset_); + return metadata->arena(); + } else if (arena_offset_ != + goog::internal::GeneratedMessageReflection::kNoArenaPointer) { + return *GetConstPointer(&message, arena_offset_); + } else { + return NULL; + } + } + + goog::UnknownFieldSet* GetUnknownFieldSet(goog::Message* message) const { + if (unknown_fields_offset_ == + goog::internal::GeneratedMessageReflection:: + kUnknownFieldSetInMetadata) { + goog::internal::InternalMetadataWithArena* metadata = + GetPointer( + message, arena_offset_); + return metadata->mutable_unknown_fields(); + } + return GetPointer(message, unknown_fields_offset_); + } +#else // ifdef GOOGLE_PROTOBUF_HAS_ARENAS + goog::UnknownFieldSet* GetUnknownFieldSet(goog::Message* message) const { + return GetPointer(message, unknown_fields_offset_); + } +#endif // ifdef !GOOGLE_PROTOBUF_HAS_ARENAS + private: + int unknown_fields_offset_; +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + int arena_offset_; +#endif // GOOGLE_PROTOBUF_HAS_ARENAS + }; + + class FieldOffset : public FieldDataBase { public: FieldOffset(const goog::FieldDescriptor* f, const goog::internal::GeneratedMessageReflection* r) - : offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) { + : FieldDataBase(r), + offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) { if (!is_repeated_) { int64_t hasbit = GetHasbit(f, r); hasbyte_ = hasbit / 8; @@ -293,11 +346,12 @@ case goog::FieldDescriptor::cpptype: \ }; 
#ifdef GOOGLE_PROTOBUF_HAS_ONEOF - class OneofFieldData { + class OneofFieldData : public FieldDataBase { public: OneofFieldData(const goog::FieldDescriptor* f, const goog::internal::GeneratedMessageReflection* r) - : field_number_offset_(GetOneofDiscriminantOffset(f, r)), + : FieldDataBase(r), + field_number_offset_(GetOneofDiscriminantOffset(f, r)), field_number_(f->number()) { const goog::OneofDescriptor* oneof = f->containing_oneof(); @@ -343,6 +397,40 @@ case goog::FieldDescriptor::cpptype: \ return GetPointer(message, field_number_offset_); } + void ClearOneof(goog::Message* m, const FieldOffset* ofs, + int field_number) const { +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + if (GetArena(*m) != NULL) { + return; + } +#endif + switch (types_.at(field_number)) { + case ONEOF_TYPE_NONE: + break; + case ONEOF_TYPE_STRING: + delete *ofs->GetFieldPointer(m); + break; + case ONEOF_TYPE_MESSAGE: + delete *ofs->GetFieldPointer(m); + break; +#ifdef UPB_GOOGLE3 + case ONEOF_TYPE_GLOBALSTRING: + delete *ofs->GetFieldPointer(m); + break; + case ONEOF_TYPE_CORD: + delete *ofs->GetFieldPointer(m); + break; + case ONEOF_TYPE_STRINGPIECE: + delete *ofs->GetFieldPointer< + goog::internal::StringPieceField*>(m); + break; + case ONEOF_TYPE_LAZYFIELD: + delete *ofs->GetFieldPointer(m); + break; +#endif + } + } + // Returns whether this is different than the previous value of the // field_number; this implies that the current value was freed (if // necessary) and the caller should allocate a new instance. 
@@ -351,30 +439,7 @@ case goog::FieldDescriptor::cpptype: \ if (*field_number == field_number_) { return false; } else { - switch (types_.at(*field_number)) { - case ONEOF_TYPE_NONE: - break; - case ONEOF_TYPE_STRING: - delete *ofs->GetFieldPointer(m); - break; - case ONEOF_TYPE_MESSAGE: - delete *ofs->GetFieldPointer(m); - break; -#ifdef UPB_GOOGLE3 - case ONEOF_TYPE_GLOBALSTRING: - delete *ofs->GetFieldPointer(m); - break; - case ONEOF_TYPE_CORD: - delete *ofs->GetFieldPointer(m); - break; - case ONEOF_TYPE_STRINGPIECE: - delete *ofs->GetFieldPointer(m); - break; - case ONEOF_TYPE_LAZYFIELD: - delete *ofs->GetFieldPointer(m); - break; -#endif - } + ClearOneof(m, ofs, *field_number); *field_number = field_number_; return true; } @@ -578,7 +643,6 @@ case goog::FieldDescriptor::cpptype: \ const upb::FieldDef* f) : FieldOffset(proto2_f, r), field_number_(f->number()), - unknown_fields_offset_(r->unknown_fields_offset_), enum_(upb_downcast_enumdef(f->subdef())) {} bool IsValidValue(int32_t val) const { @@ -587,13 +651,8 @@ case goog::FieldDescriptor::cpptype: \ int32_t field_number() const { return field_number_; } - goog::UnknownFieldSet* mutable_unknown_fields(goog::Message* m) const { - return GetPointer(m, unknown_fields_offset_); - } - private: int32_t field_number_; - size_t unknown_fields_offset_; const upb::EnumDef* enum_; }; @@ -617,7 +676,7 @@ case goog::FieldDescriptor::cpptype: \ *message_val = val; data->SetHasbit(m); } else { - data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + data->GetUnknownFieldSet(m)->AddVarint(data->field_number(), val); } } @@ -631,7 +690,7 @@ case goog::FieldDescriptor::cpptype: \ data->GetFieldPointer >(m); r->Add(val); } else { - data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + data->GetUnknownFieldSet(m)->AddVarint(data->field_number(), val); } } @@ -718,7 +777,14 @@ case goog::FieldDescriptor::cpptype: \ T** str = data->GetStringPointer(m); data->SetHasbit(m); // If it points to 
the default instance, we must create a new instance. - if (*str == data->prototype()) *str = new T(); + if (*str == data->prototype()) { + *str = new T(); +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + if (data->GetArena(*m)) { + data->GetArena(*m)->Own(*str); + } +#endif + } (*str)->clear(); // reserve() here appears to hurt performance rather than help. return *str; @@ -749,6 +815,16 @@ case goog::FieldDescriptor::cpptype: \ T** str = ofs->GetFieldPointer(m); if (data->SetOneofHas(m)) { *str = new T(); +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + // Note that in the main proto2-arenas implementation, the parsing code + // creates ArenaString instances for string field data, and the + // implementation later dynamically converts to ::string if a mutable + // version is requested. To keep complexity down in this binding, we + // create an ordinary string and allow the arena to own its destruction. + if (data->GetArena(*m) != NULL) { + data->GetArena(*m)->Own(*str); + } +#endif } else { (*str)->clear(); } @@ -857,7 +933,11 @@ case goog::FieldDescriptor::cpptype: \ data->SetHasbit(m); goog::Message** subm = data->GetFieldPointer(m); if (*subm == NULL || *subm == data->prototype()) { +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + *subm = data->prototype()->New(data->GetArena(*m)); +#else *subm = data->prototype()->New(); +#endif } return *subm; } @@ -865,14 +945,50 @@ case goog::FieldDescriptor::cpptype: \ class RepeatedMessageTypeHandler { public: typedef goog::Message Type; +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + static ::proto2::Arena* GetArena(Type* t) { + return t->GetArena(); + } + static void* GetMaybeArenaPointer(Type* t) { + return t->GetMaybeArenaPointer(); + } + static inline Type* NewFromPrototype( + const Type* prototype, ::proto2::Arena* arena = NULL) { + return prototype->New(arena); + } + static void Delete(Type* t, goog::Arena* arena = NULL) { + if (arena == NULL) { + delete t; + } + } +#else // ifdef GOOGLE_PROTOBUF_HAS_ARENAS + static inline Type* NewFromPrototype(const Type* 
prototype) { + return prototype->New(); + } // AddAllocated() calls this, but only if other objects are sitting // around waiting for reuse, which we will not do. static void Delete(Type* t) { UPB_UNUSED(t); assert(false); } +#endif // ifdef GOOGLE_PROTOBUF_HAS_ARENAS + + static void Merge(const Type& from, Type* to) { + to->MergeFrom(from); + } }; +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + // Closure is a RepeatedPtrField*, but we access it through + // its base class RepeatedPtrFieldBase*. + static goog::Message* StartRepeatedSubMessage( + goog::internal::RepeatedPtrFieldBase* r, + const SubMessageHandlerData* data) { + goog::Message* submsg = data->prototype()->New(r->GetArenaNoVirtual()); + r->AddAllocated(submsg); + return submsg; + } +#else // ifdef GOOGLE_PROTOBUF_HAS_ARENAS // Closure is a RepeatedPtrField*, but we access it through // its base class RepeatedPtrFieldBase*. static goog::Message* StartRepeatedSubMessage( @@ -886,13 +1002,19 @@ case goog::FieldDescriptor::cpptype: \ return submsg; } +#endif // ifdef GOOGLE_PROTOBUF_HAS_ARENAS + #ifdef GOOGLE_PROTOBUF_HAS_ONEOF static goog::Message* StartOneofSubMessage( goog::Message* m, const OneofSubMessageHandlerData* data) { const FieldOffset* ofs = data; goog::Message** subm = ofs->GetFieldPointer(m); if (data->SetOneofHas(m)) { +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + *subm = data->prototype()->New(data->GetArena(*m)); +#else *subm = data->prototype()->New(); +#endif } return *subm; } @@ -1123,9 +1245,21 @@ case goog::FieldDescriptor::cpptype: \ LazyMessageExtensionImpl() {} virtual ~LazyMessageExtensionImpl() {} +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + virtual LazyMessageExtension* New() const { + return New(NULL); + } + + virtual LazyMessageExtension* New(proto2::Arena* arena) const { + LazyMessageExtensionImpl* message = + ::proto2::Arena::Create(arena); + return message; + } +#else // ifdef GOOGLE_PROTOBUF_HAS_ARENAS virtual LazyMessageExtension* New() const { return new LazyMessageExtensionImpl(); } +#endif // 
ifdef GOOGLE_PROTOBUF_HAS_ARENAS virtual const proto2::MessageLite& GetMessage( const proto2::MessageLite& prototype) const { @@ -1149,6 +1283,12 @@ case goog::FieldDescriptor::cpptype: \ static_cast(prototype)); } + virtual proto2::MessageLite* UnsafeArenaReleaseMessage( + const proto2::MessageLite& prototype) { + return lazy_field_.UnsafeArenaReleaseByPrototype( + static_cast(prototype)); + } + virtual bool IsInitialized() const { return true; } virtual int ByteSize() const { return lazy_field_.MessageByteSize(); } @@ -1201,7 +1341,13 @@ case goog::FieldDescriptor::cpptype: \ LazyMessageExtensionImpl* lazy_extension; if (set->MaybeNewExtension(data->number(), data->field_descriptor(), &item)) { +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + lazy_extension = + ::proto2::Arena::Create( + m->GetArena()); +#else lazy_extension = new LazyMessageExtensionImpl(); +#endif item->type = UPB_DESCRIPTOR_TYPE_MESSAGE; item->is_repeated = false; item->is_lazy = true; diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c index f257430..2bd78af 100644 --- a/upb/bindings/lua/upb.c +++ b/upb/bindings/lua/upb.c @@ -640,6 +640,12 @@ static int lupb_fielddef_number(lua_State *L) { return 1; } +static int lupb_fielddef_packed(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushboolean(L, upb_fielddef_packed(f)); + return 1; +} + static int lupb_fielddef_subdef(lua_State *L) { const upb_fielddef *f = lupb_fielddef_check(L, 1); if (!upb_fielddef_hassubdef(f)) @@ -753,6 +759,12 @@ static int lupb_fielddef_setnumber(lua_State *L) { return 0; } +static int lupb_fielddef_setpacked(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + upb_fielddef_setpacked(f, lupb_checkbool(L, 2)); + return 0; +} + static int lupb_fielddef_setsubdef(lua_State *L) { upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); const upb_def *def = NULL; @@ -817,6 +829,7 @@ static const struct luaL_Reg lupb_fielddef_m[] = { {"lazy", lupb_fielddef_lazy}, {"name", 
lupb_fielddef_name}, {"number", lupb_fielddef_number}, + {"packed", lupb_fielddef_packed}, {"subdef", lupb_fielddef_subdef}, {"subdef_name", lupb_fielddef_subdefname}, {"type", lupb_fielddef_type}, @@ -828,6 +841,7 @@ static const struct luaL_Reg lupb_fielddef_m[] = { {"set_lazy", lupb_fielddef_setlazy}, {"set_name", lupb_fielddef_setname}, {"set_number", lupb_fielddef_setnumber}, + {"set_packed", lupb_fielddef_setpacked}, {"set_subdef", lupb_fielddef_setsubdef}, {"set_subdef_name", lupb_fielddef_setsubdefname}, {"set_type", lupb_fielddef_settype}, @@ -926,7 +940,7 @@ static int lupb_msgdef_field(lua_State *L) { if (type == LUA_TNUMBER) { f = upb_msgdef_itof(m, lua_tointeger(L, 2)); } else if (type == LUA_TSTRING) { - f = upb_msgdef_ntof(m, lua_tostring(L, 2)); + f = upb_msgdef_ntofz(m, lua_tostring(L, 2)); } else { const char *msg = lua_pushfstring(L, "number or string expected, got %s", luaL_typename(L, 2)); @@ -1358,8 +1372,9 @@ const upb_msgdef *lupb_msg_checkdef(lua_State *L, int narg) { static const upb_fielddef *lupb_msg_checkfield(lua_State *L, const lupb_msgdef *lmd, int fieldarg) { - const char *fieldname = luaL_checkstring(L, fieldarg); - const upb_fielddef *f = upb_msgdef_ntof(lmd->md, fieldname); + size_t len; + const char *fieldname = luaL_checklstring(L, fieldarg, &len); + const upb_fielddef *f = upb_msgdef_ntof(lmd->md, fieldname, len); if (!f) { const char *msg = lua_pushfstring(L, "no such field: %s", fieldname); diff --git a/upb/bindings/ruby/README b/upb/bindings/ruby/README deleted file mode 100644 index 50fd746..0000000 --- a/upb/bindings/ruby/README +++ /dev/null @@ -1,2 +0,0 @@ -This is PROTOTYPE code -- all interfaces are experimental -and will almost certainly change. 
diff --git a/upb/bindings/ruby/README.md b/upb/bindings/ruby/README.md new file mode 100644 index 0000000..12a7169 --- /dev/null +++ b/upb/bindings/ruby/README.md @@ -0,0 +1,30 @@ + +# Ruby extension + +To build, run (from the top upb directory): + + $ make ruby + $ sudo make install + +To test, run: + + $ make rubytest + +The binding currently supports: + + - loading message types from descriptors. + - constructing message instances + - reading and writing their members + - parsing and serializing the messages + - all data types (including nested and repeated) + +The binding does *not* currently support: + + - defining message types directly in Ruby code. + - generating Ruby code for a .proto file. + - type-checking for setters + - homogenous / type-checked arrays + - default values + +Because code generation is not currently implemented, the interface to import +a specific message type is kind of clunky for the moment. diff --git a/upb/bindings/ruby/extconf.rb b/upb/bindings/ruby/extconf.rb index 3637511..b105948 100644 --- a/upb/bindings/ruby/extconf.rb +++ b/upb/bindings/ruby/extconf.rb @@ -1,9 +1,13 @@ #!/usr/bin/ruby require 'mkmf' + +# Extra args are passed on the command-line. +$CFLAGS += (" " + ARGV[0]) + find_header("upb/upb.h", "../../..") or raise "Can't find upb headers" find_library("upb_pic", "upb_msgdef_new", "../../../lib") or raise "Can't find upb lib" -find_library("upb.pb_pic", "upb_decoder_init", "../../../lib") or raise "Can't find upb.pb lib" find_library("upb.descriptor_pic", "upb_descreader_init", "../../../lib") or raise "Can't find upb.descriptor lib" -$CFLAGS += " -Wall" +find_library("upb.pb_pic", "upb_pbdecoder_init", "../../../lib") or raise "Can't find upb.pb lib" + create_makefile("upb") diff --git a/upb/bindings/ruby/upb.c b/upb/bindings/ruby/upb.c index 0d25610..2817a15 100644 --- a/upb/bindings/ruby/upb.c +++ b/upb/bindings/ruby/upb.c @@ -1,42 +1,41 @@ +/* + * upb - a minimalist implementation of protocol buffers. 
+ * + * Copyright (c) 2014 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * upb (prototype) extension for Ruby. + */ + +#include "ruby/ruby.h" +#include "ruby/vm.h" -#include "ruby.h" #include "upb/def.h" #include "upb/handlers.h" #include "upb/pb/decoder.h" +#include "upb/pb/encoder.h" #include "upb/pb/glue.h" #include "upb/shim/shim.h" #include "upb/symtab.h" +// References to global state. +// +// Ruby does not have multi-VM support and it is common practice to store +// references to classes and other per-VM state in global variables. +static VALUE cSymbolTable; static VALUE cMessageDef; static VALUE cMessage; +static VALUE message_map; +static upb_inttable objcache; +static bool objcache_initialized = false; -// Wrapper around a upb_msgdef. -typedef struct { - // The msgdef for this message, and a DecoderMethod to parse protobufs and - // fill a message. - // - // We own refs on both of these. - const upb_msgdef *md; - const upb_pbdecodermethod *fill_method; - - size_t size; - uint32_t *field_offsets; -} rb_msgdef; - -// Ruby message object. -// This will be sized according to what fields are actually present. 
-typedef struct { - union u { - VALUE rbmsgdef; - char data[1]; - } data; -} rb_msg; +struct rupb_Message; +struct rupb_MessageDef; +typedef struct rupb_Message rupb_Message; +typedef struct rupb_MessageDef rupb_MessageDef; -#define DEREF(msg, ofs, type) *(type*)(&msg->data.data[ofs]) - -static void symtab_free(void *md) { - upb_symtab_unref(md, UPB_UNTRACKED_REF); -} +#define DEREF_RAW(ptr, ofs, type) *(type*)((char*)ptr + ofs) +#define DEREF(msg, ofs, type) *(type*)(&msg->data[ofs]) void rupb_checkstatus(upb_status *s) { if (!upb_ok(s)) { @@ -44,69 +43,195 @@ void rupb_checkstatus(upb_status *s) { } } -/* handlers *******************************************************************/ +static rupb_MessageDef *msgdef_get(VALUE self); +static rupb_Message *msg_get(VALUE self); +static const rupb_MessageDef *get_rbmsgdef(const upb_msgdef *md); +static const upb_handlers *new_fill_handlers(const rupb_MessageDef *rmd, + const void *owner); +static void putmsg(rupb_Message *msg, const rupb_MessageDef *rmd, + upb_sink *sink); +static VALUE msgdef_getwrapper(const upb_msgdef *md); +static VALUE new_message_class(VALUE message_def); +static VALUE get_message_class(VALUE klass, VALUE message); +static VALUE msg_new(VALUE msgdef); + +/* Ruby VALUE <-> C primitive conversions *************************************/ + +// Ruby VALUE -> C. +// TODO(haberman): add type/range/precision checks. 
+static float value_to_float(VALUE val) { return NUM2DBL(val); } +static double value_to_double(VALUE val) { return NUM2DBL(val); } +static bool value_to_bool(VALUE val) { return RTEST(val); } +static int32_t value_to_int32(VALUE val) { return NUM2INT(val); } +static uint32_t value_to_uint32(VALUE val) { return NUM2LONG(val); } +static int64_t value_to_int64(VALUE val) { return NUM2LONG(val); } +static uint64_t value_to_uint64(VALUE val) { return NUM2ULL(val); } + +// C -> Ruby VALUE +static VALUE float_to_value(float val) { return rb_float_new(val); } +static VALUE double_to_value(double val) { return rb_float_new(val); } +static VALUE bool_to_value(bool val) { return val ? Qtrue : Qfalse; } +static VALUE int32_to_value(int32_t val) { return INT2NUM(val); } +static VALUE uint32_to_value(uint32_t val) { return LONG2NUM(val); } +static VALUE int64_to_value(int64_t val) { return LONG2NUM(val); } +static VALUE uint64_to_value(uint64_t val) { return ULL2NUM(val); } + + +/* stringsink *****************************************************************/ + +// This should probably be factored into a common upb component. + +typedef struct { + upb_byteshandler handler; + upb_bytessink sink; + char *ptr; + size_t len, size; +} stringsink; + +static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) { + stringsink *sink = _sink; + sink->len = 0; + return sink; +} -// These are handlers for populating a Ruby protobuf message when parsing. 
+static size_t stringsink_string(void *_sink, const void *hd, const char *ptr, + size_t len, const upb_bufhandle *handle) { + UPB_UNUSED(hd); + UPB_UNUSED(handle); + + stringsink *sink = _sink; + size_t new_size = sink->size; + + while (sink->len + len > new_size) { + new_size *= 2; + } + + if (new_size != sink->size) { + sink->ptr = realloc(sink->ptr, new_size); + sink->size = new_size; + } + + memcpy(sink->ptr + sink->len, ptr, len); + sink->len += len; -static size_t strhandler(void *closure, const void *hd, const char *str, - size_t len, const upb_bufhandle *handle) { - rb_msg *msg = closure; - const size_t *ofs = hd; - DEREF(msg, *ofs, VALUE) = rb_str_new(str, len); return len; } -static const void *newhandlerdata(upb_handlers *h, uint32_t ofs) { - size_t *hd_ofs = ALLOC(size_t); - *hd_ofs = ofs; - upb_handlers_addcleanup(h, hd_ofs, free); - return hd_ofs; +void stringsink_init(stringsink *sink) { + upb_byteshandler_init(&sink->handler); + upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL); + upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL); + + upb_bytessink_reset(&sink->sink, &sink->handler, sink); + + sink->size = 32; + sink->ptr = malloc(sink->size); } -static void add_handlers_for_message(const void *closure, upb_handlers *h) { - // XXX: Doesn't support submessages properly yet. 
- const rb_msgdef *rmd = closure; - upb_msg_iter i; - for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); +void stringsink_uninit(stringsink *sink) { + free(sink->ptr); +} - if (upb_fielddef_isseq(f)) { - rb_raise(rb_eRuntimeError, "Doesn't support repeated fields yet."); - } - size_t ofs = rmd->field_offsets[upb_fielddef_index(f)]; +/* object cache ***************************************************************/ - switch (upb_fielddef_type(f)) { - case UPB_TYPE_BOOL: - case UPB_TYPE_INT32: - case UPB_TYPE_UINT32: - case UPB_TYPE_ENUM: - case UPB_TYPE_FLOAT: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT64: - case UPB_TYPE_DOUBLE: - upb_shim_set(h, f, ofs, -1); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: { - upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, ofs)); - // XXX: does't currently handle split buffers. - upb_handlers_setstring(h, f, strhandler, &attr); - upb_handlerattr_uninit(&attr); - break; - } - case UPB_TYPE_MESSAGE: - rb_raise(rb_eRuntimeError, "Doesn't support submessages yet."); - break; - } +// The object cache is a singleton mapping of void* -> Ruby Object. +// It caches Ruby objects that wrap C objects. +// +// When we are wrapping C objects it is desirable to give them identity +// semantics. In other words, if you reach the same C object via two different +// paths, it is desirable (and sometimes even required) that you get the same +// wrapper object both times. If we instead just created a new wrapper object +// every time you ask for one, we could end up with unexpected results like: +// +// f1 = msgdef.field("request_id") +// f2 = msgdef.field("request_id") +// +// # equal? tests identity equality. Returns false without a cache. +// f1.equal?(f2) +// +// We do not register the cache with Ruby's GC, so being in this map will not +// keep the object alive. 
This is the desired behavior, because it lets objects +// be freed if they have no references from Ruby. We do require, though, that +// objects remove themselves from the map when they are freed. In this respect +// the cache operates like a weak map where the values are weak. + +typedef VALUE createfunc(const void *obj); + +// Call to initialize the cache. Should be done once on process startup. +static void objcache_init() { + upb_inttable_init(&objcache, UPB_CTYPE_UINT64); + objcache_initialized = true; +} + +// Call to uninitialize the cache. Should be done once on process shutdown. +static void objcache_uninit(ruby_vm_t *vm) { + assert(objcache_initialized); + assert(upb_inttable_count(&objcache) == 0); + + objcache_initialized = false; + upb_inttable_uninit(&objcache); +} + +// Looks up the given object in the cache. If the corresponding Ruby wrapper +// object is found, returns it, otherwise creates the wrapper and returns that. +static VALUE objcache_getorcreate(const void *obj, createfunc *func) { + assert(objcache_initialized); + + upb_value v; + if (!upb_inttable_lookupptr(&objcache, obj, &v)) { + v = upb_value_uint64(func(obj)); + upb_inttable_insertptr(&objcache, obj, v); } + return upb_value_getuint64(v); } -// Creates upb handlers for populating a message. -static const upb_handlers *new_fill_handlers(const rb_msgdef *rmd, - const void *owner) { - return upb_handlers_newfrozen(rmd->md, owner, add_handlers_for_message, rmd); +// Removes the given object from the cache. Should only be called by the code +// that is freeing the wrapper object. +static void objcache_remove(const void *obj) { + assert(objcache_initialized); + + bool removed = upb_inttable_removeptr(&objcache, obj, NULL); + UPB_ASSERT_VAR(removed, removed); +} + +/* message layout *************************************************************/ + +// We layout Ruby messages using a raw block of C memory. 
We assign offsets for +// each member so that instances are laid out like a C struct instead of as +// instance variables. This saves both memory and CPU. + +typedef struct { + // The size of the block of memory we should allocate for instances. + size_t size; + + // Prototype to memcpy() onto new message instances. Size is "size" above. + void *prototype; + + // An offset for each member, indexed by upb_fielddef_index(f). + uint32_t *field_offsets; +} rb_msglayout; + +// Returns true for fields where the field value we store is a Ruby VALUE (ie. a +// direct pointer to another Ruby object) instead of storing the value directly +// in the message. +static bool is_ruby_value(const upb_fielddef *f) { + if (upb_fielddef_isseq(f)) { + // Repeated fields are pointers to arrays. + return true; + } + + if (upb_fielddef_issubmsg(f)) { + // Submessage fields are pointers to submessages. + return true; + } + + if (upb_fielddef_isstring(f)) { + // String fields are pointers to string objects. + return true; + } + + return false; } // General alignment rules are that each type needs to be stored at an address @@ -116,8 +241,12 @@ static size_t align_up(size_t val, size_t align) { } // Byte size to store each upb type. -static size_t rupb_sizeof(upb_fieldtype_t type) { - switch (type) { +static size_t rupb_sizeof(const upb_fielddef *f) { + if (is_ruby_value(f)) { + return sizeof(VALUE); + } + + switch (upb_fielddef_type(f)) { case UPB_TYPE_BOOL: return 1; case UPB_TYPE_INT32: @@ -129,15 +258,228 @@ static size_t rupb_sizeof(upb_fieldtype_t type) { case UPB_TYPE_UINT64: case UPB_TYPE_DOUBLE: return 8; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - case UPB_TYPE_MESSAGE: - return sizeof(VALUE); + default: + break; } assert(false); + return 0; } -/* msg ************************************************************************/ +// Calculates offsets for each field. +// +// This lets us pack protos like structs instead of storing them like +// dictionaries. 
This speeds up parsing a lot and also saves memory +// (unless messages are very sparse). +static void assign_offsets(rb_msglayout *layout, const upb_msgdef *md) { + layout->field_offsets = ALLOC_N(uint32_t, upb_msgdef_numfields(md)); + size_t ofs = 0; + upb_msg_iter i; + + for (upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + size_t field_size = rupb_sizeof(f); + + // Align field properly. + // + // TODO(haberman): optimize layout? For example we could sort fields + // big-to-small. + ofs = align_up(ofs, field_size); + + layout->field_offsets[upb_fielddef_index(f)] = ofs; + ofs += field_size; + } + + layout->size = ofs; +} + +// Creates a prototype; a buffer we can memcpy() onto new instances to +// initialize them. +static void make_prototype(rb_msglayout *layout, const upb_msgdef *md) { + void *prototype = ALLOC_N(char, layout->size); + + // Most members default to zero, so we'll start from that and then overwrite + // with more specific initialization. + memset(prototype, 0, layout->size); + + upb_msg_iter i; + for (upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + if (is_ruby_value(f)) { + size_t ofs = layout->field_offsets[upb_fielddef_index(f)]; + // Default all Ruby pointers to nil. + DEREF_RAW(prototype, ofs, VALUE) = Qnil; + } + } + + layout->prototype = prototype; +} + + +static void msglayout_init(rb_msglayout *layout, const upb_msgdef *m) { + assign_offsets(layout, m); + make_prototype(layout, m); +} + +static void msglayout_uninit(rb_msglayout *layout) { + free(layout->field_offsets); + free(layout->prototype); +} + + +/* Upb::MessageDef ************************************************************/ + +// C representation for Upb::MessageDef. +// +// Contains a reference to the underlying upb_msgdef, as well as associated data +// like a reference to the corresponding Ruby class. 
+struct rupb_MessageDef { + // We own refs on all of these. + + // The upb_msgdef we are wrapping. + const upb_msgdef *md; + + // A DecoderMethod for parsing a protobuf into this type. + const upb_pbdecodermethod *fill_method; + + // Handlers for serializing into a protobuf of this type. + const upb_handlers *serialize_handlers; + + // The Ruby class for instances of this type. + VALUE klass; + + // Layout for messages of this type. + rb_msglayout layout; +}; + +// Called by the Ruby GC when a Upb::MessageDef is being freed. +static void msgdef_free(void *_rmd) { + rupb_MessageDef *rmd = _rmd; + objcache_remove(rmd->md); + upb_msgdef_unref(rmd->md, &rmd->md); + if (rmd->fill_method) { + upb_pbdecodermethod_unref(rmd->fill_method, &rmd->fill_method); + } + if (rmd->serialize_handlers) { + upb_handlers_unref(rmd->serialize_handlers, &rmd->serialize_handlers); + } + msglayout_uninit(&rmd->layout); + free(rmd); +} + +// Called by the Ruby GC during the "mark" phase to decide what is still alive. +// We call rb_gc_mark on all Ruby VALUE pointers we reference. +static void msgdef_mark(void *_rmd) { + rupb_MessageDef *rmd = _rmd; + rb_gc_mark(rmd->klass); + + // Mark all submessage types. + upb_msg_iter i; + for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + if (upb_fielddef_issubmsg(f)) { + // If we were trying to be more aggressively lazy, the submessage might + // not be created and we only mark ones that are. + rb_gc_mark(msgdef_getwrapper(upb_fielddef_msgsubdef(f))); + } + } +} + +static const rb_data_type_t msgdef_type = {"Upb::MessageDef", + {msgdef_mark, msgdef_free, NULL}}; + +// TODO(haberman): do we need an alloc func? We want to prohibit dup and +// probably subclassing too. 
+ +static rupb_MessageDef *msgdef_get(VALUE self) { + rupb_MessageDef *msgdef; + TypedData_Get_Struct(self, rupb_MessageDef, &msgdef_type, msgdef); + return msgdef; +} + +// Constructs the upb decoder method for parsing messages of this type. +const upb_pbdecodermethod *new_fillmsg_decodermethod(const rupb_MessageDef *rmd, + const void *owner) { + const upb_handlers *fill_handlers = new_fill_handlers(rmd, &fill_handlers); + upb_pbdecodermethodopts opts; + upb_pbdecodermethodopts_init(&opts, fill_handlers); + + const upb_pbdecodermethod *ret = upb_pbdecodermethod_new(&opts, owner); + upb_handlers_unref(fill_handlers, &fill_handlers); + return ret; +} + +// Constructs a new Ruby wrapper object around the given msgdef. +static VALUE make_msgdef(const void *_md) { + const upb_msgdef *md = _md; + rupb_MessageDef *rmd; + VALUE ret = + TypedData_Make_Struct(cMessageDef, rupb_MessageDef, &msgdef_type, rmd); + + upb_msgdef_ref(md, &rmd->md); + + rmd->md = md; + rmd->fill_method = NULL; + + // OPT: most of these things could be built lazily, when they are first + // needed. + msglayout_init(&rmd->layout, md); + + rmd->fill_method = NULL; + rmd->klass = new_message_class(ret); + rmd->serialize_handlers = + upb_pb_encoder_newhandlers(md, &rmd->serialize_handlers); + + return ret; +} + +// Accessor to get a decoder method for this message type. +// Constructs the decoder method lazily. +static const upb_pbdecodermethod *msgdef_decodermethod(rupb_MessageDef *rmd) { + if (!rmd->fill_method) { + rmd->fill_method = new_fillmsg_decodermethod(rmd, &rmd->fill_method); + } + + return rmd->fill_method; +} + +static VALUE msgdef_getwrapper(const upb_msgdef *md) { + return objcache_getorcreate(md, make_msgdef); +} + +static const rupb_MessageDef *get_rbmsgdef(const upb_msgdef *md) { + return msgdef_get(msgdef_getwrapper(md)); +} + + +/* Upb::Message ***************************************************************/ + +// Code to implement the Upb::Message object. 
+// +// A unique Ruby class is generated for each message type, but all message types +// share Upb::Message as their base class. Upb::Message contains all of the +// actual functionality; the only reason the derived class exists at all is +// for convenience. It lets Ruby users do things like: +// +// message = MyMessage.new +// if message.kind_of?(MyMessage) +// +// ... and other similar things that Ruby users expect they can do. + +// C representation of Upb::Message. +// +// Represents a message instance, laid out like a C struct in a type-specific +// layout. +// +// This will be sized according to what fields are actually present. +struct rupb_Message { + VALUE rbmsgdef; + char data[]; +}; + +// Returns the size of a message instance. +size_t msg_size(const rupb_MessageDef *rmd) { + return sizeof(rupb_Message) + rmd->layout.size; +} static void msg_free(void *msg) { free(msg); @@ -145,103 +487,170 @@ static void msg_free(void *msg) { // Invoked by the Ruby GC whenever it is doing a mark-and-sweep. static void msg_mark(void *p) { - rb_msg *msg = p; - rb_msgdef *rmd; - Data_Get_Struct(msg->data.rbmsgdef, rb_msgdef, rmd); + rupb_Message *msg = p; + rupb_MessageDef *rmd = msgdef_get(msg->rbmsgdef); // Mark the msgdef to keep it alive. - rb_gc_mark(msg->data.rbmsgdef); + rb_gc_mark(msg->rbmsgdef); // We need to mark all references to other Ruby values: strings, arrays, and - // submessages that we point to. Only strings are implemented so far. + // submessages that we point to. 
upb_msg_iter i; for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); - if (upb_fielddef_isstring(f)) { - size_t ofs = rmd->field_offsets[upb_fielddef_index(f)]; + if (is_ruby_value(f)) { + size_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)]; rb_gc_mark(DEREF(msg, ofs, VALUE)); } } } -static VALUE msg_new(VALUE msgdef) { - const rb_msgdef *rmd; - Data_Get_Struct(msgdef, rb_msgdef, rmd); +static const rb_data_type_t msg_type = {"Upb::Message", + {msg_mark, msg_free, NULL}}; + +static rupb_Message *msg_get(VALUE self) { + rupb_Message *msg; + TypedData_Get_Struct(self, rupb_Message, &msg_type, msg); + return msg; +} + +// Instance variable name that we use to store a reference from the Ruby class +// for a message and its Upb::MessageDef. +// +// We avoid prefixing this by "@" to make it inaccessible by Ruby. +static const char *kMessageDefMemberName = "msgdef"; + +static VALUE msg_getmsgdef(VALUE klass) { + VALUE msgdef = rb_iv_get(klass, kMessageDefMemberName); - rb_msg *msg = (rb_msg*)ALLOC_N(char, rmd->size); - memset(msg, 0, rmd->size); - msg->data.rbmsgdef = msgdef; + if (msgdef == Qnil) { + // TODO(haberman): If we want to allow subclassing, we might want to walk up + // the hierarchy looking for this member. + rb_raise(rb_eRuntimeError, + "Can't call on Upb::Message directly, only subclasses"); + } + + return msgdef; +} - VALUE ret = Data_Wrap_Struct(cMessage, msg_mark, msg_free, msg); +// Called by the Ruby VM when it wants to create a new message instance. 
+static VALUE msg_alloc(VALUE klass) { + VALUE msgdef = msg_getmsgdef(klass); + const rupb_MessageDef *rmd = msgdef_get(msgdef); + + rupb_Message *msg = (rupb_Message*)ALLOC_N(char, msg_size(rmd)); + msg->rbmsgdef = msgdef; + memcpy(&msg->data, rmd->layout.prototype, rmd->layout.size); + + VALUE ret = TypedData_Wrap_Struct(klass, &msg_type, msg); return ret; } -static const upb_fielddef *lookup_field(rb_msg *msg, const char *field, - size_t *ofs) { - const rb_msgdef *rmd; - Data_Get_Struct(msg->data.rbmsgdef, rb_msgdef, rmd); - const upb_fielddef *f = upb_msgdef_ntof(rmd->md, field); +// Creates a new Ruby class for the given Upb::MessageDef. The new class +// derives from Upb::Message but also stores a reference to the Upb::MessageDef. +static VALUE new_message_class(VALUE message_def) { + msgdef_get(message_def); // Check type. + VALUE klass = rb_class_new(cMessage); + rb_iv_set(klass, kMessageDefMemberName, message_def); + + // This shouldn't be necessary because we should inherit the alloc func from + // the base class of Message. For some reason this is not working properly + // and we are having to define it manually. + rb_define_alloc_func(klass, msg_alloc); + + return klass; +} + +// Call to create a new Message instance. +static VALUE msg_new(VALUE msgdef) { + return rb_class_new_instance(0, NULL, get_message_class(Qnil, msgdef)); +} + +// Looks up the given field. On success returns the upb_fielddef and stores the +// offset in *ofs. Otherwise raises a Ruby exception. 
+static const upb_fielddef *lookup_field(rupb_Message *msg, const char *field, + size_t len, size_t *ofs) { + const rupb_MessageDef *rmd = msgdef_get(msg->rbmsgdef); + const upb_fielddef *f = upb_msgdef_ntof(rmd->md, field, len); + if (!f) { - rb_raise(rb_eArgError, "No such field: %s", field); + rb_raise(rb_eArgError, "Message %s does not contain field %s", + upb_msgdef_fullname(rmd->md), field); } - *ofs = rmd->field_offsets[upb_fielddef_index(f)]; + + *ofs = rmd->layout.field_offsets[upb_fielddef_index(f)]; return f; } -static VALUE msg_setter(rb_msg *msg, VALUE field, VALUE val) { +// Sets the given field to the given value. +static void setprimitive(rupb_Message *m, size_t ofs, const upb_fielddef *f, + VALUE val) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_FLOAT: DEREF(m, ofs, float) = value_to_float(val); break; + case UPB_TYPE_DOUBLE: DEREF(m, ofs, double) = value_to_double(val); break; + case UPB_TYPE_BOOL: DEREF(m, ofs, bool) = value_to_bool(val); break; + case UPB_TYPE_ENUM: + case UPB_TYPE_INT32: DEREF(m, ofs, int32_t) = value_to_int32(val); break; + case UPB_TYPE_UINT32: DEREF(m, ofs, uint32_t) = value_to_uint32(val); break; + case UPB_TYPE_INT64: DEREF(m, ofs, int64_t) = value_to_int64(val); break; + case UPB_TYPE_UINT64: DEREF(m, ofs, uint64_t) = value_to_uint64(val); break; + default: rb_bug("Unexpected type"); + } +} + +// Returns the Ruby VALUE for the given field. 
+static VALUE getprimitive(rupb_Message *m, size_t ofs, const upb_fielddef *f) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_FLOAT: return float_to_value(DEREF(m, ofs, float)); + case UPB_TYPE_DOUBLE: return double_to_value(DEREF(m, ofs, double)); + case UPB_TYPE_BOOL: return bool_to_value(DEREF(m, ofs, bool)); + case UPB_TYPE_ENUM: + case UPB_TYPE_INT32: return int32_to_value(DEREF(m, ofs, int32_t)); + case UPB_TYPE_UINT32: return uint32_to_value(DEREF(m, ofs, uint32_t)); + case UPB_TYPE_INT64: return int64_to_value(DEREF(m, ofs, int64_t)); + case UPB_TYPE_UINT64: return uint64_to_value(DEREF(m, ofs, uint64_t)); + default: rb_bug("Unexpected type"); + } +} + +static VALUE msg_setter(rupb_Message *msg, VALUE field, VALUE val) { size_t ofs; - char *fieldp = RSTRING_PTR(field); - size_t field_last = RSTRING_LEN(field) - 1; // fieldp is a string like "id=". But we want to look up "id". - // We take the liberty of temporarily setting the "=" to NULL. - assert(fieldp[field_last] == '='); - fieldp[field_last] = '\0'; - const upb_fielddef *f = lookup_field(msg, fieldp, &ofs); - fieldp[field_last] = '='; + const upb_fielddef *f = + lookup_field(msg, RSTRING_PTR(field), RSTRING_LEN(field) - 1, &ofs); // Possibly introduce stricter type checking. 
- switch (upb_fielddef_type(f)) { - case UPB_TYPE_FLOAT: DEREF(msg, ofs, float) = NUM2DBL(val); - case UPB_TYPE_DOUBLE: DEREF(msg, ofs, double) = NUM2DBL(val); - case UPB_TYPE_BOOL: DEREF(msg, ofs, bool) = RTEST(val); - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: DEREF(msg, ofs, VALUE) = val; - case UPB_TYPE_MESSAGE: return Qnil; - case UPB_TYPE_ENUM: - case UPB_TYPE_INT32: DEREF(msg, ofs, int32_t) = NUM2INT(val); - case UPB_TYPE_UINT32: DEREF(msg, ofs, uint32_t) = NUM2LONG(val); - case UPB_TYPE_INT64: DEREF(msg, ofs, int64_t) = NUM2LONG(val); - case UPB_TYPE_UINT64: DEREF(msg, ofs, uint64_t) = NUM2ULL(val); + if (is_ruby_value(f)) { + DEREF(msg, ofs, VALUE) = val; + } else { + setprimitive(msg, ofs, f, val); } return val; } -static VALUE msg_getter(rb_msg *msg, VALUE field) { +static VALUE msg_getter(rupb_Message *msg, VALUE field) { size_t ofs; - const upb_fielddef *f = lookup_field(msg, RSTRING_PTR(field), &ofs); + const upb_fielddef *f = + lookup_field(msg, RSTRING_PTR(field), RSTRING_LEN(field), &ofs); - switch (upb_fielddef_type(f)) { - case UPB_TYPE_FLOAT: return rb_float_new(DEREF(msg, ofs, float)); - case UPB_TYPE_DOUBLE: return rb_float_new(DEREF(msg, ofs, double)); - case UPB_TYPE_BOOL: return DEREF(msg, ofs, bool) ? Qtrue : Qfalse; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: return DEREF(msg, ofs, VALUE); - case UPB_TYPE_MESSAGE: return Qnil; - case UPB_TYPE_ENUM: - case UPB_TYPE_INT32: return INT2NUM(DEREF(msg, ofs, int32_t)); - case UPB_TYPE_UINT32: return LONG2NUM(DEREF(msg, ofs, uint32_t)); - case UPB_TYPE_INT64: return LONG2NUM(DEREF(msg, ofs, int64_t)); - case UPB_TYPE_UINT64: return ULL2NUM(DEREF(msg, ofs, uint64_t)); + if (is_ruby_value(f)) { + return DEREF(msg, ofs, VALUE); + } else { + return getprimitive(msg, ofs, f); } - - rb_bug("Unexpected type"); } +// This is the Message object's "method_missing" method, so it receives calls +// for any method whose name was not recognized. 
We use it to implement getters +// and setters for every field +// +// call-seq: +// message.field -> current value of "field" +// message.field = new_value static VALUE msg_accessor(int argc, VALUE *argv, VALUE obj) { - rb_msg *msg; - Data_Get_Struct(obj, rb_msg, msg); + rupb_Message *msg = msg_get(obj); // method_missing protocol: (method [, arg1, arg2, ...]) assert(argc >= 1 && SYMBOL_P(argv[0])); @@ -270,72 +679,106 @@ static VALUE msg_accessor(int argc, VALUE *argv, VALUE obj) { } } -/* msgdef *********************************************************************/ - -static void msgdef_free(void *_rmd) { - rb_msgdef *rmd = _rmd; - upb_msgdef_unref(rmd->md, &rmd->md); - if (rmd->fill_method) { - upb_pbdecodermethod_unref(rmd->fill_method, &rmd->fill_method); - } - free(rmd->field_offsets); +// Called when Ruby wants to turn this value into a string. +// TODO(haberman): implement. +static VALUE msg_tostring(VALUE self) { + return rb_str_new2("tostring!"); } -const upb_pbdecodermethod *new_fillmsg_decodermethod(const rb_msgdef *rmd, - const void *owner) { - const upb_handlers *fill_handlers = new_fill_handlers(rmd, &fill_handlers); - upb_pbdecodermethodopts opts; - upb_pbdecodermethodopts_init(&opts, fill_handlers); +// call-seq: +// MessageClass.parse(binary_protobuf) -> message instance +// +// Parses a binary protobuf according to this message class and returns a new +// message instance of this class type. 
+static VALUE msg_parse(VALUE klass, VALUE binary_protobuf) { + Check_Type(binary_protobuf, T_STRING); + rupb_MessageDef *rmd = msgdef_get(msg_getmsgdef(klass)); - const upb_pbdecodermethod *ret = upb_pbdecodermethod_new(&opts, owner); - upb_handlers_unref(fill_handlers, &fill_handlers); - return ret; + VALUE msg = rb_class_new_instance(0, NULL, klass); + rupb_Message *msgp = msg_get(msg); + + const upb_pbdecodermethod *method = msgdef_decodermethod(rmd); + const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); + upb_pbdecoder decoder; + upb_sink sink; + upb_status status = UPB_STATUS_INIT; + + upb_pbdecoder_init(&decoder, method, &status); + upb_sink_reset(&sink, h, msgp); + upb_pbdecoder_resetoutput(&decoder, &sink); + upb_bufsrc_putbuf(RSTRING_PTR(binary_protobuf), + RSTRING_LEN(binary_protobuf), + upb_pbdecoder_input(&decoder)); + + // TODO(haberman): make uninit optional if custom allocator for parsing + // returns GC-rooted memory. That will make decoding longjmp-safe (required + // if parsing triggers any VM errors like OOM or errors in user handlers). + upb_pbdecoder_uninit(&decoder); + rupb_checkstatus(&status); + + return msg; } -// Calculates offsets for each field. +// call-seq: +// Message.serialize(message instance) -> serialized string // -// This lets us pack protos like structs instead of storing them like -// dictionaries. This speeds up a parsing a lot and also saves memory -// (unless messages are very sparse). -static void assign_offsets(rb_msgdef *rmd) { - size_t ofs = sizeof(rb_msg); // Msg starts with predeclared members. - upb_msg_iter i; - for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - size_t field_size = rupb_sizeof(upb_fielddef_type(f)); - ofs = align_up(ofs, field_size); // Align field properly. - rmd->field_offsets[upb_fielddef_index(f)] = ofs; - ofs += field_size; - } - rmd->size = ofs; -} +// Serializes the given message instance to a string. 
+static VALUE msg_serialize(VALUE klass, VALUE message) { + rupb_Message *msg = msg_get(message); + const rupb_MessageDef *rmd = msgdef_get(msg->rbmsgdef); -// Constructs a new Ruby wrapper object around the given msgdef. -static VALUE make_msgdef(const upb_msgdef *md) { - rb_msgdef *rmd; - VALUE ret = Data_Make_Struct(cMessageDef, rb_msgdef, NULL, msgdef_free, rmd); + stringsink sink; + stringsink_init(&sink); - upb_msgdef_ref(md, &rmd->md); + upb_pb_encoder encoder; + upb_pb_encoder_init(&encoder, rmd->serialize_handlers); + upb_pb_encoder_resetoutput(&encoder, &sink.sink); - rmd->md = md; - rmd->field_offsets = ALLOC_N(uint32_t, upb_msgdef_numfields(md)); - rmd->fill_method = NULL; + putmsg(msg, rmd, upb_pb_encoder_input(&encoder)); - assign_offsets(rmd); + VALUE ret = rb_str_new(sink.ptr, sink.len); - rmd->fill_method = new_fillmsg_decodermethod(rmd, &rmd->fill_method); + upb_pb_encoder_uninit(&encoder); + stringsink_uninit(&sink); return ret; } -// Loads a descriptor and constructs a MessageDef to the named message. -static VALUE msgdef_load(VALUE klass, VALUE descriptor, VALUE message_name) { + +/* Upb::SymbolTable ***********************************************************/ + +// Ruby wrapper around a SymbolTable. Allows loading of descriptors and turning +// them into MessageDef objects. + +void symtab_free(void *s) { + upb_symtab_unref(s, UPB_UNTRACKED_REF); +} + +static const rb_data_type_t symtab_type = {"Upb::SymbolTable", + {NULL, symtab_free, NULL}}; + +// Called by the Ruby VM to allocate a SymbolTable object. +static VALUE symtab_alloc(VALUE klass) { upb_symtab *symtab = upb_symtab_new(UPB_UNTRACKED_REF); + VALUE ret = TypedData_Wrap_Struct(klass, &symtab_type, symtab); - // Wrap the symtab in a Ruby object so it gets GC'd. - // In a real wrapper we would wrap this object more fully (ie. expose its - // methods to Ruby callers). 
- Data_Wrap_Struct(rb_cObject, NULL, symtab_free, symtab); + return ret; +} + +static upb_symtab *symtab_get(VALUE self) { + upb_symtab *symtab; + TypedData_Get_Struct(self, upb_symtab, &symtab_type, symtab); + return symtab; +} + +// call-seq: +// symtab.load_descriptor(descriptor) +// +// Parses a FileDescriptorSet from the given string and adds the defs to the +// SymbolTable. Raises if there was an error. +static VALUE symtab_load_descriptor(VALUE self, VALUE descriptor) { + upb_symtab *symtab = symtab_get(self); + Check_Type(descriptor, T_STRING); upb_status status = UPB_STATUS_INIT; upb_load_descriptor_into_symtab( @@ -346,51 +789,377 @@ static VALUE msgdef_load(VALUE klass, VALUE descriptor, VALUE message_name) { "Error loading descriptor: %s", upb_status_errmsg(&status)); } - const char *name = RSTRING_PTR(message_name); - const upb_msgdef *m = upb_symtab_lookupmsg(symtab, name); + return Qnil; +} + +// call-seq: +// symtab.lookup(name) +// +// Returns the def for this name; raises RuntimeError if it is not found. +// TODO(haberman): only supports messages right now, not enums. 
+static VALUE symtab_lookup(VALUE self, VALUE name) { + upb_symtab *symtab = symtab_get(self); + Check_Type(name, T_STRING); + + const char *cname = StringValueCStr(name); // NUL-terminated; rejects embedded NULs. + const upb_msgdef *m = upb_symtab_lookupmsg(symtab, cname); if (!m) { - rb_raise(rb_eRuntimeError, "Message name '%s' not found", name); + rb_raise(rb_eRuntimeError, "Message name '%s' not found", cname); } - return make_msgdef(m); + return msgdef_getwrapper(m); } -static VALUE msgdef_parse(VALUE self, VALUE binary_protobuf) { - const rb_msgdef *rmd; - Data_Get_Struct(self, rb_msgdef, rmd); - VALUE msg = msg_new(self); - rb_msg *msgp; - Data_Get_Struct(msg, rb_msg, msgp); +/* handlers *******************************************************************/ - const upb_handlers *h = upb_pbdecodermethod_desthandlers(rmd->fill_method); - upb_pbdecoder decoder; - upb_sink sink; - upb_status status = UPB_STATUS_INIT; +// These are handlers for populating a Ruby protobuf message (rupb_Message) when +// parsing. - upb_pbdecoder_init(&decoder, rmd->fill_method, &status); - upb_sink_reset(&sink, h, msgp); - upb_pbdecoder_resetoutput(&decoder, &sink); - upb_bufsrc_putbuf(RSTRING_PTR(binary_protobuf), - RSTRING_LEN(binary_protobuf), - upb_pbdecoder_input(&decoder)); - // TODO(haberman): make uninit optional if custom allocator for parsing - // returns GC-rooted memory. That will make decoding longjmp-safe (required - // if parsing triggers any VM errors like OOM or errors in user handlers). - upb_pbdecoder_uninit(&decoder); - rupb_checkstatus(&status); +// Creates a handlerdata that simply contains the offset for this field. +static const void *newhandlerdata(upb_handlers *h, uint32_t ofs) { + size_t *hd_ofs = ALLOC(size_t); + *hd_ofs = ofs; + upb_handlers_addcleanup(h, hd_ofs, xfree); // ALLOC() memory must be released with xfree(). + return hd_ofs; +} - return msg; +typedef struct { + size_t ofs; + const upb_msgdef *md; +} submsg_handlerdata_t; + +// Creates a handlerdata that contains offset and submessage type information. 
+static const void *newsubmsghandlerdata(upb_handlers *h, uint32_t ofs, + const upb_fielddef *f) { + submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t); + hd->ofs = ofs; + hd->md = upb_fielddef_msgsubdef(f); + upb_handlers_addcleanup(h, hd, xfree); // ALLOC() memory must be released with xfree(). + return hd; +} + +// A handler that starts a repeated field. Gets or creates a Ruby array for the +// field. +static void *startseq_handler(void *closure, const void *hd) { + rupb_Message *msg = closure; + const size_t *ofs = hd; + + if (DEREF(msg, *ofs, VALUE) == Qnil) { + DEREF(msg, *ofs, VALUE) = rb_ary_new(); + } + + return (void*)DEREF(msg, *ofs, VALUE); +} + +// Handlers that append primitive values to a repeated field (a regular Ruby +// array for now). +#define DEFINE_APPEND_HANDLER(type, ctype) \ + static bool append##type##_handler(void *closure, const void *hd, \ + ctype val) { \ + VALUE ary = (VALUE)closure; \ + rb_ary_push(ary, type##_to_value(val)); \ + return true; \ + } + +DEFINE_APPEND_HANDLER(bool, bool) +DEFINE_APPEND_HANDLER(int32, int32_t) +DEFINE_APPEND_HANDLER(uint32, uint32_t) +DEFINE_APPEND_HANDLER(float, float) +DEFINE_APPEND_HANDLER(int64, int64_t) +DEFINE_APPEND_HANDLER(uint64, uint64_t) +DEFINE_APPEND_HANDLER(double, double) + +// Appends a string to a repeated field (a regular Ruby array for now). +static size_t appendstr_handler(void *closure, const void *hd, const char *str, + size_t len, const upb_bufhandle *handle) { + VALUE ary = (VALUE)closure; + rb_ary_push(ary, rb_str_new(str, len)); + return len; +} + +// Sets a non-repeated string field in a message. +static size_t str_handler(void *closure, const void *hd, const char *str, + size_t len, const upb_bufhandle *handle) { + rupb_Message *msg = closure; + const size_t *ofs = hd; + DEREF(msg, *ofs, VALUE) = rb_str_new(str, len); + return len; +} + +// Appends a submessage to a repeated field (a regular Ruby array for now). 
+static void *appendsubmsg_handler(void *closure, const void *hd) { + VALUE ary = (VALUE)closure; + const submsg_handlerdata_t *submsgdata = hd; + VALUE submsg = msg_new(msgdef_getwrapper(submsgdata->md)); + rb_ary_push(ary, submsg); + return msg_get(submsg); +} + +// Sets a non-repeated submessage field in a message. +static void *submsg_handler(void *closure, const void *hd) { + rupb_Message *msg = closure; + const submsg_handlerdata_t *submsgdata = hd; + + if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) { + DEREF(msg, submsgdata->ofs, VALUE) = msg_new(msgdef_getwrapper(submsgdata->md)); + } + + VALUE submsg = DEREF(msg, submsgdata->ofs, VALUE); + return msg_get(submsg); +} + +static void add_handlers_for_message(const void *closure, upb_handlers *h) { + const rupb_MessageDef *rmd = get_rbmsgdef(upb_handlers_msgdef(h)); + upb_msg_iter i; + + for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + size_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)]; + + if (upb_fielddef_isseq(f)) { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, ofs)); + upb_handlers_setstartseq(h, f, startseq_handler, &attr); + upb_handlerattr_uninit(&attr); + + switch (upb_fielddef_type(f)) { + +#define SET_HANDLER(utype, ltype) \ + case utype: \ + upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \ + break; + + SET_HANDLER(UPB_TYPE_BOOL, bool); + SET_HANDLER(UPB_TYPE_INT32, int32); + SET_HANDLER(UPB_TYPE_UINT32, uint32); + SET_HANDLER(UPB_TYPE_ENUM, int32); + SET_HANDLER(UPB_TYPE_FLOAT, float); + SET_HANDLER(UPB_TYPE_INT64, int64); + SET_HANDLER(UPB_TYPE_UINT64, uint64); + SET_HANDLER(UPB_TYPE_DOUBLE, double); + +#undef SET_HANDLER + + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + // XXX: doesn't currently handle split buffers. 
+ upb_handlers_setstring(h, f, appendstr_handler, NULL); + break; + case UPB_TYPE_MESSAGE: { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f)); + upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr); + upb_handlerattr_uninit(&attr); + break; + } + } + continue; // Repeated field is fully set up; don't also register the scalar handlers below. + } + switch (upb_fielddef_type(f)) { + case UPB_TYPE_BOOL: + case UPB_TYPE_INT32: + case UPB_TYPE_UINT32: + case UPB_TYPE_ENUM: + case UPB_TYPE_FLOAT: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT64: + case UPB_TYPE_DOUBLE: + // The shim writes directly at the given offset (instead of using + // DEREF()) so we need to add the msg overhead. + upb_shim_set(h, f, ofs + sizeof(rupb_Message), -1); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, ofs)); + // XXX: doesn't currently handle split buffers. + upb_handlers_setstring(h, f, str_handler, &attr); + upb_handlerattr_uninit(&attr); + break; + } + case UPB_TYPE_MESSAGE: { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, ofs, f)); + upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr); + upb_handlerattr_uninit(&attr); + break; + } + } + } +} + +// Creates upb handlers for populating a message. +static const upb_handlers *new_fill_handlers(const rupb_MessageDef *rmd, + const void *owner) { + return upb_handlers_newfrozen(rmd->md, owner, add_handlers_for_message, NULL); +} + + +/* msgvisitor *****************************************************************/ + +// This is code to push the contents of a Ruby message (rupb_Message) to a upb +// sink. 
+ +static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) { + upb_selector_t ret; + bool ok = upb_handlers_getselector(f, type, &ret); + UPB_ASSERT_VAR(ok, ok); // Callers only pass handler types they expect to be valid for f. + return ret; +} + +static void putstr(VALUE str, const upb_fielddef *f, upb_sink *sink) { + if (str == Qnil) return; // Nil is skipped: nothing is emitted. + + assert(BUILTIN_TYPE(str) == RUBY_T_STRING); + upb_sink subsink; + + upb_sink_startstr(sink, getsel(f, UPB_HANDLER_STARTSTR), RSTRING_LEN(str), + &subsink); + upb_sink_putstring(&subsink, getsel(f, UPB_HANDLER_STRING), RSTRING_PTR(str), + RSTRING_LEN(str), NULL); + upb_sink_endstr(sink, getsel(f, UPB_HANDLER_ENDSTR)); +} + +static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink) { + if (submsg == Qnil) return; // Nil is skipped: nothing is emitted. + + upb_sink subsink; + const rupb_MessageDef *sub_rmd = get_rbmsgdef(upb_fielddef_msgsubdef(f)); + + upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &subsink); + putmsg(msg_get(submsg), sub_rmd, &subsink); + upb_sink_endsubmsg(sink, getsel(f, UPB_HANDLER_ENDSUBMSG)); +} + +static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink) { + if (ary == Qnil) return; + + assert(BUILTIN_TYPE(ary) == RUBY_T_ARRAY); + upb_sink subsink; + + upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink); + + upb_fieldtype_t type = upb_fielddef_type(f); + upb_selector_t sel = 0; // Only looked up for primitive element types. + if (upb_fielddef_isprimitive(f)) { + sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); + } + + int i; + for (i = 0; i < RARRAY_LEN(ary); i++) { + VALUE val = rb_ary_entry(ary, i); + switch (type) { + +#define T(upbtypeconst, upbtype, ctype) \ + case upbtypeconst: \ + upb_sink_put##upbtype(&subsink, sel, value_to_##upbtype(val)); \ + break; + + T(UPB_TYPE_FLOAT, float, float) + T(UPB_TYPE_DOUBLE, double, double) + T(UPB_TYPE_BOOL, bool, bool) + case UPB_TYPE_ENUM: // Enums share the int32 case below. + T(UPB_TYPE_INT32, int32, int32_t) + T(UPB_TYPE_UINT32, uint32, uint32_t) + T(UPB_TYPE_INT64, int64, int64_t) + T(UPB_TYPE_UINT64, uint64, uint64_t) + + case UPB_TYPE_STRING: + case 
UPB_TYPE_BYTES: + putstr(val, f, &subsink); + break; + case UPB_TYPE_MESSAGE: + putsubmsg(val, f, &subsink); + break; + +#undef T + + } + } + upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ)); +} + +static void putmsg(rupb_Message *msg, const rupb_MessageDef *rmd, + upb_sink *sink) { + upb_sink_startmsg(sink); + + upb_msg_iter i; + for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + uint32_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)]; + + if (upb_fielddef_isseq(f)) { + VALUE ary = DEREF(msg, ofs, VALUE); + if (ary != Qnil) { + putary(ary, f, sink); + } + } else if (upb_fielddef_isstring(f)) { + putstr(DEREF(msg, ofs, VALUE), f, sink); + } else if (upb_fielddef_issubmsg(f)) { + putsubmsg(DEREF(msg, ofs, VALUE), f, sink); + } else { + upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); + +#define T(upbtypeconst, upbtype, ctype) \ + case upbtypeconst: \ + upb_sink_put##upbtype(sink, sel, DEREF(msg, ofs, ctype)); \ + break; + + switch (upb_fielddef_type(f)) { + T(UPB_TYPE_FLOAT, float, float) + T(UPB_TYPE_DOUBLE, double, double) + T(UPB_TYPE_BOOL, bool, bool) + case UPB_TYPE_ENUM: + T(UPB_TYPE_INT32, int32, int32_t) + T(UPB_TYPE_UINT32, uint32, uint32_t) + T(UPB_TYPE_INT64, int64, int64_t) + T(UPB_TYPE_UINT64, uint64, uint64_t) + + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + case UPB_TYPE_MESSAGE: rb_raise(rb_eRuntimeError, "Internal error."); + } + +#undef T + + } + } + + upb_status status = UPB_STATUS_INIT; // Initialized for the sink; not inspected afterwards. + upb_sink_endmsg(sink, &status); +} + + +/* top level ******************************************************************/ + +static VALUE get_message_class(VALUE klass, VALUE message) { + rupb_MessageDef *rmd = msgdef_get(message); + return rmd->klass; } void Init_upb() { VALUE upb = rb_define_module("Upb"); + rb_define_singleton_method(upb, "get_message_class", get_message_class, 1); + rb_gc_register_address(&message_map); + + cSymbolTable = rb_define_class_under(upb, 
"SymbolTable", rb_cObject); + rb_define_alloc_func(cSymbolTable, symtab_alloc); + rb_define_method(cSymbolTable, "load_descriptor", symtab_load_descriptor, 1); + rb_define_method(cSymbolTable, "lookup", symtab_lookup, 1); cMessageDef = rb_define_class_under(upb, "MessageDef", rb_cObject); - rb_define_singleton_method(cMessageDef, "load", msgdef_load, 2); - rb_define_method(cMessageDef, "parse", msgdef_parse, 1); cMessage = rb_define_class_under(upb, "Message", rb_cObject); + rb_define_alloc_func(cMessage, msg_alloc); rb_define_method(cMessage, "method_missing", msg_accessor, -1); + rb_define_method(cMessage, "to_s", msg_tostring, 0); + rb_define_singleton_method(cMessage, "parse", msg_parse, 1); + rb_define_singleton_method(cMessage, "serialize", msg_serialize, 1); + + objcache_init(); + + // This causes atexit crashes for unknown reasons. :( + // ruby_vm_at_exit(objcache_uninit); } diff --git a/upb/def.c b/upb/def.c index b1598d8..fde2ee8 100644 --- a/upb/def.c +++ b/upb/def.c @@ -628,6 +628,7 @@ upb_fielddef *upb_fielddef_new(const void *owner) { f->tagdelim = false; f->is_extension_ = false; f->lazy_ = false; + f->packed_ = true; // For the moment we default this to UPB_INTFMT_VARIABLE, since it will work // with all integer types and is in some since more "default" since the most @@ -735,6 +736,10 @@ bool upb_fielddef_lazy(const upb_fielddef *f) { return f->lazy_; } +bool upb_fielddef_packed(const upb_fielddef *f) { + return f->packed_; +} + const char *upb_fielddef_name(const upb_fielddef *f) { return upb_def_fullname(UPB_UPCAST(f)); } @@ -1030,6 +1035,11 @@ void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) { f->lazy_ = lazy; } +void upb_fielddef_setpacked(upb_fielddef *f, bool packed) { + assert(!upb_fielddef_isfrozen(f)); + f->packed_ = packed; +} + void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) { assert(!upb_fielddef_isfrozen(f)); assert(upb_fielddef_checklabel(label)); @@ -1341,7 +1351,7 @@ bool upb_msgdef_addfield(upb_msgdef *m, 
upb_fielddef *f, const void *ref_donor, upb_status_seterrmsg(s, "field name or number were not set"); return false; } else if(upb_msgdef_itof(m, upb_fielddef_number(f)) || - upb_msgdef_ntof(m, upb_fielddef_name(f))) { + upb_msgdef_ntofz(m, upb_fielddef_name(f))) { upb_status_seterrmsg(s, "duplicate field name or number"); return false; } @@ -1365,20 +1375,13 @@ const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { upb_value_getptr(val) : NULL; } -const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name) { +const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, + size_t len) { upb_value val; - return upb_strtable_lookup(&m->ntof, name, &val) ? + return upb_strtable_lookup2(&m->ntof, name, len, &val) ? upb_value_getptr(val) : NULL; } -upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i) { - return (upb_fielddef*)upb_msgdef_itof(m, i); -} - -upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m, const char *name) { - return (upb_fielddef*)upb_msgdef_ntof(m, name); -} - int upb_msgdef_numfields(const upb_msgdef *m) { return upb_strtable_count(&m->ntof); } diff --git a/upb/def.h b/upb/def.h index 7a9a655..2699fbf 100644 --- a/upb/def.h +++ b/upb/def.h @@ -324,6 +324,13 @@ UPB_DEFINE_DEF(upb::FieldDef, fielddef, FIELD, // contain both regular FieldOptions like "lazy" *and* custom options). bool lazy() const; + // For non-string, non-submessage fields, this indicates whether binary + // protobufs are encoded in packed or non-packed format. + // + // TODO(haberman): see note above about putting options like this into a + // FieldOptions container. + bool packed() const; + // An integer that can be used as an index into an array of fields for // whatever message this field belongs to. Guaranteed to be less than // f->containing_type()->field_count(). 
May only be accessed once the def has @@ -430,11 +437,14 @@ UPB_DEFINE_DEF(upb::FieldDef, fielddef, FIELD, bool set_containing_type_name(const char *name, Status* status); bool set_containing_type_name(const std::string& name, Status* status); - // When we freeze, we ensure that this can only be true for length-delimited - // message fields. Prior to freezing this can be true or false with no - // restrictions. + // Defaults to false. When we freeze, we ensure that this can only be true + // for length-delimited message fields. Prior to freezing this can be true or + // false with no restrictions. void set_lazy(bool lazy); + // Defaults to true. Sets whether this field is encoded in packed format. + void set_packed(bool packed); + // "type" or "descriptor_type" MUST be set explicitly before the fielddef is // finalized. These setters require that the enum value is valid; if the // value did not come directly from an enum constant, the caller should @@ -515,6 +525,7 @@ UPB_DEFINE_STRUCT(upb_fielddef, upb_def, bool type_is_set_; // False until type is explicitly set. 
bool is_extension_; bool lazy_; + bool packed_; upb_intfmt_t intfmt; bool tagdelim; upb_fieldtype_t type_; @@ -525,13 +536,13 @@ UPB_DEFINE_STRUCT(upb_fielddef, upb_def, )); #define UPB_FIELDDEF_INIT(label, type, intfmt, tagdelim, is_extension, lazy, \ - name, num, msgdef, subdef, selector_base, index, \ - defaultval, refs, ref2s) \ + packed, name, num, msgdef, subdef, selector_base, \ + index, defaultval, refs, ref2s) \ { \ UPB_DEF_INIT(name, UPB_DEF_FIELD, refs, ref2s), defaultval, {msgdef}, \ {subdef}, false, false, \ type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES, true, is_extension, \ - lazy, intfmt, tagdelim, type, label, num, selector_base, index \ + lazy, packed, intfmt, tagdelim, type, label, num, selector_base, index \ } UPB_BEGIN_EXTERN_C // { @@ -561,6 +572,7 @@ uint32_t upb_fielddef_number(const upb_fielddef *f); const char *upb_fielddef_name(const upb_fielddef *f); bool upb_fielddef_isextension(const upb_fielddef *f); bool upb_fielddef_lazy(const upb_fielddef *f); +bool upb_fielddef_packed(const upb_fielddef *f); const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f); upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f); const char *upb_fielddef_containingtypename(upb_fielddef *f); @@ -596,6 +608,7 @@ bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name, upb_status *s); void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension); void upb_fielddef_setlazy(upb_fielddef *f, bool lazy); +void upb_fielddef_setpacked(upb_fielddef *f, bool packed); void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt); void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim); void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t val); @@ -668,9 +681,26 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE( // These return NULL if the field is not found. 
FieldDef* FindFieldByNumber(uint32_t number); - FieldDef* FindFieldByName(const char *name); + FieldDef* FindFieldByName(const char *name, size_t len); const FieldDef* FindFieldByNumber(uint32_t number) const; - const FieldDef* FindFieldByName(const char* name) const; + const FieldDef* FindFieldByName(const char* name, size_t len) const; + + + FieldDef* FindFieldByName(const char *name) { + return FindFieldByName(name, strlen(name)); + } + const FieldDef* FindFieldByName(const char *name) const { + return FindFieldByName(name, strlen(name)); + } + + template + FieldDef* FindFieldByName(const T& str) { + return FindFieldByName(str.c_str(), str.size()); + } + template + const FieldDef* FindFieldByName(const T& str) const { + return FindFieldByName(str.c_str(), str.size()); + } // Returns a new msgdef that is a copy of the given msgdef (and a copy of all // the fields) but with any references to submessages broken and replaced @@ -759,12 +789,30 @@ bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname, upb_status *s); upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner); bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor, upb_status *s); + +// Field lookup in a couple of different variations: +// - itof = int to field +// - ntof = name to field +// - ntofz = name to field, null-terminated string. 
const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i); -const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name); -upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i); -upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m, const char *name); +const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, + size_t len); int upb_msgdef_numfields(const upb_msgdef *m); +UPB_INLINE const upb_fielddef *upb_msgdef_ntofz(const upb_msgdef *m, + const char *name) { + return upb_msgdef_ntof(m, name, strlen(name)); +} + +UPB_INLINE upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i) { + return (upb_fielddef*)upb_msgdef_itof(m, i); +} + +UPB_INLINE upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m, + const char *name, size_t len) { + return (upb_fielddef *)upb_msgdef_ntof(m, name, len); +} + // upb_msg_iter i; // for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { // upb_fielddef *f = upb_msg_iter_field(&i); @@ -1027,6 +1075,12 @@ inline bool FieldDef::lazy() const { inline void FieldDef::set_lazy(bool lazy) { upb_fielddef_setlazy(this, lazy); } +inline bool FieldDef::packed() const { + return upb_fielddef_packed(this); +} +inline void FieldDef::set_packed(bool packed) { + upb_fielddef_setpacked(this, packed); +} inline const MessageDef* FieldDef::containing_type() const { return upb_fielddef_containingtype(this); } @@ -1189,14 +1243,15 @@ inline bool MessageDef::AddField(const reffed_ptr& f, Status* s) { inline FieldDef* MessageDef::FindFieldByNumber(uint32_t number) { return upb_msgdef_itof_mutable(this, number); } -inline FieldDef* MessageDef::FindFieldByName(const char* name) { - return upb_msgdef_ntof_mutable(this, name); +inline FieldDef* MessageDef::FindFieldByName(const char* name, size_t len) { + return upb_msgdef_ntof_mutable(this, name, len); } inline const FieldDef* MessageDef::FindFieldByNumber(uint32_t number) const { return upb_msgdef_itof(this, number); } -inline const FieldDef* 
MessageDef::FindFieldByName(const char* name) const { - return upb_msgdef_ntof(this, name); +inline const FieldDef *MessageDef::FindFieldByName(const char *name, + size_t len) const { + return upb_msgdef_ntof(this, name, len); } inline MessageDef* MessageDef::Dup(const void *owner) const { return upb_msgdef_dup(this, owner); diff --git a/upb/descriptor/descriptor.upb.c b/upb/descriptor/descriptor.upb.c index 31503b1..3678db1 100755 --- a/upb/descriptor/descriptor.upb.c +++ b/upb/descriptor/descriptor.upb.c @@ -40,87 +40,87 @@ static const upb_msgdef msgs[20] = { }; static const upb_fielddef fields[81] = { - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "aggregate_value", 8, &msgs[18], NULL, 15, 6, {0},&reftables[40], &reftables[41]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "allow_alias", 2, &msgs[3], NULL, 6, 1, {0},&reftables[42], &reftables[43]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "cc_generic_services", 16, &msgs[10], NULL, 17, 6, {0},&reftables[44], &reftables[45]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, "ctype", 1, &msgs[7], UPB_UPCAST(&enums[2]), 6, 1, {0},&reftables[46], &reftables[47]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "default_value", 7, &msgs[6], NULL, 16, 7, {0},&reftables[48], &reftables[49]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, "dependency", 3, &msgs[8], NULL, 30, 8, {0},&reftables[50], &reftables[51]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "deprecated", 3, &msgs[7], NULL, 8, 3, {0},&reftables[52], &reftables[53]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, "double_value", 6, &msgs[18], NULL, 11, 4, {0},&reftables[54], &reftables[55]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, 
"end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[56], &reftables[57]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "enum_type", 4, &msgs[0], UPB_UPCAST(&msgs[2]), 16, 2, {0},&reftables[58], &reftables[59]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "enum_type", 5, &msgs[8], UPB_UPCAST(&msgs[2]), 13, 1, {0},&reftables[60], &reftables[61]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "experimental_map_key", 9, &msgs[7], NULL, 10, 5, {0},&reftables[62], &reftables[63]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "extendee", 2, &msgs[6], NULL, 7, 2, {0},&reftables[64], &reftables[65]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "extension", 7, &msgs[8], UPB_UPCAST(&msgs[6]), 19, 3, {0},&reftables[66], &reftables[67]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "extension", 6, &msgs[0], UPB_UPCAST(&msgs[6]), 22, 4, {0},&reftables[68], &reftables[69]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "extension_range", 5, &msgs[0], UPB_UPCAST(&msgs[1]), 19, 3, {0},&reftables[70], &reftables[71]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "field", 2, &msgs[0], UPB_UPCAST(&msgs[6]), 10, 0, {0},&reftables[72], &reftables[73]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "file", 1, &msgs[9], UPB_UPCAST(&msgs[8]), 5, 0, {0},&reftables[74], &reftables[75]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "go_package", 11, &msgs[10], NULL, 14, 5, {0},&reftables[76], &reftables[77]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "identifier_value", 3, &msgs[18], NULL, 6, 1, {0},&reftables[78], &reftables[79]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, 
false, false, "input_type", 2, &msgs[12], NULL, 7, 2, {0},&reftables[80], &reftables[81]), - UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, "is_extension", 2, &msgs[19], NULL, 5, 1, {0},&reftables[82], &reftables[83]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "java_generate_equals_and_hash", 20, &msgs[10], NULL, 20, 9, {0},&reftables[84], &reftables[85]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "java_generic_services", 17, &msgs[10], NULL, 18, 7, {0},&reftables[86], &reftables[87]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "java_multiple_files", 10, &msgs[10], NULL, 13, 4, {0},&reftables[88], &reftables[89]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "java_outer_classname", 8, &msgs[10], NULL, 9, 2, {0},&reftables[90], &reftables[91]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "java_package", 1, &msgs[10], NULL, 6, 1, {0},&reftables[92], &reftables[93]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, "label", 4, &msgs[6], UPB_UPCAST(&enums[0]), 11, 4, {0},&reftables[94], &reftables[95]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "lazy", 5, &msgs[7], NULL, 9, 4, {0},&reftables[96], &reftables[97]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "leading_comments", 3, &msgs[17], NULL, 8, 2, {0},&reftables[98], &reftables[99]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "location", 1, &msgs[16], UPB_UPCAST(&msgs[17]), 5, 0, {0},&reftables[100], &reftables[101]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "message_set_wire_format", 1, &msgs[11], NULL, 6, 1, {0},&reftables[102], &reftables[103]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, 
"message_type", 4, &msgs[8], UPB_UPCAST(&msgs[0]), 10, 0, {0},&reftables[104], &reftables[105]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "method", 2, &msgs[14], UPB_UPCAST(&msgs[12]), 6, 0, {0},&reftables[106], &reftables[107]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[8], NULL, 22, 6, {0},&reftables[108], &reftables[109]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[14], NULL, 8, 2, {0},&reftables[110], &reftables[111]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "name", 2, &msgs[18], UPB_UPCAST(&msgs[19]), 5, 0, {0},&reftables[112], &reftables[113]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[4], NULL, 4, 1, {0},&reftables[114], &reftables[115]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[0], NULL, 24, 6, {0},&reftables[116], &reftables[117]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[12], NULL, 4, 1, {0},&reftables[118], &reftables[119]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[2], NULL, 8, 2, {0},&reftables[120], &reftables[121]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "name", 1, &msgs[6], NULL, 4, 1, {0},&reftables[122], &reftables[123]), - UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, "name_part", 1, &msgs[19], NULL, 2, 0, {0},&reftables[124], &reftables[125]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, "negative_int_value", 5, &msgs[18], NULL, 10, 3, {0},&reftables[126], &reftables[127]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "nested_type", 3, &msgs[0], UPB_UPCAST(&msgs[0]), 13, 
1, {0},&reftables[128], &reftables[129]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[11], NULL, 7, 2, {0},&reftables[130], &reftables[131]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "number", 3, &msgs[6], NULL, 10, 3, {0},&reftables[132], &reftables[133]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "number", 2, &msgs[4], NULL, 7, 2, {0},&reftables[134], &reftables[135]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, "optimize_for", 9, &msgs[10], UPB_UPCAST(&enums[3]), 12, 3, {0},&reftables[136], &reftables[137]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 7, &msgs[0], UPB_UPCAST(&msgs[11]), 23, 5, {0},&reftables[138], &reftables[139]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 3, &msgs[2], UPB_UPCAST(&msgs[3]), 7, 1, {0},&reftables[140], &reftables[141]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 8, &msgs[6], UPB_UPCAST(&msgs[7]), 3, 0, {0},&reftables[142], &reftables[143]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 3, &msgs[4], UPB_UPCAST(&msgs[5]), 3, 0, {0},&reftables[144], &reftables[145]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 8, &msgs[8], UPB_UPCAST(&msgs[10]), 20, 4, {0},&reftables[146], &reftables[147]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 3, &msgs[14], UPB_UPCAST(&msgs[15]), 7, 1, {0},&reftables[148], &reftables[149]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "options", 4, &msgs[12], UPB_UPCAST(&msgs[13]), 3, 0, {0},&reftables[150], &reftables[151]), - 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "output_type", 3, &msgs[12], NULL, 10, 3, {0},&reftables[152], &reftables[153]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "package", 2, &msgs[8], NULL, 25, 7, {0},&reftables[154], &reftables[155]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "packed", 2, &msgs[7], NULL, 7, 2, {0},&reftables[156], &reftables[157]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "path", 1, &msgs[17], NULL, 4, 0, {0},&reftables[158], &reftables[159]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, "positive_int_value", 4, &msgs[18], NULL, 9, 2, {0},&reftables[160], &reftables[161]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "public_dependency", 10, &msgs[8], NULL, 35, 9, {0},&reftables[162], &reftables[163]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "py_generic_services", 18, &msgs[10], NULL, 19, 8, {0},&reftables[164], &reftables[165]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "service", 6, &msgs[8], UPB_UPCAST(&msgs[14]), 16, 2, {0},&reftables[166], &reftables[167]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, "source_code_info", 9, &msgs[8], UPB_UPCAST(&msgs[16]), 21, 5, {0},&reftables[168], &reftables[169]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "span", 2, &msgs[17], NULL, 7, 1, {0},&reftables[170], &reftables[171]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[172], &reftables[173]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, "string_value", 7, &msgs[18], NULL, 
12, 5, {0},&reftables[174], &reftables[175]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "trailing_comments", 4, &msgs[17], NULL, 11, 3, {0},&reftables[176], &reftables[177]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, "type", 5, &msgs[6], UPB_UPCAST(&enums[1]), 12, 5, {0},&reftables[178], &reftables[179]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, "type_name", 6, &msgs[6], NULL, 13, 6, {0},&reftables[180], &reftables[181]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[5], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[182], &reftables[183]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[15], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[184], &reftables[185]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[3], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[186], &reftables[187]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[13], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[188], &reftables[189]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[10], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[190], &reftables[191]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[11], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[192], &reftables[193]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "uninterpreted_option", 999, &msgs[7], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[194], &reftables[195]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, "value", 2, &msgs[2], UPB_UPCAST(&msgs[4]), 6, 
0, {0},&reftables[196], &reftables[197]), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, "weak", 10, &msgs[7], NULL, 13, 6, {0},&reftables[198], &reftables[199]), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, "weak_dependency", 11, &msgs[8], NULL, 38, 10, {0},&reftables[200], &reftables[201]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[18], NULL, 15, 6, {0},&reftables[40], &reftables[41]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[3], NULL, 6, 1, {0},&reftables[42], &reftables[43]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[10], NULL, 17, 6, {0},&reftables[44], &reftables[45]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[7], UPB_UPCAST(&enums[2]), 6, 1, {0},&reftables[46], &reftables[47]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[6], NULL, 16, 7, {0},&reftables[48], &reftables[49]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[8], NULL, 30, 8, {0},&reftables[50], &reftables[51]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[7], NULL, 8, 3, {0},&reftables[52], &reftables[53]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[18], NULL, 11, 4, {0},&reftables[54], &reftables[55]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[56], &reftables[57]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 
4, &msgs[0], UPB_UPCAST(&msgs[2]), 16, 2, {0},&reftables[58], &reftables[59]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[8], UPB_UPCAST(&msgs[2]), 13, 1, {0},&reftables[60], &reftables[61]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "experimental_map_key", 9, &msgs[7], NULL, 10, 5, {0},&reftables[62], &reftables[63]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[6], NULL, 7, 2, {0},&reftables[64], &reftables[65]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[8], UPB_UPCAST(&msgs[6]), 19, 3, {0},&reftables[66], &reftables[67]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], UPB_UPCAST(&msgs[6]), 22, 4, {0},&reftables[68], &reftables[69]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], UPB_UPCAST(&msgs[1]), 19, 3, {0},&reftables[70], &reftables[71]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], UPB_UPCAST(&msgs[6]), 10, 0, {0},&reftables[72], &reftables[73]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[9], UPB_UPCAST(&msgs[8]), 5, 0, {0},&reftables[74], &reftables[75]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[10], NULL, 14, 5, {0},&reftables[76], &reftables[77]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[18], NULL, 6, 1, {0},&reftables[78], &reftables[79]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[12], NULL, 7, 2, {0},&reftables[80], 
&reftables[81]), + UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[19], NULL, 5, 1, {0},&reftables[82], &reftables[83]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[10], NULL, 20, 9, {0},&reftables[84], &reftables[85]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[10], NULL, 18, 7, {0},&reftables[86], &reftables[87]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[10], NULL, 13, 4, {0},&reftables[88], &reftables[89]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[10], NULL, 9, 2, {0},&reftables[90], &reftables[91]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[10], NULL, 6, 1, {0},&reftables[92], &reftables[93]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[6], UPB_UPCAST(&enums[0]), 11, 4, {0},&reftables[94], &reftables[95]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[7], NULL, 9, 4, {0},&reftables[96], &reftables[97]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[17], NULL, 8, 2, {0},&reftables[98], &reftables[99]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[16], UPB_UPCAST(&msgs[17]), 5, 0, {0},&reftables[100], &reftables[101]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[11], NULL, 6, 1, {0},&reftables[102], &reftables[103]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, 
false, false, "message_type", 4, &msgs[8], UPB_UPCAST(&msgs[0]), 10, 0, {0},&reftables[104], &reftables[105]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[14], UPB_UPCAST(&msgs[12]), 6, 0, {0},&reftables[106], &reftables[107]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[8], NULL, 22, 6, {0},&reftables[108], &reftables[109]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[14], NULL, 8, 2, {0},&reftables[110], &reftables[111]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[18], UPB_UPCAST(&msgs[19]), 5, 0, {0},&reftables[112], &reftables[113]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[4], NULL, 4, 1, {0},&reftables[114], &reftables[115]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 24, 6, {0},&reftables[116], &reftables[117]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[12], NULL, 4, 1, {0},&reftables[118], &reftables[119]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[2], NULL, 8, 2, {0},&reftables[120], &reftables[121]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[6], NULL, 4, 1, {0},&reftables[122], &reftables[123]), + UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[19], NULL, 2, 0, {0},&reftables[124], &reftables[125]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[18], NULL, 10, 3, {0},&reftables[126], &reftables[127]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, 
UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], UPB_UPCAST(&msgs[0]), 13, 1, {0},&reftables[128], &reftables[129]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[11], NULL, 7, 2, {0},&reftables[130], &reftables[131]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[6], NULL, 10, 3, {0},&reftables[132], &reftables[133]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[4], NULL, 7, 2, {0},&reftables[134], &reftables[135]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[10], UPB_UPCAST(&enums[3]), 12, 3, {0},&reftables[136], &reftables[137]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], UPB_UPCAST(&msgs[11]), 23, 5, {0},&reftables[138], &reftables[139]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[2], UPB_UPCAST(&msgs[3]), 7, 1, {0},&reftables[140], &reftables[141]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[6], UPB_UPCAST(&msgs[7]), 3, 0, {0},&reftables[142], &reftables[143]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[4], UPB_UPCAST(&msgs[5]), 3, 0, {0},&reftables[144], &reftables[145]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[8], UPB_UPCAST(&msgs[10]), 20, 4, {0},&reftables[146], &reftables[147]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[14], UPB_UPCAST(&msgs[15]), 7, 1, {0},&reftables[148], &reftables[149]), + 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[12], UPB_UPCAST(&msgs[13]), 3, 0, {0},&reftables[150], &reftables[151]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[12], NULL, 10, 3, {0},&reftables[152], &reftables[153]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[8], NULL, 25, 7, {0},&reftables[154], &reftables[155]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[7], NULL, 7, 2, {0},&reftables[156], &reftables[157]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[17], NULL, 4, 0, {0},&reftables[158], &reftables[159]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[18], NULL, 9, 2, {0},&reftables[160], &reftables[161]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[8], NULL, 35, 9, {0},&reftables[162], &reftables[163]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[10], NULL, 19, 8, {0},&reftables[164], &reftables[165]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[8], UPB_UPCAST(&msgs[14]), 16, 2, {0},&reftables[166], &reftables[167]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[8], UPB_UPCAST(&msgs[16]), 21, 5, {0},&reftables[168], &reftables[169]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[17], NULL, 7, 1, {0},&reftables[170], &reftables[171]), + 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[172], &reftables[173]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[18], NULL, 12, 5, {0},&reftables[174], &reftables[175]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[17], NULL, 11, 3, {0},&reftables[176], &reftables[177]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[6], UPB_UPCAST(&enums[1]), 12, 5, {0},&reftables[178], &reftables[179]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[6], NULL, 13, 6, {0},&reftables[180], &reftables[181]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[5], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[182], &reftables[183]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[15], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[184], &reftables[185]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[3], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[186], &reftables[187]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[13], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[188], &reftables[189]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[10], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[190], &reftables[191]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], UPB_UPCAST(&msgs[18]), 5, 
0, {0},&reftables[192], &reftables[193]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[7], UPB_UPCAST(&msgs[18]), 5, 0, {0},&reftables[194], &reftables[195]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[2], UPB_UPCAST(&msgs[4]), 6, 0, {0},&reftables[196], &reftables[197]), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[7], NULL, 13, 6, {0},&reftables[198], &reftables[199]), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[8], NULL, 38, 10, {0},&reftables[200], &reftables[201]), }; static const upb_enumdef enums[4] = { diff --git a/upb/descriptor/reader.c b/upb/descriptor/reader.c index fdfa4e3..1baad81 100644 --- a/upb/descriptor/reader.c +++ b/upb/descriptor/reader.c @@ -272,6 +272,9 @@ static bool field_startmsg(void *closure, const void *hd) { r->f = upb_fielddef_new(&r->defs); free(r->default_string); r->default_string = NULL; + + // fielddefs default to packed, but descriptors default to non-packed. 
+ upb_fielddef_setpacked(r->f, false); return true; } @@ -378,6 +381,13 @@ static bool field_onlazy(void *closure, const void *hd, bool val) { return true; } +static bool field_onpacked(void *closure, const void *hd, bool val) { + UPB_UNUSED(hd); + upb_descreader *r = closure; + upb_fielddef_setpacked(r->f, val); + return true; +} + static bool field_ontype(void *closure, const void *hd, int32_t val) { UPB_UNUSED(hd); upb_descreader *r = closure; @@ -552,6 +562,7 @@ static void reghandlers(const void *closure, upb_handlers *h) { &field_ondefaultval, NULL); } else if (m == D(FieldOptions)) { upb_handlers_setbool(h, D(FieldOptions_lazy), &field_onlazy, NULL); + upb_handlers_setbool(h, D(FieldOptions_packed), &field_onpacked, NULL); } } diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c index 8452bea..64689f6 100644 --- a/upb/pb/compile_decoder.c +++ b/upb/pb/compile_decoder.c @@ -149,7 +149,7 @@ const upb_pbdecodermethod *upb_pbdecodermethod_new( } -/* compiler *******************************************************************/ +/* bytecode compiler **********************************************************/ // Data used only at compilation time. typedef struct { @@ -575,8 +575,8 @@ static void putsel(compiler *c, opcode op, upb_selector_t sel, // Puts an opcode to call a callback, but only if a callback actually exists for // this field and handler type. -static void putcb(compiler *c, opcode op, const upb_handlers *h, - const upb_fielddef *f, upb_handlertype_t type) { +static void maybeput(compiler *c, opcode op, const upb_handlers *h, + const upb_fielddef *f, upb_handlertype_t type) { putsel(c, op, getsel(f, type), h); } @@ -589,40 +589,165 @@ static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) { upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR)); } + +/* bytecode compiler code generation ******************************************/ + +// Symbolic names for our local labels. 
+#define LABEL_LOOPSTART 1 // Top of a repeated field loop. +#define LABEL_LOOPBREAK 2 // To jump out of a repeated loop +#define LABEL_FIELD 3 // Jump backward to find the most recent field. +#define LABEL_ENDMSG 4 // To reach the OP_ENDMSG instr for this msg. + +// Generates bytecode to parse a single non-lazy message field. +static void generate_msgfield(compiler *c, const upb_fielddef *f, + upb_pbdecodermethod *method) { + const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); + const upb_pbdecodermethod *sub_m = find_submethod(c, method, f); + + if (!sub_m) { + // Don't emit any code for this field at all; it will be parsed as an + // unknown field. + return; + } + + label(c, LABEL_FIELD); + + int wire_type = + (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) + ? UPB_WIRE_TYPE_DELIMITED + : UPB_WIRE_TYPE_START_GROUP; + + if (upb_fielddef_isseq(f)) { + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + putchecktag(c, f, wire_type, LABEL_DISPATCH); + dispatchtarget(c, method, f, wire_type); + putop(c, OP_PUSHTAGDELIM, 0); + putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); + label(c, LABEL_LOOPSTART); + putpush(c, f); + putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); + putop(c, OP_CALL, sub_m); + putop(c, OP_POP); + maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); + if (wire_type == UPB_WIRE_TYPE_DELIMITED) { + putop(c, OP_SETDELIM); + } + putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); + putchecktag(c, f, wire_type, LABEL_LOOPBREAK); + putop(c, OP_BRANCH, -LABEL_LOOPSTART); + label(c, LABEL_LOOPBREAK); + putop(c, OP_POP); + maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); + } else { + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + putchecktag(c, f, wire_type, LABEL_DISPATCH); + dispatchtarget(c, method, f, wire_type); + putpush(c, f); + putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); + putop(c, OP_CALL, sub_m); + putop(c, OP_POP); + maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); + if (wire_type == 
UPB_WIRE_TYPE_DELIMITED) { + putop(c, OP_SETDELIM); + } + } +} + +// Generates bytecode to parse a single string or lazy submessage field. +static void generate_delimfield(compiler *c, const upb_fielddef *f, + upb_pbdecodermethod *method) { + const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); + + label(c, LABEL_FIELD); + if (upb_fielddef_isseq(f)) { + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); + dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); + putop(c, OP_PUSHTAGDELIM, 0); + putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); + label(c, LABEL_LOOPSTART); + putop(c, OP_PUSHLENDELIM); + putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); + // Need to emit even if no handler to skip past the string. + putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); + putop(c, OP_POP); + maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR); + putop(c, OP_SETDELIM); + putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); + putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK); + putop(c, OP_BRANCH, -LABEL_LOOPSTART); + label(c, LABEL_LOOPBREAK); + putop(c, OP_POP); + maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); + } else { + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); + dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); + putop(c, OP_PUSHLENDELIM); + putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); + putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); + putop(c, OP_POP); + maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR); + putop(c, OP_SETDELIM); + } +} + +// Generates bytecode to parse a single primitive field. 
+static void generate_primitivefield(compiler *c, const upb_fielddef *f, + upb_pbdecodermethod *method) { + label(c, LABEL_FIELD); + + const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); + upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f); + + // From a decoding perspective, ENUM is the same as INT32. + if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM) + descriptor_type = UPB_DESCRIPTOR_TYPE_INT32; + + opcode parse_type = (opcode)descriptor_type; + + // TODO(haberman): generate packed or non-packed first depending on "packed" + // setting in the fielddef. This will favor (in speed) whichever was + // specified. + + assert((int)parse_type >= 0 && parse_type <= OP_MAX); + upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); + int wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)]; + if (upb_fielddef_isseq(f)) { + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); + dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); + putop(c, OP_PUSHLENDELIM); + putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Packed + label(c, LABEL_LOOPSTART); + putop(c, parse_type, sel); + putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); + putop(c, OP_BRANCH, -LABEL_LOOPSTART); + dispatchtarget(c, method, f, wire_type); + putop(c, OP_PUSHTAGDELIM, 0); + putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Non-packed + label(c, LABEL_LOOPSTART); + putop(c, parse_type, sel); + putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); + putchecktag(c, f, wire_type, LABEL_LOOPBREAK); + putop(c, OP_BRANCH, -LABEL_LOOPSTART); + label(c, LABEL_LOOPBREAK); + putop(c, OP_POP); // Packed and non-packed join. + maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); + putop(c, OP_SETDELIM); // Could remove for non-packed by dup ENDSEQ. 
+ } else { + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + putchecktag(c, f, wire_type, LABEL_DISPATCH); + dispatchtarget(c, method, f, wire_type); + putop(c, parse_type, sel); + } +} + // Adds bytecode for parsing the given message to the given decoderplan, // while adding all dispatch targets to this message's dispatch table. static void compile_method(compiler *c, upb_pbdecodermethod *method) { assert(method); - // Symbolic names for our local labels. - const int LABEL_LOOPSTART = 1; // Top of a repeated field loop. - const int LABEL_LOOPBREAK = 2; // To jump out of a repeated loop - const int LABEL_FIELD = 3; // Jump backward to find the most recent field. - const int LABEL_ENDMSG = 4; // To reach the OP_ENDMSG instr for this msg. - - // Index is descriptor type. - static const uint8_t native_wire_types[] = { - UPB_WIRE_TYPE_END_GROUP, // ENDGROUP - UPB_WIRE_TYPE_64BIT, // DOUBLE - UPB_WIRE_TYPE_32BIT, // FLOAT - UPB_WIRE_TYPE_VARINT, // INT64 - UPB_WIRE_TYPE_VARINT, // UINT64 - UPB_WIRE_TYPE_VARINT, // INT32 - UPB_WIRE_TYPE_64BIT, // FIXED64 - UPB_WIRE_TYPE_32BIT, // FIXED32 - UPB_WIRE_TYPE_VARINT, // BOOL - UPB_WIRE_TYPE_DELIMITED, // STRING - UPB_WIRE_TYPE_START_GROUP, // GROUP - UPB_WIRE_TYPE_DELIMITED, // MESSAGE - UPB_WIRE_TYPE_DELIMITED, // BYTES - UPB_WIRE_TYPE_VARINT, // UINT32 - UPB_WIRE_TYPE_VARINT, // ENUM - UPB_WIRE_TYPE_32BIT, // SFIXED32 - UPB_WIRE_TYPE_64BIT, // SFIXED64 - UPB_WIRE_TYPE_VARINT, // SINT32 - UPB_WIRE_TYPE_VARINT, // SINT64 - }; - // Clear all entries in the dispatch table. 
upb_inttable_uninit(&method->dispatch); upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64); @@ -637,128 +762,15 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) { upb_msg_iter i; for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); - upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f); upb_fieldtype_t type = upb_fielddef_type(f); - // From a decoding perspective, ENUM is the same as INT32. - if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM) - descriptor_type = UPB_DESCRIPTOR_TYPE_INT32; - if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) { - const upb_pbdecodermethod *sub_m = find_submethod(c, method, f); - if (!sub_m) { - // Don't emit any code for this field at all; it will be parsed as an - // unknown field. - continue; - } - - label(c, LABEL_FIELD); - - int wire_type = (descriptor_type == UPB_DESCRIPTOR_TYPE_MESSAGE) - ? UPB_WIRE_TYPE_DELIMITED - : UPB_WIRE_TYPE_START_GROUP; - if (upb_fielddef_isseq(f)) { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, wire_type, LABEL_DISPATCH); - dispatchtarget(c, method, f, wire_type); - putop(c, OP_PUSHTAGDELIM, 0); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); - label(c, LABEL_LOOPSTART); - putpush(c, f); - putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); - putop(c, OP_CALL, sub_m); - putop(c, OP_POP); - putcb(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); - if (wire_type == UPB_WIRE_TYPE_DELIMITED) { - putop(c, OP_SETDELIM); - } - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putchecktag(c, f, wire_type, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - label(c, LABEL_LOOPBREAK); - putop(c, OP_POP); - putcb(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); - } else { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, wire_type, LABEL_DISPATCH); - dispatchtarget(c, method, f, wire_type); - putpush(c, f); - putop(c, OP_STARTSUBMSG, getsel(f, 
UPB_HANDLER_STARTSUBMSG)); - putop(c, OP_CALL, sub_m); - putop(c, OP_POP); - putcb(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); - if (wire_type == UPB_WIRE_TYPE_DELIMITED) { - putop(c, OP_SETDELIM); - } - } + generate_msgfield(c, f, method); } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES || type == UPB_TYPE_MESSAGE) { - label(c, LABEL_FIELD); - if (upb_fielddef_isseq(f)) { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); - dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); - putop(c, OP_PUSHTAGDELIM, 0); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); - label(c, LABEL_LOOPSTART); - putop(c, OP_PUSHLENDELIM); - putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); - // Need to emit even if no handler to skip past the string. - putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); - putop(c, OP_POP); - putcb(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR); - putop(c, OP_SETDELIM); - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - label(c, LABEL_LOOPBREAK); - putop(c, OP_POP); - putcb(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); - } else { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); - dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); - putop(c, OP_PUSHLENDELIM); - putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); - putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); - putop(c, OP_POP); - putcb(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR); - putop(c, OP_SETDELIM); - } + generate_delimfield(c, f, method); } else { - label(c, LABEL_FIELD); - opcode parse_type = (opcode)descriptor_type; - assert((int)parse_type >= 0 && parse_type <= OP_MAX); - upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); - int wire_type = native_wire_types[upb_fielddef_descriptortype(f)]; - if (upb_fielddef_isseq(f)) { - putop(c, OP_CHECKDELIM, 
LABEL_ENDMSG); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); - dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); - putop(c, OP_PUSHLENDELIM); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Packed - label(c, LABEL_LOOPSTART); - putop(c, parse_type, sel); - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - dispatchtarget(c, method, f, wire_type); - putop(c, OP_PUSHTAGDELIM, 0); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Non-packed - label(c, LABEL_LOOPSTART); - putop(c, parse_type, sel); - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putchecktag(c, f, wire_type, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - label(c, LABEL_LOOPBREAK); - putop(c, OP_POP); // Packed and non-packed join. - putcb(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); - putop(c, OP_SETDELIM); // Could remove for non-packed by dup ENDSEQ. - } else { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, wire_type, LABEL_DISPATCH); - dispatchtarget(c, method, f, wire_type); - putop(c, parse_type, sel); - } + generate_primitivefield(c, f, method); } } diff --git a/upb/pb/compile_decoder_x64.c b/upb/pb/compile_decoder_x64.c index 44c4419..b4086c7 100644 --- a/upb/pb/compile_decoder_x64.c +++ b/upb/pb/compile_decoder_x64.c @@ -23,7 +23,7 @@ // // Note: this mode requires that we can shell out to gcc. // -// 2. Run the test once locally. This will load the JIT code by building a +// 2. Run the test locally. This will load the JIT code by building a // .so (/tmp/upb-jit-code.so) and using dlopen, so more of the tooling will // work properly (like GDB). // diff --git a/upb/pb/compile_decoder_x64.dasc b/upb/pb/compile_decoder_x64.dasc index 180017f..a87b376 100644 --- a/upb/pb/compile_decoder_x64.dasc +++ b/upb/pb/compile_decoder_x64.dasc @@ -61,17 +61,21 @@ | add DELIMEND, DECODER->buf |.endmacro | -| // OPT: use "call rel32" where possible. +| // Calls an external C function at address "addr". 
|.macro callp, addr -|| { -|| //int64_t ofs = (int64_t)addr - (int64_t)upb_status_init; -|| //if (ofs > (1 << 30) || ofs < -(1 << 30)) { | mov64 rax, (uintptr_t)addr +| +| // Stack must be 16-byte aligned (x86-64 ABI requires this). +| // +| // OPT: possibly remove this by statically ensuring correct alignment. +| // +| // OPT: use "call rel32" where possible. +| push r12 +| mov r12, rsp +| and rsp, 0xfffffffffffffff0UL // Align stack. | call rax -|| //} else { -| // call &addr -|| //} -|| } +| mov rsp, r12 +| pop r12 |.endmacro | |.macro ld64, val @@ -208,12 +212,6 @@ static void emit_static_asm(jitcompiler *jc) { | push r12 | push rbx | - | // Align stack. - | // Since the JIT can call other functions (the JIT'ted code is not a leaf - | // function) we must respect alignment rules. All x86-64 systems require - | // 16-byte stack alignment. - | sub rsp, 8 - | | mov rbx, ARG2_64 // Preserve JIT method. | | mov DECODER, rdi @@ -234,7 +232,6 @@ static void emit_static_asm(jitcompiler *jc) { | mov rax, DECODER->size_param | mov qword DECODER->call_len, 0 |1: - | add rsp, 8 // Counter previous alignment. | pop rbx | pop r12 | pop r13 @@ -270,7 +267,6 @@ static void emit_static_asm(jitcompiler *jc) { | // Must NOT do this before the memcpy(), otherwise memcpy() will | // clobber the stack we are trying to save! | mov rsp, DECODER->saved_rsp - | add rsp, 8 // Counter previous alignment. | pop rbx | pop r12 | pop r13 diff --git a/upb/pb/encoder.c b/upb/pb/encoder.c index 975f3ab..4681c20 100644 --- a/upb/pb/encoder.c +++ b/upb/pb/encoder.c @@ -1,421 +1,496 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2009 Google Inc. See LICENSE for details. + * Copyright (c) 2014 Google Inc. See LICENSE for details. * Author: Josh Haberman + * + * Since we are implementing pure handlers (ie. without any out-of-band access + * to pre-computed lengths), we have to buffer all submessages before we can + * emit even their first byte. 
+ * + * Not knowing the size of submessages also means we can't write a perfect + * zero-copy implementation, even with buffering. Lengths are stored as + * varints, which means that we don't know how many bytes to reserve for the + * length until we know what the length is. + * + * This leaves us with three main choices: + * + * 1. buffer all submessage data in a temporary buffer, then copy it exactly + * once into the output buffer. + * + * 2. attempt to buffer data directly into the output buffer, estimating how + * many bytes each length will take. When our guesses are wrong, use + * memmove() to grow or shrink the allotted space. + * + * 3. buffer directly into the output buffer, allocating a max length + * ahead-of-time for each submessage length. If we overallocated, we waste + * space, but no memcpy() or memmove() is required. This approach requires + * defining a maximum size for submessages and rejecting submessages that + * exceed that size. + * + * (2) and (3) have the potential to have better performance, but they are more + * complicated and subtle to implement: + * + * (3) requires making an arbitrary choice of the maximum message size; it + * wastes space when submessages are shorter than this and fails + * completely when they are longer. This makes it more finicky and + * requires configuration based on the input. It also makes it impossible + * to perfectly match the output of reference encoders that always use the + * optimal amount of space for each length. + * + * (2) requires guessing the size upfront, and if multiple lengths are + * guessed wrong the minimum required number of memmove() operations may + * be complicated to compute correctly. Implemented properly, it may have + * a useful amortized or average cost, but more investigation is required + * to determine this and what the optimal algorithm is to achieve it.
+ * + * (1) makes you always pay for exactly one copy, but its implementation is + * the simplest and its performance is predictable. + * + * So for now, we implement (1) only. If we wish to optimize later, we should + * be able to do it without affecting users. + * + * The strategy is to buffer the segments of data that do *not* depend on + * unknown lengths in one buffer, and keep a separate buffer of segment pointers + * and lengths. When the top-level submessage ends, we can go beginning to end, + * alternating the writing of lengths with memcpy() of the rest of the data. + * At the top level though, no buffering is required. */ #include "upb/pb/encoder.h" +#include "upb/pb/varint.int.h" #include -#include "upb/descriptor.h" - -/* Functions for calculating sizes of wire values. ****************************/ - -static size_t upb_v_uint64_t_size(uint64_t val) { -#ifdef __GNUC__ - int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0. -#else - int high_bit = 0; - uint64_t tmp = val; - while(tmp >>= 1) high_bit++; -#endif - return val == 0 ? 1 : high_bit / 7 + 1; -} -static size_t upb_v_int32_t_size(int32_t val) { - // v_uint32's are sign-extended to maintain wire compatibility with int64s. - return upb_v_uint64_t_size((int64_t)val); +/* low-level buffering ********************************************************/ + +// Low-level functions for interacting with the output buffer. + +// TODO(haberman): handle pushback +static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) { + size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL); + UPB_ASSERT_VAR(n, n == len); } -static size_t upb_v_uint32_t_size(uint32_t val) { - return upb_v_uint64_t_size(val); + +static upb_pb_encoder_segment *top(upb_pb_encoder *e) { + return &e->segbuf[*e->top]; } -static size_t upb_f_uint64_t_size(uint64_t val) { - (void)val; // Length is independent of value. 
- return sizeof(uint64_t); + +// Call to ensure that at least "bytes" bytes are available for writing at +// e->ptr. Returns false if the bytes could not be allocated. +static bool reserve(upb_pb_encoder *e, size_t bytes) { + if ((e->limit - e->ptr) < bytes) { + size_t needed = bytes + (e->ptr - e->buf); + size_t old_size = e->limit - e->buf; + size_t new_size = old_size; + while (new_size < needed) { + new_size *= 2; + } + + char *realloc_from = (e->buf == e->initbuf) ? NULL : e->buf; + char *new_buf = realloc(realloc_from, new_size); + + if (new_buf == NULL) { + return false; + } + + if (realloc_from == NULL) { + memcpy(new_buf, e->initbuf, old_size); + } + + e->ptr = new_buf + (e->ptr - e->buf); + e->runbegin = new_buf + (e->runbegin - e->buf); + e->limit = new_buf + new_size; + e->buf = new_buf; + } + + return true; } -static size_t upb_f_uint32_t_size(uint32_t val) { - (void)val; // Length is independent of value. - return sizeof(uint32_t); + +// Call when "bytes" bytes have been written at e->ptr. The caller *must* have +// previously called reserve() with at least this many bytes. +static void advance(upb_pb_encoder *e, size_t bytes) { + assert((e->limit - e->ptr) >= bytes); + e->ptr += bytes; } +// Call when all of the bytes for a handler have been written. Flushes the +// bytes if possible and necessary, returning false if this failed. +static bool commit(upb_pb_encoder *e) { + if (!e->top) { + // We aren't inside a delimited region. Flush our accumulated bytes to + // the output. + // + // TODO(haberman): in the future we may want to delay flushing for + // efficiency reasons. + putbuf(e, e->buf, e->ptr - e->buf); + e->ptr = e->buf; + } -/* Functions to write wire values. ********************************************/ + return true; +} -// Since we know in advance the longest that the value could be, we always make -// sure that our buffer is long enough. This saves us from having to perform -// bounds checks.
+// Writes the given bytes to the buffer, handling reserve/advance. +static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) { + if (!reserve(e, len)) { + return false; + } -// Puts a varint (wire type: UPB_WIRE_TYPE_VARINT). -static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val) -{ - do { - uint8_t byte = val & 0x7f; - val >>= 7; - if(val) byte |= 0x80; - *buf++ = byte; - } while(val); - return buf; + memcpy(e->ptr, data, len); + advance(e, len); + return true; } -// Puts an unsigned 32-bit varint, verbatim. Never uses the high 64 bits. -static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val) -{ - return upb_put_v_uint64_t(buf, val); +// Finish the current run by adding the run totals to the segment and message +// length. +static void accumulate(upb_pb_encoder *e) { + assert(e->ptr >= e->runbegin); + size_t run_len = e->ptr - e->runbegin; + e->segptr->seglen += run_len; + top(e)->msglen += run_len; + e->runbegin = e->ptr; } -// Puts a signed 32-bit varint, first sign-extending to 64-bits. We do this to -// maintain wire-compatibility with 64-bit signed integers. -static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val) -{ - return upb_put_v_uint64_t(buf, (int64_t)val); +// Call to indicate the start of delimited region for which the full length is +// not yet known. All data will be buffered until the length is known. +// Delimited regions may be nested; their lengths will all be tracked properly. +static bool start_delim(upb_pb_encoder *e) { + if (e->top) { + // We are already buffering, advance to the next segment and push it on the + // stack. + accumulate(e); + + if (++e->top == e->stacklimit) { + // TODO(haberman): grow stack? + return false; + } + + if (++e->segptr == e->seglimit) { + upb_pb_encoder_segment *realloc_from = + (e->segbuf == e->seginitbuf) ? 
NULL : e->segbuf; + size_t old_size = + (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment); + size_t new_size = old_size * 2; + upb_pb_encoder_segment *new_buf = realloc(realloc_from, new_size); + + if (new_buf == NULL) { + return false; + } + + if (realloc_from == NULL) { + memcpy(new_buf, e->seginitbuf, old_size); + } + + e->segptr = new_buf + (e->segptr - e->segbuf); + e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment)); + e->segbuf = new_buf; + } + } else { + // We were previously at the top level, start buffering. + e->segptr = e->segbuf; + e->top = e->stack; + e->runbegin = e->ptr; + } + + *e->top = e->segptr - e->segbuf; + e->segptr->seglen = 0; + e->segptr->msglen = 0; + + return true; } -static void upb_put32(uint8_t *buf, uint32_t val) { - buf[0] = val & 0xff; - buf[1] = (val >> 8) & 0xff; - buf[2] = (val >> 16) & 0xff; - buf[3] = (val >> 24); +// Call to indicate the end of a delimited region. We now know the length of +// the delimited region. If we are not nested inside any other delimited +// regions, we can now emit all of the buffered data we accumulated. +static bool end_delim(upb_pb_encoder *e) { + accumulate(e); + size_t msglen = top(e)->msglen; + + if (e->top == e->stack) { + // All lengths are now available, emit all buffered data. + char buf[UPB_PB_VARINT_MAX_LEN]; + upb_pb_encoder_segment *s; + const char *ptr = e->buf; + for (s = e->segbuf; s <= e->segptr; s++) { + size_t lenbytes = upb_vencode64(s->msglen, buf); + putbuf(e, buf, lenbytes); + putbuf(e, ptr, s->seglen); + ptr += s->seglen; + } + + e->ptr = e->buf; + e->top = NULL; + } else { + // Need to keep buffering; propagate length info into enclosing submessages. + --e->top; + top(e)->msglen += msglen + upb_varint_size(msglen); + } + + return true; } -// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). 
-static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val) -{ - uint8_t *uint32_end = buf + sizeof(uint32_t); -#if UPB_UNALIGNED_READS_OK - *(uint32_t*)buf = val; -#else - upb_put32(buf, val); -#endif - return uint32_end; + +/* tag_t **********************************************************************/ + +// A precomputed (pre-encoded) tag and length. + +typedef struct { + uint8_t bytes; + char tag[7]; +} tag_t; + +// Allocates a new tag for this field, and sets it in these handlerattr. +static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt, + upb_handlerattr *attr) { + uint32_t n = upb_fielddef_number(f); + + tag_t *tag = malloc(sizeof(tag_t)); + tag->bytes = upb_vencode64((n << 3) | wt, tag->tag); + + upb_handlerattr_init(attr); + upb_handlerattr_sethandlerdata(attr, tag); + upb_handlers_addcleanup(h, tag, free); } -// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). -static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val) -{ - uint8_t *uint64_end = buf + sizeof(uint64_t); -#if UPB_UNALIGNED_READS_OK - *(uint64_t*)buf = val; -#else - upb_put32(buf, (uint32_t)val); - upb_put32(buf, (uint32_t)(val >> 32)); -#endif - return uint64_end; +static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) { + return encode_bytes(e, tag->tag, tag->bytes); } -/* Functions to write and calculate sizes for .proto values. ******************/ -// Performs zig-zag encoding, which is used by sint32 and sint64. -static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } -static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } +/* encoding of wire types *****************************************************/ -/* Use macros to define a set of two functions for each .proto type: - * - * // Converts and writes a .proto value into buf. "end" indicates the end - * // of the current available buffer (if the buffer does not contain enough - * // space UPB_STATUS_NEED_MORE_DATA is returned). 
On success, *outbuf will - * // point one past the data that was written. - * uint8_t *upb_put_INT32(uint8_t *buf, int32_t val); - * - * // Returns the number of bytes required to encode val. - * size_t upb_get_INT32_size(int32_t val); - * - * // Given a .proto value s (source) convert it to a wire value. - * uint32_t upb_vtowv_INT32(int32_t s); - */ +static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) { + // TODO(haberman): byte-swap for big endian. + return encode_bytes(e, &val, sizeof(uint64_t)); +} -#define VTOWV(type, wire_t, val_t) \ - static wire_t upb_vtowv_ ## type(val_t s) +static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) { + // TODO(haberman): byte-swap for big endian. + return encode_bytes(e, &val, sizeof(uint32_t)); +} -#define PUT(type, v_or_f, wire_t, val_t, member_name) \ - static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \ - wire_t tmp = upb_vtowv_ ## type(val); \ - return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \ +static bool encode_varint(upb_pb_encoder *e, uint64_t val) { + if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) { + return false; } -#define T(type, v_or_f, wire_t, val_t, member_name) \ - static size_t upb_get_ ## type ## _size(val_t val) { \ - return upb_ ## v_or_f ## _ ## wire_t ## _size(val); \ - } \ - VTOWV(type, wire_t, val_t); /* prototype for PUT below */ \ - PUT(type, v_or_f, wire_t, val_t, member_name) \ - VTOWV(type, wire_t, val_t) - -T(INT32, v, int32_t, int32_t, int32) { return (uint32_t)s; } -T(INT64, v, uint64_t, int64_t, int64) { return (uint64_t)s; } -T(UINT32, v, uint32_t, uint32_t, uint32) { return s; } -T(UINT64, v, uint64_t, uint64_t, uint64) { return s; } -T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzenc_32(s); } -T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzenc_64(s); } -T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; } -T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; } -T(SFIXED32, f, uint32_t, int32_t, int32) { return (uint32_t)s; } -T(SFIXED64, f, uint64_t, 
int64_t, int64) { return (uint64_t)s; } -T(BOOL, v, uint32_t, bool, _bool) { return (uint32_t)s; } -T(ENUM, v, uint32_t, int32_t, int32) { return (uint32_t)s; } -T(DOUBLE, f, uint64_t, double, _double) { - upb_value v; - v._double = s; - return v.uint64; + advance(e, upb_vencode64(val, e->ptr)); + return true; } -T(FLOAT, f, uint32_t, float, _float) { - upb_value v; - v._float = s; - return v.uint32; + +static uint64_t dbl2uint64(double d) { + uint64_t ret; + memcpy(&ret, &d, sizeof(uint64_t)); + return ret; } -#undef VTOWV -#undef PUT -#undef T -static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v) -{ -#define CASE(t, member_name) \ - case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name); - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - default: assert(false); return buf; +static uint32_t flt2uint32(float d) { + uint32_t ret; + memcpy(&ret, &d, sizeof(uint32_t)); + return ret; +} + + +/* encoding of proto types ****************************************************/ + +static bool startmsg(void *c, const void *hd) { + upb_pb_encoder *e = c; + UPB_UNUSED(hd); + if (e->depth++ == 0) { + upb_bytessink_start(e->output_, 0, &e->subc); } -#undef CASE + return true; } -static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v) -{ -#define CASE(t, member_name) \ - case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name); - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - 
default: assert(false); return 0; +static bool endmsg(void *c, const void *hd, upb_status *status) { + upb_pb_encoder *e = c; + UPB_UNUSED(hd); + UPB_UNUSED(status); + if (--e->depth == 0) { + upb_bytessink_end(e->output_); } -#undef CASE + return true; } -static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num, - upb_wire_type_t wt) -{ - return upb_put_UINT32(buf, wt | (num << 3)); +static void *encode_startdelimfield(void *c, const void *hd) { + bool ok = encode_tag(c, hd) && commit(c) && start_delim(c); + return ok ? c : UPB_BREAK; } -static uint32_t _upb_get_tag_size(upb_field_number_t num) -{ - return upb_get_UINT32_size(num << 3); +static bool encode_enddelimfield(void *c, const void *hd) { + UPB_UNUSED(hd); + return end_delim(c); } +static void *encode_startgroup(void *c, const void *hd) { + return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK; +} -/* upb_sizebuilder ************************************************************/ +static bool encode_endgroup(void *c, const void *hd) { + return encode_tag(c, hd) && commit(c); +} -struct upb_sizebuilder { - // Accumulating size for the current level. - uint32_t size; +static void *encode_startstr(void *c, const void *hd, size_t size_hint) { + UPB_UNUSED(size_hint); + return encode_startdelimfield(c, hd); +} - // Stack of sizes for our current nesting. - uint32_t stack[UPB_MAX_NESTING], *top; +static size_t encode_strbuf(void *c, const void *hd, const char *buf, + size_t len, const upb_bufhandle *h) { + UPB_UNUSED(hd); + UPB_UNUSED(h); + return encode_bytes(c, buf, len) ? len : 0; +} - // Vector of sizes. 
- uint32_t *sizes; - int sizes_len; - int sizes_size; +#define T(type, ctype, convert, encode) \ + static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \ + return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e); \ + } \ + static bool encode_packed_##type(void *e, const void *hd, ctype val) { \ + UPB_UNUSED(hd); \ + return encode(e, (convert)(val)); \ + } - upb_status status; -}; +T(double, double, dbl2uint64, encode_fixed64) +T(float, float, flt2uint32, encode_fixed32); +T(int64, int64_t, uint64_t, encode_varint); +T(int32, int32_t, int64_t, encode_varint); // Sign-extend to 64 bits. +T(fixed64, uint64_t, uint64_t, encode_fixed64); +T(fixed32, uint32_t, uint32_t, encode_fixed32); +T(bool, bool, bool, encode_varint); +T(uint32, uint32_t, uint32_t, encode_varint); +T(uint64, uint64_t, uint64_t, encode_varint); +T(enum, int32_t, int64_t, encode_varint); // Sign-extend to 64 bits. +T(sfixed32, int32_t, uint32_t, encode_fixed32); +T(sfixed64, int64_t, uint64_t, encode_fixed64); +T(sint32, int32_t, upb_zzenc_32, encode_varint); +T(sint64, int64_t, upb_zzenc_64, encode_varint); -// upb_sink callbacks. -static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f, - upb_value val, - upb_status *status) -{ - (void)status; - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - uint32_t size = 0; - size += _upb_get_tag_size(f->number); - size += _upb_get_value_size(f->type, val); - sb->size += size; - return UPB_SINK_CONTINUE; -} +#undef T -static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status) -{ - (void)status; - (void)str; // String data itself is not used.
- upb_sizebuilder *sb = (upb_sizebuilder*)sink; - if(start >= 0) { - uint32_t size = 0; - size += _upb_get_tag_size(f->number); - size += upb_get_UINT32_size(end - start); - sb->size += size; - } - return UPB_SINK_CONTINUE; -} -static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - (void)status; - (void)f; // Unused (we calculate tag size and delimiter in endcb). - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - if(f->type == UPB_TYPE(MESSAGE)) { - *sb->top = sb->size; - sb->top++; - sb->size = 0; - } else { - assert(f->type == UPB_TYPE(GROUP)); - sb->size += _upb_get_tag_size(f->number); - } - return UPB_SINK_CONTINUE; -} +/* code to build the handlers *************************************************/ + +static void newhandlers_callback(const void *closure, upb_handlers *h) { + UPB_UNUSED(closure); -static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - (void)status; - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - if(f->type == UPB_TYPE(MESSAGE)) { - sb->top--; - if(sb->sizes_len == sb->sizes_size) { - sb->sizes_size *= 2; - sb->sizes = realloc(sb->sizes, sb->sizes_size * sizeof(*sb->sizes)); + upb_handlers_setstartmsg(h, startmsg, NULL); + upb_handlers_setendmsg(h, endmsg, NULL); + + const upb_msgdef *m = upb_handlers_msgdef(h); + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) && + upb_fielddef_packed(f); + upb_handlerattr attr; + upb_wiretype_t wt = + packed ? UPB_WIRE_TYPE_DELIMITED + : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)]; + + // Pre-encode the tag for this field. 
+ new_tag(h, f, wt, &attr); + + if (packed) { + upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr); + upb_handlers_setendseq(h, f, encode_enddelimfield, &attr); } - uint32_t child_size = sb->size; - uint32_t parent_size = *sb->top; - sb->sizes[sb->sizes_len++] = child_size; - // The size according to the parent includes the tag size and delimiter of - // the submessage. - parent_size += upb_get_UINT32_size(child_size); - parent_size += _upb_get_tag_size(f->number); - // Include size accumulated in parent before child began. - sb->size = child_size + parent_size; - } else { - assert(f->type == UPB_TYPE(GROUP)); - // As an optimization, we could just add this number twice in startcb, to - // avoid having to recalculate it. - sb->size += _upb_get_tag_size(f->number); + +#define T(upper, lower, upbtype) \ + case UPB_DESCRIPTOR_TYPE_##upper: \ + if (packed) { \ + upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \ + } else { \ + upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \ + } \ + break; + + switch (upb_fielddef_descriptortype(f)) { + T(DOUBLE, double, double); + T(FLOAT, float, float); + T(INT64, int64, int64); + T(INT32, int32, int32); + T(FIXED64, fixed64, uint64); + T(FIXED32, fixed32, uint32); + T(BOOL, bool, bool); + T(UINT32, uint32, uint32); + T(UINT64, uint64, uint64); + T(ENUM, enum, int32); + T(SFIXED32, sfixed32, int32); + T(SFIXED64, sfixed64, int64); + T(SINT32, sint32, int32); + T(SINT64, sint64, int64); + case UPB_DESCRIPTOR_TYPE_STRING: + case UPB_DESCRIPTOR_TYPE_BYTES: + upb_handlers_setstartstr(h, f, encode_startstr, &attr); + upb_handlers_setendstr(h, f, encode_enddelimfield, &attr); + upb_handlers_setstring(h, f, encode_strbuf, &attr); + break; + case UPB_DESCRIPTOR_TYPE_MESSAGE: + upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr); + upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr); + break; + case UPB_DESCRIPTOR_TYPE_GROUP: { + // Endgroup takes a different tag (wire_type = 
END_GROUP). + upb_handlerattr attr2; + new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2); + + upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr); + upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2); + + upb_handlerattr_uninit(&attr2); + break; + } + } + +#undef T + + upb_handlerattr_uninit(&attr); } - return UPB_SINK_CONTINUE; } -upb_sink_callbacks _upb_sizebuilder_sink_vtbl = { - _upb_sizebuilder_valuecb, - _upb_sizebuilder_strcb, - _upb_sizebuilder_startcb, - _upb_sizebuilder_endcb -}; - - -/* upb_sink callbacks *********************************************************/ - -struct upb_encoder { - upb_sink base; - //upb_bytesink *bytesink; - uint32_t *sizes; - int size_offset; -}; - - -// Within one callback we may need to encode up to two separate values. -#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2) - -static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf, - size_t len, upb_status *status) -{ - // TODO: conjure a upb_strptr that points to buf. - //upb_strptr ptr; - (void)s; - (void)buf; - (void)status; - size_t written = 5;// = upb_bytesink_onbytes(s->bytesink, ptr); - if(written < len) { - // TODO: mark to skip "written" bytes next time. - return UPB_SINK_STOP; - } else { - return UPB_SINK_CONTINUE; - } + +/* public API *****************************************************************/ + +const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m, + const void *owner) { + return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL); } -static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f, - upb_value val, upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - upb_wire_type_t wt = upb_types[f->type].expected_wire_type; - // TODO: handle packed encoding. 
- ptr = _upb_put_tag(ptr, f->number, wt); - ptr = upb_encode_value(ptr, f->type, val); - return _upb_encoder_push_buf(s, buf, ptr - buf, status); +#define ARRAYSIZE(x) (sizeof(x) / sizeof(x[0])) + +void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h) { + e->output_ = NULL; + e->subc = NULL; + e->buf = e->initbuf; + e->ptr = e->buf; + e->limit = e->buf + ARRAYSIZE(e->initbuf); + e->segbuf = e->seginitbuf; + e->seglimit = e->segbuf + ARRAYSIZE(e->seginitbuf); + e->stacklimit = e->stack + ARRAYSIZE(e->stack); + upb_sink_reset(&e->input_, h, e); } -static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - if(start >= 0) { - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); - ptr = upb_put_UINT32(ptr, end - start); +void upb_pb_encoder_uninit(upb_pb_encoder *e) { + if (e->buf != e->initbuf) { + free(e->buf); } - // TODO: properly handle partially consumed strings and partially supplied - // strings. 
- _upb_encoder_push_buf(s, buf, ptr - buf, status); - return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status); -} -static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - if(f->type == UPB_TYPE(GROUP)) { - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_START_GROUP); - } else { - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); - ptr = upb_put_UINT32(ptr, s->sizes[--s->size_offset]); + if (e->segbuf != e->seginitbuf) { + free(e->segbuf); } - return _upb_encoder_push_buf(s, buf, ptr - buf, status); } -static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE; - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_END_GROUP); - return _upb_encoder_push_buf(s, buf, ptr - buf, status); +void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output) { + upb_pb_encoder_reset(e); + e->output_ = output; + e->subc = output->closure; } -upb_sink_callbacks _upb_encoder_sink_vtbl = { - _upb_encoder_valuecb, - _upb_encoder_strcb, - _upb_encoder_startcb, - _upb_encoder_endcb -}; +void upb_pb_encoder_reset(upb_pb_encoder *e) { + e->segptr = NULL; + e->top = NULL; + e->depth = 0; +} +upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; } diff --git a/upb/pb/encoder.h b/upb/pb/encoder.h index 563b78d..2df5797 100644 --- a/upb/pb/encoder.h +++ b/upb/pb/encoder.h @@ -7,52 +7,155 @@ * Implements a set of upb_handlers that write protobuf data to the binary wire * format. * - * For messages that have any submessages, the encoder needs a buffer - * containing the submessage sizes, so they can be properly written at the - * front of each message. Note that groups do *not* have this requirement. 
+ * This encoder implementation does not have any access to any out-of-band or + * precomputed lengths for submessages, so it must buffer submessages internally + * before it can emit the first byte. */ #ifndef UPB_ENCODER_H_ #define UPB_ENCODER_H_ -#include "upb/upb.h" -#include "upb/bytestream.h" +#include "upb/sink.h" #ifdef __cplusplus -extern "C" { +namespace upb { +namespace pb { +class Encoder; +} // namespace pb +} // namespace upb #endif -/* upb_encoder ****************************************************************/ +UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder); -// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol -// buffer binary wire format. -struct upb_encoder; -typedef struct upb_encoder upb_encoder; +#define UPB_PBENCODER_MAX_NESTING 100 -upb_encoder *upb_encoder_new(upb_msgdef *md); -void upb_encoder_free(upb_encoder *e); +/* upb::pb::Encoder ***********************************************************/ -// Resets the given upb_encoder such that is is ready to begin encoding, -// outputting data to "bytesink" (which must live until the encoder is -// reset or destroyed). -void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink); +// The output buffer is divided into segments; a segment is a string of data +// that is "ready to go" -- it does not need any varint lengths inserted into +// the middle. The seams between segments are where varints will be inserted +// once they are known. +// +// We also use the concept of a "run", which is a range of encoded bytes that +// occur at a single submessage level. Every segment contains one or more runs. +// +// A segment can span messages. Consider: +// +// .--Submessage lengths---------. 
+// | | | +// | V V +// V | |--------------- | |----------------- +// Submessages: | |----------------------------------------------- +// Top-level msg: ------------------------------------------------------------ +// +// Segments: ----- ------------------- ----------------- +// Runs: *---- *--------------*--- *---------------- +// (* marks the start) +// +// Note that the top-level message is not in any segment because it does not +// have any length preceding it. +// +// A segment is only interrupted when another length needs to be inserted. So +// observe how the second segment spans both the inner submessage and part of +// the next enclosing message. +typedef struct { + UPB_PRIVATE_FOR_CPP + uint32_t msglen; // The length to varint-encode before this segment. + uint32_t seglen; // Length of the segment. +} upb_pb_encoder_segment; -// Returns the upb_sink to which data can be written. The sink is invalidated -// when the encoder is reset or destroyed. Note that if the client wants to -// encode any length-delimited submessages it must first call -// upb_encoder_buildsizes() below. -upb_sink *upb_encoder_sink(upb_encoder *e); +UPB_DEFINE_CLASS0(upb::pb::Encoder, + public: + Encoder(const upb::Handlers* handlers); + ~Encoder(); -// Call prior to pushing any data with embedded submessages. "src" must yield -// exactly the same data as what will next be encoded, but in reverse order. -// The encoder iterates over this data in order to determine the sizes of the -// submessages. If any errors are returned by the upb_src, the status will -// be saved in *status. If the client is sure that the upb_src will not throw -// any errors, "status" may be NULL. -void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status); + static reffed_ptr NewHandlers(const upb::MessageDef* msg); + + // Resets the state of the encoder, so that it will expect to begin a new + // document. + void Reset(); + + // Resets the output pointer which will serve as our closure.
+ void ResetOutput(BytesSink* output); + + // The input to the encoder. + Sink* input(); + + private: + UPB_DISALLOW_COPY_AND_ASSIGN(Encoder); +, +UPB_DEFINE_STRUCT0(upb_pb_encoder, UPB_QUOTE( + // Our input and output. + upb_sink input_; + upb_bytessink *output_; + + // The "subclosure" -- used as the inner closure as part of the bytessink + // protocol. + void *subc; + + // The output buffer and limit, and our current write position. "buf" + // initially points to "initbuf", but is dynamically allocated if we need to + // grow beyond the initial size. + char *buf, *ptr, *limit; + + // The beginning of the current run, or undefined if we are at the top level. + char *runbegin; + + // The list of segments we are accumulating. + upb_pb_encoder_segment *segbuf, *segptr, *seglimit; + + // The stack of enclosing submessages. Each entry in the stack points to the + // segment where this submessage's length is being accumulated. + int stack[UPB_PBENCODER_MAX_NESTING], *top, *stacklimit; + + // Depth of startmsg/endmsg calls. + int depth; + + // Initial buffers for the output buffer and segment buffer. If we outgrow + // these we will dynamically allocate bigger ones. 
+ char initbuf[256]; + upb_pb_encoder_segment seginitbuf[32]; +))); + +UPB_BEGIN_EXTERN_C + +const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m, + const void *owner); +void upb_pb_encoder_reset(upb_pb_encoder *e); +upb_sink *upb_pb_encoder_input(upb_pb_encoder *p); +void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h); +void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output); +void upb_pb_encoder_uninit(upb_pb_encoder *e); + +UPB_END_EXTERN_C #ifdef __cplusplus -} /* extern "C" */ + +namespace upb { +namespace pb { +inline Encoder::Encoder(const upb::Handlers* handlers) { + upb_pb_encoder_init(this, handlers); +} +inline Encoder::~Encoder() { + upb_pb_encoder_uninit(this); +} +inline void Encoder::Reset() { + upb_pb_encoder_reset(this); +} +inline void Encoder::ResetOutput(BytesSink* output) { + upb_pb_encoder_resetoutput(this, output); +} +inline Sink* Encoder::input() { + return upb_pb_encoder_input(this); +} +inline reffed_ptr Encoder::NewHandlers( + const upb::MessageDef *md) { + const Handlers* h = upb_pb_encoder_newhandlers(md, &h); + return reffed_ptr(h, &h); +} +} // namespace pb +} // namespace upb + #endif #endif /* UPB_ENCODER_H_ */ diff --git a/upb/pb/varint.c b/upb/pb/varint.c index ccd752d..365deb4 100644 --- a/upb/pb/varint.c +++ b/upb/pb/varint.c @@ -7,6 +7,29 @@ #include "upb/pb/varint.int.h" +// Index is descriptor type. 
+const uint8_t upb_pb_native_wire_types[] = { + UPB_WIRE_TYPE_END_GROUP, // ENDGROUP + UPB_WIRE_TYPE_64BIT, // DOUBLE + UPB_WIRE_TYPE_32BIT, // FLOAT + UPB_WIRE_TYPE_VARINT, // INT64 + UPB_WIRE_TYPE_VARINT, // UINT64 + UPB_WIRE_TYPE_VARINT, // INT32 + UPB_WIRE_TYPE_64BIT, // FIXED64 + UPB_WIRE_TYPE_32BIT, // FIXED32 + UPB_WIRE_TYPE_VARINT, // BOOL + UPB_WIRE_TYPE_DELIMITED, // STRING + UPB_WIRE_TYPE_START_GROUP, // GROUP + UPB_WIRE_TYPE_DELIMITED, // MESSAGE + UPB_WIRE_TYPE_DELIMITED, // BYTES + UPB_WIRE_TYPE_VARINT, // UINT32 + UPB_WIRE_TYPE_VARINT, // ENUM + UPB_WIRE_TYPE_32BIT, // SFIXED32 + UPB_WIRE_TYPE_64BIT, // SFIXED64 + UPB_WIRE_TYPE_VARINT, // SINT32 + UPB_WIRE_TYPE_VARINT, // SINT64 +}; + // A basic branch-based decoder, uses 32-bit values to get good performance // on 32-bit architectures (but performs well on 64-bits also). // This scheme comes from the original Google Protobuf implementation (proto2). diff --git a/upb/pb/varint.int.h b/upb/pb/varint.int.h index d92fef9..8498acd 100644 --- a/upb/pb/varint.int.h +++ b/upb/pb/varint.int.h @@ -37,6 +37,10 @@ typedef enum { // wiki document about this). #define UPB_PB_VARINT_MAX_LEN 10 +// Array of the "native" (i.e. non-packed-repeated) wire type for the given +// descriptor type (upb_descriptortype_t). +extern const uint8_t upb_pb_native_wire_types[]; + /* Zig-zag encoding/decoding **************************************************/ UPB_INLINE int32_t upb_zzdec_32(uint32_t n) { @@ -129,6 +133,11 @@ UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) { return i; } +UPB_INLINE size_t upb_varint_size(uint64_t val) { + char buf[UPB_PB_VARINT_MAX_LEN]; + return upb_vencode64(val, buf); +} + // Encodes a 32-bit varint, *not* sign-extended. 
UPB_INLINE uint64_t upb_vencode32(uint32_t val) { char buf[UPB_PB_VARINT_MAX_LEN]; diff --git a/upb/table.c b/upb/table.c index 3fd4b0f..63bb068 100644 --- a/upb/table.c +++ b/upb/table.c @@ -42,14 +42,36 @@ char *upb_strdup(const char *s) { return p; } -static upb_tabkey strkey(const char *str) { - upb_tabkey k; - k.str = (char*)str; +// A type to represent the lookup key of either a strtable or an inttable. +// This is like upb_tabkey, but can carry a size also to allow lookups of +// non-NULL-terminated strings (we don't store string lengths in the table). +typedef struct { + upb_tabkey key; + uint32_t len; // For string keys only. +} lookupkey_t; + +static lookupkey_t strkey(const char *str) { + lookupkey_t k; + k.key.str = (char*)str; + k.len = strlen(str); return k; } -typedef const upb_tabent *hashfunc_t(const upb_table *t, upb_tabkey key); -typedef bool eqlfunc_t(upb_tabkey k1, upb_tabkey k2); +static lookupkey_t strkey2(const char *str, size_t len) { + lookupkey_t k; + k.key.str = (char*)str; + k.len = len; + return k; +} + +static lookupkey_t intkey(uintptr_t key) { + lookupkey_t k; + k.key = upb_intkey(key); + return k; +} + +typedef uint32_t hashfunc_t(upb_tabkey key); +typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2); /* Base table (shared code) ***************************************************/ @@ -85,10 +107,14 @@ static upb_tabent *emptyent(upb_table *t) { while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); } } -static const upb_tabent *findentry(const upb_table *t, upb_tabkey key, - hashfunc_t *hash, eqlfunc_t *eql) { +static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) { + return (upb_tabent*)upb_getentry(t, hash); +} + +static const upb_tabent *findentry(const upb_table *t, lookupkey_t key, + uint32_t hash, eqlfunc_t *eql) { if (t->size_lg2 == 0) return NULL; - const upb_tabent *e = hash(t, key); + const upb_tabent *e = upb_getentry(t, hash); if (upb_tabent_isempty(e)) return NULL; while (1) { if 
(eql(e->key, key)) return e; @@ -96,8 +122,13 @@ static const upb_tabent *findentry(const upb_table *t, upb_tabkey key, } } -static bool lookup(const upb_table *t, upb_tabkey key, upb_value *v, - hashfunc_t *hash, eqlfunc_t *eql) { +static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key, + uint32_t hash, eqlfunc_t *eql) { + return (upb_tabent*)findentry(t, key, hash, eql); +} + +static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v, + uint32_t hash, eqlfunc_t *eql) { const upb_tabent *e = findentry(t, key, hash, eql); if (e) { if (v) { @@ -110,13 +141,13 @@ static bool lookup(const upb_table *t, upb_tabkey key, upb_value *v, } // The given key must not already exist in the table. -static void insert(upb_table *t, upb_tabkey key, upb_value val, - hashfunc_t *hash, eqlfunc_t *eql) { +static void insert(upb_table *t, lookupkey_t key, upb_value val, + uint32_t hash, hashfunc_t *hashfunc, eqlfunc_t *eql) { UPB_UNUSED(eql); assert(findentry(t, key, hash, eql) == NULL); assert(val.ctype == t->ctype); t->count++; - upb_tabent *mainpos_e = (upb_tabent*)hash(t, key); + upb_tabent *mainpos_e = getentry_mutable(t, hash); upb_tabent *our_e = mainpos_e; if (upb_tabent_isempty(mainpos_e)) { // Our main position is empty; use it. @@ -125,7 +156,7 @@ static void insert(upb_table *t, upb_tabkey key, upb_value val, // Collision. upb_tabent *new_e = emptyent(t); // Head of collider's chain. - upb_tabent *chain = (upb_tabent*)hash(t, mainpos_e->key); + upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key)); if (chain == mainpos_e) { // Existing ent is in its main posisiton (it has the same hash as us, and // is the head of our chain). Insert to new ent and append to this chain. 
@@ -146,14 +177,14 @@ static void insert(upb_table *t, upb_tabkey key, upb_value val, our_e->next = NULL; } } - our_e->key = key; + our_e->key = key.key; our_e->val = val.val; assert(findentry(t, key, hash, eql) == our_e); } -static bool rm(upb_table *t, upb_tabkey key, upb_value *val, - upb_tabkey *removed, hashfunc_t *hash, eqlfunc_t *eql) { - upb_tabent *chain = (upb_tabent*)hash(t, key); +static bool rm(upb_table *t, lookupkey_t key, upb_value *val, + upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) { + upb_tabent *chain = getentry_mutable(t, hash); if (upb_tabent_isempty(chain)) return false; if (eql(chain->key, key)) { // Element to remove is at the head of its chain. @@ -210,13 +241,12 @@ static size_t begin(const upb_table *t) { // A simple "subclass" of upb_table that only adds a hash function for strings. -static const upb_tabent *strhash(const upb_table *t, upb_tabkey key) { - // Could avoid the strlen() by using a hash function that terminates on NULL. - return t->entries + (MurmurHash2(key.str, strlen(key.str), 0) & t->mask); +static uint32_t strhash(upb_tabkey key) { + return MurmurHash2(key.str, strlen(key.str), 0); } -static bool streql(upb_tabkey k1, upb_tabkey k2) { - return strcmp(k1.str, k2.str) == 0; +static bool streql(upb_tabkey k1, lookupkey_t k2) { + return strncmp(k1.str, k2.key.str, k2.len) == 0 && k1.str[k2.len] == '\0'; } bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) { @@ -252,17 +282,23 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) { } } if ((k = upb_strdup(k)) == NULL) return false; - insert(&t->t, strkey(k), v, &strhash, &streql); + + lookupkey_t key = strkey(k); + uint32_t hash = MurmurHash2(key.key.str, key.len, 0); + insert(&t->t, strkey(k), v, hash, &strhash, &streql); return true; } -bool upb_strtable_lookup(const upb_strtable *t, const char *key, upb_value *v) { - return lookup(&t->t, strkey(key), v, &strhash, &streql); +bool upb_strtable_lookup2(const upb_strtable *t, const char *key, 
size_t len, + upb_value *v) { + uint32_t hash = MurmurHash2(key, len, 0); + return lookup(&t->t, strkey2(key, len), v, hash, &streql); } bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) { + uint32_t hash = MurmurHash2(key, strlen(key), 0); upb_tabkey tabkey; - if (rm(&t->t, strkey(key), val, &tabkey, &strhash, &streql)) { + if (rm(&t->t, strkey(key), val, &tabkey, hash, &streql)) { free((void*)tabkey.str); return true; } else { @@ -317,8 +353,10 @@ bool upb_strtable_iter_isequal(const upb_strtable_iter *i1, // For inttables we use a hybrid structure where small keys are kept in an // array and large keys are put in the hash table. -static bool inteql(upb_tabkey k1, upb_tabkey k2) { - return k1.num == k2.num; +static uint32_t inthash(upb_tabkey key) { return upb_inthash(key.num); } + +static bool inteql(upb_tabkey k1, lookupkey_t k2) { + return k1.num == k2.key.num; } static _upb_value *mutable_array(upb_inttable *t) { @@ -330,7 +368,7 @@ static _upb_value *inttable_val(upb_inttable *t, uintptr_t key) { return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL; } else { upb_tabent *e = - (upb_tabent*)findentry(&t->t, upb_intkey(key), &upb_inthash, &inteql); + findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql); return e ? 
&e->val : NULL; } } @@ -402,7 +440,8 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) { const upb_tabent *e = &t->t.entries[i]; upb_value v; _upb_value_setval(&v, e->val, t->t.ctype); - insert(&new_table, e->key, v, &upb_inthash, &inteql); + uint32_t hash = upb_inthash(e->key.num); + insert(&new_table, intkey(e->key.num), v, hash, &inthash, &inteql); } assert(t->t.count == new_table.count); @@ -410,7 +449,7 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) { uninit(&t->t); t->t = new_table; } - insert(&t->t, upb_intkey(key), val, &upb_inthash, &inteql); + insert(&t->t, intkey(key), val, upb_inthash(key), &inthash, &inteql); } check(t); return true; @@ -446,7 +485,8 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { } } else { upb_tabkey removed; - success = rm(&t->t, upb_intkey(key), val, &removed, &upb_inthash, &inteql); + uint32_t hash = upb_inthash(key); + success = rm(&t->t, intkey(key), val, &removed, hash, &inteql); } check(t); return success; diff --git a/upb/table.int.h b/upb/table.int.h index 5e023c9..56891d7 100644 --- a/upb/table.int.h +++ b/upb/table.int.h @@ -25,6 +25,7 @@ #include #include +#include #include "upb.h" #ifdef __cplusplus @@ -219,20 +220,27 @@ UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; } +// Used by some of the unit tests for generic hashing functionality. 
+uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed); + UPB_INLINE upb_tabkey upb_intkey(uintptr_t key) { - upb_tabkey k = {key}; return k; + upb_tabkey k; + k.num = key; + return k; +} + +UPB_INLINE uint32_t upb_inthash(uintptr_t key) { + return (uint32_t)key; } -UPB_INLINE const upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) { - return t->entries + ((uint32_t)key.num & t->mask); +static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) { + return t->entries + (hash & t->mask); } UPB_INLINE bool upb_arrhas(_upb_value v) { return v.uint64 != (uint64_t)UPB_ARRAY_EMPTYVAL; } -uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); - // Initialize and uninitialize a table, respectively. If memory allocation // failed, false is returned that the table is uninitialized. bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype); @@ -259,7 +267,14 @@ bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val); // Looks up key in this table, returning "true" if the key was found. // If v is non-NULL, copies the value for this key into *v. bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v); -bool upb_strtable_lookup(const upb_strtable *t, const char *key, upb_value *v); +bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len, + upb_value *v); + +// For NULL-terminated strings. +UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key, + upb_value *v) { + return upb_strtable_lookup2(t, key, strlen(key), v); +} // Removes an item from the table. Returns true if the remove was successful, // and stores the removed item in *val if non-NULL. 
@@ -302,7 +317,7 @@ UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key, } else { const upb_tabent *e; if (t->t.entries == NULL) return false; - for (e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) { + for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) { if ((uint32_t)e->key.num == key) { _upb_value_setval(v, e->val, t->t.ctype); return true; -- cgit v1.2.3