From 3d0c7c45da5b72a88bfb03dc5ce3384b7f01cef6 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Tue, 18 Nov 2014 15:21:50 -0800 Subject: Sync to Google-internal development. --- upb/bindings/googlepb/bridge.cc | 1 + upb/bindings/googlepb/proto1.cc | 27 + upb/bindings/googlepb/proto2.cc | 220 +++++-- upb/bindings/lua/upb.c | 21 +- upb/bindings/ruby/README | 2 - upb/bindings/ruby/README.md | 30 + upb/bindings/ruby/extconf.rb | 8 +- upb/bindings/ruby/upb.c | 1209 ++++++++++++++++++++++++++++++++------- 8 files changed, 1254 insertions(+), 264 deletions(-) delete mode 100644 upb/bindings/ruby/README create mode 100644 upb/bindings/ruby/README.md (limited to 'upb/bindings') diff --git a/upb/bindings/googlepb/bridge.cc b/upb/bindings/googlepb/bridge.cc index a666ff6..6ae8868 100644 --- a/upb/bindings/googlepb/bridge.cc +++ b/upb/bindings/googlepb/bridge.cc @@ -115,6 +115,7 @@ reffed_ptr DefBuilder::NewFieldDef(const goog::FieldDescriptor* f, upb_f->set_number(f->number(), &status); upb_f->set_label(FieldDef::ConvertLabel(f->label())); upb_f->set_descriptor_type(FieldDef::ConvertDescriptorType(f->type())); + upb_f->set_packed(f->options().packed()); #ifdef UPB_GOOGLE3 upb_f->set_lazy(f->options().lazy()); #endif diff --git a/upb/bindings/googlepb/proto1.cc b/upb/bindings/googlepb/proto1.cc index 0b46fed..68b572c 100644 --- a/upb/bindings/googlepb/proto1.cc +++ b/upb/bindings/googlepb/proto1.cc @@ -30,6 +30,10 @@ #undef private #undef protected +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS +namespace proto2 { class Arena; } +#endif + #include "upb/def.h" #include "upb/handlers.h" #include "upb/shim/shim.h" @@ -448,12 +452,35 @@ class P2R_Handlers { class RepeatedMessageTypeHandler { public: typedef proto2::Message Type; +#ifndef GOOGLE_PROTOBUF_HAS_ARENAS // AddAllocated() calls this, but only if other objects are sitting // around waiting for reuse, which we will not do. static void Delete(Type* t) { UPB_UNUSED(t); assert(false); } +#else + static ::proto2::Arena* GetArena(Type* t) { + return t->GetArena(); + } + static void* GetMaybeArenaPointer(Type* t) { + return t->GetMaybeArenaPointer(); + } + static inline Type* NewFromPrototype( + const Type* prototype, ::proto2::Arena* arena = NULL) { + return prototype->New(arena); + } + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t, ::proto2::Arena* arena) { + UPB_UNUSED(t); + UPB_UNUSED(arena); + assert(false); + } + static void Merge(const Type& from, Type* to) { + to->MergeFrom(from); + } +#endif }; // Closure is a RepeatedPtrField*, but we access it through diff --git a/upb/bindings/googlepb/proto2.cc b/upb/bindings/googlepb/proto2.cc index 657f802..498ae2d 100644 --- a/upb/bindings/googlepb/proto2.cc +++ b/upb/bindings/googlepb/proto2.cc @@ -261,11 +261,64 @@ case goog::FieldDescriptor::cpptype: \ return r->offsets_[index]; } - class FieldOffset { + // Base class that provides access to elements of the message as a whole, such + // as the unknown-field set, and is inherited by context classes for specific + // field handlers. + class FieldDataBase { + public: + FieldDataBase(const goog::internal::GeneratedMessageReflection* r) + : unknown_fields_offset_(r->unknown_fields_offset_) +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + , arena_offset_(r->arena_offset_) +#endif // GOOGLE_PROTOBUF_HAS_ARENAS + {} + +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + goog::Arena* GetArena(const goog::Message& message) const { + if (unknown_fields_offset_ == + goog::internal::GeneratedMessageReflection:: + kUnknownFieldSetInMetadata) { + const goog::internal::InternalMetadataWithArena* metadata = + GetConstPointer( + &message, arena_offset_); + return metadata->arena(); + } else if (arena_offset_ != + goog::internal::GeneratedMessageReflection::kNoArenaPointer) { + return *GetConstPointer(&message, arena_offset_); + } else { + return NULL; + } + } + + goog::UnknownFieldSet* GetUnknownFieldSet(goog::Message* message) const { + if (unknown_fields_offset_ == + goog::internal::GeneratedMessageReflection:: + kUnknownFieldSetInMetadata) { + goog::internal::InternalMetadataWithArena* metadata = + GetPointer( + message, arena_offset_); + return metadata->mutable_unknown_fields(); + } + return GetPointer(message, unknown_fields_offset_); + } +#else // ifdef GOOGLE_PROTOBUF_HAS_ARENAS + goog::UnknownFieldSet* GetUnknownFieldSet(goog::Message* message) const { + return GetPointer(message, unknown_fields_offset_); + } +#endif // ifdef !GOOGLE_PROTOBUF_HAS_ARENAS + private: + int unknown_fields_offset_; +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + int arena_offset_; +#endif // GOOGLE_PROTOBUF_HAS_ARENAS + }; + + class FieldOffset : public FieldDataBase { public: FieldOffset(const goog::FieldDescriptor* f, const goog::internal::GeneratedMessageReflection* r) - : offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) { + : FieldDataBase(r), + offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) { if (!is_repeated_) { int64_t hasbit = GetHasbit(f, r); hasbyte_ = hasbit / 8; @@ -293,11 +346,12 @@ case goog::FieldDescriptor::cpptype: \ }; #ifdef GOOGLE_PROTOBUF_HAS_ONEOF - class OneofFieldData { + class OneofFieldData : public FieldDataBase { public: OneofFieldData(const goog::FieldDescriptor* f, const goog::internal::GeneratedMessageReflection* r) - : field_number_offset_(GetOneofDiscriminantOffset(f, r)), + : FieldDataBase(r), + field_number_offset_(GetOneofDiscriminantOffset(f, r)), field_number_(f->number()) { const goog::OneofDescriptor* oneof = f->containing_oneof(); @@ -343,6 +397,40 @@ case goog::FieldDescriptor::cpptype: \ return GetPointer(message, field_number_offset_); } + void ClearOneof(goog::Message* m, const FieldOffset* ofs, + int field_number) const { +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + if (GetArena(*m) != NULL) { + return; + } +#endif + switch (types_.at(field_number)) { + case ONEOF_TYPE_NONE: + break; + case ONEOF_TYPE_STRING: + delete *ofs->GetFieldPointer(m); + break; + case ONEOF_TYPE_MESSAGE: + delete *ofs->GetFieldPointer(m); + break; +#ifdef UPB_GOOGLE3 + case ONEOF_TYPE_GLOBALSTRING: + delete *ofs->GetFieldPointer(m); + break; + case ONEOF_TYPE_CORD: + delete *ofs->GetFieldPointer(m); + break; + case ONEOF_TYPE_STRINGPIECE: + delete *ofs->GetFieldPointer< + goog::internal::StringPieceField*>(m); + break; + case ONEOF_TYPE_LAZYFIELD: + delete *ofs->GetFieldPointer(m); + break; +#endif + } + } + // Returns whether this is different than the previous value of the // field_number; this implies that the current value was freed (if // necessary) and the caller should allocate a new instance. @@ -351,30 +439,7 @@ case goog::FieldDescriptor::cpptype: \ if (*field_number == field_number_) { return false; } else { - switch (types_.at(*field_number)) { - case ONEOF_TYPE_NONE: - break; - case ONEOF_TYPE_STRING: - delete *ofs->GetFieldPointer(m); - break; - case ONEOF_TYPE_MESSAGE: - delete *ofs->GetFieldPointer(m); - break; -#ifdef UPB_GOOGLE3 - case ONEOF_TYPE_GLOBALSTRING: - delete *ofs->GetFieldPointer(m); - break; - case ONEOF_TYPE_CORD: - delete *ofs->GetFieldPointer(m); - break; - case ONEOF_TYPE_STRINGPIECE: - delete *ofs->GetFieldPointer(m); - break; - case ONEOF_TYPE_LAZYFIELD: - delete *ofs->GetFieldPointer(m); - break; -#endif - } + ClearOneof(m, ofs, *field_number); *field_number = field_number_; return true; } @@ -578,7 +643,6 @@ case goog::FieldDescriptor::cpptype: \ const upb::FieldDef* f) : FieldOffset(proto2_f, r), field_number_(f->number()), - unknown_fields_offset_(r->unknown_fields_offset_), enum_(upb_downcast_enumdef(f->subdef())) {} bool IsValidValue(int32_t val) const { @@ -587,13 +651,8 @@ case goog::FieldDescriptor::cpptype: \ int32_t field_number() const { return field_number_; } - goog::UnknownFieldSet* mutable_unknown_fields(goog::Message* m) const { - return GetPointer(m, unknown_fields_offset_); - } - private: int32_t field_number_; - size_t unknown_fields_offset_; const upb::EnumDef* enum_; }; @@ -617,7 +676,7 @@ case goog::FieldDescriptor::cpptype: \ *message_val = val; data->SetHasbit(m); } else { - data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + data->GetUnknownFieldSet(m)->AddVarint(data->field_number(), val); } } @@ -631,7 +690,7 @@ case goog::FieldDescriptor::cpptype: \ data->GetFieldPointer >(m); r->Add(val); } else { - data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + data->GetUnknownFieldSet(m)->AddVarint(data->field_number(), val); } } @@ -718,7 +777,14 @@ case goog::FieldDescriptor::cpptype: \ T** str = data->GetStringPointer(m); data->SetHasbit(m); // If it points to the default instance, we must create a new instance. - if (*str == data->prototype()) *str = new T(); + if (*str == data->prototype()) { + *str = new T(); +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + if (data->GetArena(*m)) { + data->GetArena(*m)->Own(*str); + } +#endif + } (*str)->clear(); // reserve() here appears to hurt performance rather than help. return *str; @@ -749,6 +815,16 @@ case goog::FieldDescriptor::cpptype: \ T** str = ofs->GetFieldPointer(m); if (data->SetOneofHas(m)) { *str = new T(); +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + // Note that in the main proto2-arenas implementation, the parsing code + // creates ArenaString instances for string field data, and the + // implementation later dynamically converts to ::string if a mutable + // version is requested. To keep complexity down in this binding, we + // create an ordinary string and allow the arena to own its destruction. + if (data->GetArena(*m) != NULL) { + data->GetArena(*m)->Own(*str); + } +#endif } else { (*str)->clear(); } @@ -857,7 +933,11 @@ case goog::FieldDescriptor::cpptype: \ data->SetHasbit(m); goog::Message** subm = data->GetFieldPointer(m); if (*subm == NULL || *subm == data->prototype()) { +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + *subm = data->prototype()->New(data->GetArena(*m)); +#else *subm = data->prototype()->New(); +#endif } return *subm; } @@ -865,14 +945,50 @@ case goog::FieldDescriptor::cpptype: \ class RepeatedMessageTypeHandler { public: typedef goog::Message Type; +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + static ::proto2::Arena* GetArena(Type* t) { + return t->GetArena(); + } + static void* GetMaybeArenaPointer(Type* t) { + return t->GetMaybeArenaPointer(); + } + static inline Type* NewFromPrototype( + const Type* prototype, ::proto2::Arena* arena = NULL) { + return prototype->New(arena); + } + static void Delete(Type* t, goog::Arena* arena = NULL) { + if (arena == NULL) { + delete t; + } + } +#else // ifdef GOOGLE_PROTOBUF_HAS_ARENAS + static inline Type* NewFromPrototype(const Type* prototype) { + return prototype->New(); + } // AddAllocated() calls this, but only if other objects are sitting // around waiting for reuse, which we will not do. static void Delete(Type* t) { UPB_UNUSED(t); assert(false); } +#endif // ifdef GOOGLE_PROTOBUF_HAS_ARENAS + + static void Merge(const Type& from, Type* to) { + to->MergeFrom(from); + } }; +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + // Closure is a RepeatedPtrField*, but we access it through + // its base class RepeatedPtrFieldBase*. + static goog::Message* StartRepeatedSubMessage( + goog::internal::RepeatedPtrFieldBase* r, + const SubMessageHandlerData* data) { + goog::Message* submsg = data->prototype()->New(r->GetArenaNoVirtual()); + r->AddAllocated(submsg); + return submsg; + } +#else // ifdef GOOGLE_PROTOBUF_HAS_ARENAS // Closure is a RepeatedPtrField*, but we access it through // its base class RepeatedPtrFieldBase*. static goog::Message* StartRepeatedSubMessage( @@ -886,13 +1002,19 @@ case goog::FieldDescriptor::cpptype: \ return submsg; } +#endif // ifdef GOOGLE_PROTOBUF_HAS_ARENAS + #ifdef GOOGLE_PROTOBUF_HAS_ONEOF static goog::Message* StartOneofSubMessage( goog::Message* m, const OneofSubMessageHandlerData* data) { const FieldOffset* ofs = data; goog::Message** subm = ofs->GetFieldPointer(m); if (data->SetOneofHas(m)) { +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + *subm = data->prototype()->New(data->GetArena(*m)); +#else *subm = data->prototype()->New(); +#endif } return *subm; } @@ -1123,9 +1245,21 @@ case goog::FieldDescriptor::cpptype: \ LazyMessageExtensionImpl() {} virtual ~LazyMessageExtensionImpl() {} +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + virtual LazyMessageExtension* New() const { + return New(NULL); + } + + virtual LazyMessageExtension* New(proto2::Arena* arena) const { + LazyMessageExtensionImpl* message = + ::proto2::Arena::Create(arena); + return message; + } +#else // ifdef GOOGLE_PROTOBUF_HAS_ARENAS virtual LazyMessageExtension* New() const { return new LazyMessageExtensionImpl(); } +#endif // ifdef GOOGLE_PROTOBUF_HAS_ARENAS virtual const proto2::MessageLite& GetMessage( const proto2::MessageLite& prototype) const { @@ -1149,6 +1283,12 @@ case goog::FieldDescriptor::cpptype: \ static_cast(prototype)); } + virtual proto2::MessageLite* UnsafeArenaReleaseMessage( + const proto2::MessageLite& prototype) { + return lazy_field_.UnsafeArenaReleaseByPrototype( + static_cast(prototype)); + } + virtual bool IsInitialized() const { return true; } virtual int ByteSize() const { return lazy_field_.MessageByteSize(); } @@ -1201,7 +1341,13 @@ case goog::FieldDescriptor::cpptype: \ LazyMessageExtensionImpl* lazy_extension; if (set->MaybeNewExtension(data->number(), data->field_descriptor(), &item)) { +#ifdef GOOGLE_PROTOBUF_HAS_ARENAS + lazy_extension = + ::proto2::Arena::Create( + m->GetArena()); +#else lazy_extension = new LazyMessageExtensionImpl(); +#endif item->type = UPB_DESCRIPTOR_TYPE_MESSAGE; item->is_repeated = false; item->is_lazy = true; diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c index f257430..2bd78af 100644 --- a/upb/bindings/lua/upb.c +++ b/upb/bindings/lua/upb.c @@ -640,6 +640,12 @@ static int lupb_fielddef_number(lua_State *L) { return 1; } +static int lupb_fielddef_packed(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushboolean(L, upb_fielddef_packed(f)); + return 1; +} + static int lupb_fielddef_subdef(lua_State *L) { const upb_fielddef *f = lupb_fielddef_check(L, 1); if (!upb_fielddef_hassubdef(f)) @@ -753,6 +759,12 @@ static int lupb_fielddef_setnumber(lua_State *L) { return 0; } +static int lupb_fielddef_setpacked(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + upb_fielddef_setpacked(f, lupb_checkbool(L, 2)); + return 0; +} + static int lupb_fielddef_setsubdef(lua_State *L) { upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); const upb_def *def = NULL; @@ -817,6 +829,7 @@ static const struct luaL_Reg lupb_fielddef_m[] = { {"lazy", lupb_fielddef_lazy}, {"name", lupb_fielddef_name}, {"number", lupb_fielddef_number}, + {"packed", lupb_fielddef_packed}, {"subdef", lupb_fielddef_subdef}, {"subdef_name", lupb_fielddef_subdefname}, {"type", lupb_fielddef_type}, @@ -828,6 +841,7 @@ static const struct luaL_Reg lupb_fielddef_m[] = { {"set_lazy", lupb_fielddef_setlazy}, {"set_name", lupb_fielddef_setname}, {"set_number", lupb_fielddef_setnumber}, + {"set_packed", lupb_fielddef_setpacked}, {"set_subdef", lupb_fielddef_setsubdef}, {"set_subdef_name", lupb_fielddef_setsubdefname}, {"set_type", lupb_fielddef_settype}, @@ -926,7 +940,7 @@ static int lupb_msgdef_field(lua_State *L) { if (type == LUA_TNUMBER) { f = upb_msgdef_itof(m, lua_tointeger(L, 2)); } else if (type == LUA_TSTRING) { - f = upb_msgdef_ntof(m, lua_tostring(L, 2)); + f = upb_msgdef_ntofz(m, lua_tostring(L, 2)); } else { const char *msg = lua_pushfstring(L, "number or string expected, got %s", luaL_typename(L, 2)); @@ -1358,8 +1372,9 @@ const upb_msgdef *lupb_msg_checkdef(lua_State *L, int narg) { static const upb_fielddef *lupb_msg_checkfield(lua_State *L, const lupb_msgdef *lmd, int fieldarg) { - const char *fieldname = luaL_checkstring(L, fieldarg); - const upb_fielddef *f = upb_msgdef_ntof(lmd->md, fieldname); + size_t len; + const char *fieldname = luaL_checklstring(L, fieldarg, &len); + const upb_fielddef *f = upb_msgdef_ntof(lmd->md, fieldname, len); if (!f) { const char *msg = lua_pushfstring(L, "no such field: %s", fieldname); diff --git a/upb/bindings/ruby/README b/upb/bindings/ruby/README deleted file mode 100644 index 50fd746..0000000 --- a/upb/bindings/ruby/README +++ /dev/null @@ -1,2 +0,0 @@ -This is PROTOTYPE code -- all interfaces are experimental -and will almost certainly change. diff --git a/upb/bindings/ruby/README.md b/upb/bindings/ruby/README.md new file mode 100644 index 0000000..12a7169 --- /dev/null +++ b/upb/bindings/ruby/README.md @@ -0,0 +1,30 @@ + +# Ruby extension + +To build, run (from the top upb directory): + + $ make ruby + $ sudo make install + +To test, run: + + $ make rubytest + +The binding currently supports: + + - loading message types from descriptors. + - constructing message instances + - reading and writing their members + - parsing and serializing the messages + - all data types (including nested and repeated) + +The binding does *not* currently support: + + - defining message types directly in Ruby code. + - generating Ruby code for a .proto file. + - type-checking for setters + - homogenous / type-checked arrays + - default values + +Because code generation is not currently implemented, the interface to import +a specific message type is kind of clunky for the moment. diff --git a/upb/bindings/ruby/extconf.rb b/upb/bindings/ruby/extconf.rb index 3637511..b105948 100644 --- a/upb/bindings/ruby/extconf.rb +++ b/upb/bindings/ruby/extconf.rb @@ -1,9 +1,13 @@ #!/usr/bin/ruby require 'mkmf' + +# Extra args are passed on the command-line. +$CFLAGS += (" " + ARGV[0]) + find_header("upb/upb.h", "../../..") or raise "Can't find upb headers" find_library("upb_pic", "upb_msgdef_new", "../../../lib") or raise "Can't find upb lib" -find_library("upb.pb_pic", "upb_decoder_init", "../../../lib") or raise "Can't find upb.pb lib" find_library("upb.descriptor_pic", "upb_descreader_init", "../../../lib") or raise "Can't find upb.descriptor lib" -$CFLAGS += " -Wall" +find_library("upb.pb_pic", "upb_pbdecoder_init", "../../../lib") or raise "Can't find upb.pb lib" + create_makefile("upb") diff --git a/upb/bindings/ruby/upb.c b/upb/bindings/ruby/upb.c index 0d25610..2817a15 100644 --- a/upb/bindings/ruby/upb.c +++ b/upb/bindings/ruby/upb.c @@ -1,42 +1,41 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2014 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * upb (prototype) extension for Ruby. + */ + +#include "ruby/ruby.h" +#include "ruby/vm.h" -#include "ruby.h" #include "upb/def.h" #include "upb/handlers.h" #include "upb/pb/decoder.h" +#include "upb/pb/encoder.h" #include "upb/pb/glue.h" #include "upb/shim/shim.h" #include "upb/symtab.h" +// References to global state. +// +// Ruby does not have multi-VM support and it is common practice to store +// references to classes and other per-VM state in global variables. +static VALUE cSymbolTable; static VALUE cMessageDef; static VALUE cMessage; +static VALUE message_map; +static upb_inttable objcache; +static bool objcache_initialized = false; -// Wrapper around a upb_msgdef. -typedef struct { - // The msgdef for this message, and a DecoderMethod to parse protobufs and - // fill a message. - // - // We own refs on both of these. - const upb_msgdef *md; - const upb_pbdecodermethod *fill_method; - - size_t size; - uint32_t *field_offsets; -} rb_msgdef; - -// Ruby message object. -// This will be sized according to what fields are actually present. -typedef struct { - union u { - VALUE rbmsgdef; - char data[1]; - } data; -} rb_msg; +struct rupb_Message; +struct rupb_MessageDef; +typedef struct rupb_Message rupb_Message; +typedef struct rupb_MessageDef rupb_MessageDef; -#define DEREF(msg, ofs, type) *(type*)(&msg->data.data[ofs]) - -static void symtab_free(void *md) { - upb_symtab_unref(md, UPB_UNTRACKED_REF); -} +#define DEREF_RAW(ptr, ofs, type) *(type*)((char*)ptr + ofs) +#define DEREF(msg, ofs, type) *(type*)(&msg->data[ofs]) void rupb_checkstatus(upb_status *s) { if (!upb_ok(s)) { @@ -44,69 +43,195 @@ void rupb_checkstatus(upb_status *s) { } } -/* handlers *******************************************************************/ +static rupb_MessageDef *msgdef_get(VALUE self); +static rupb_Message *msg_get(VALUE self); +static const rupb_MessageDef *get_rbmsgdef(const upb_msgdef *md); +static const upb_handlers *new_fill_handlers(const rupb_MessageDef *rmd, + const void *owner); +static void putmsg(rupb_Message *msg, const rupb_MessageDef *rmd, + upb_sink *sink); +static VALUE msgdef_getwrapper(const upb_msgdef *md); +static VALUE new_message_class(VALUE message_def); +static VALUE get_message_class(VALUE klass, VALUE message); +static VALUE msg_new(VALUE msgdef); + +/* Ruby VALUE <-> C primitive conversions *************************************/ + +// Ruby VALUE -> C. +// TODO(haberman): add type/range/precision checks. +static float value_to_float(VALUE val) { return NUM2DBL(val); } +static double value_to_double(VALUE val) { return NUM2DBL(val); } +static bool value_to_bool(VALUE val) { return RTEST(val); } +static int32_t value_to_int32(VALUE val) { return NUM2INT(val); } +static uint32_t value_to_uint32(VALUE val) { return NUM2LONG(val); } +static int64_t value_to_int64(VALUE val) { return NUM2LONG(val); } +static uint64_t value_to_uint64(VALUE val) { return NUM2ULL(val); } + +// C -> Ruby VALUE +static VALUE float_to_value(float val) { return rb_float_new(val); } +static VALUE double_to_value(double val) { return rb_float_new(val); } +static VALUE bool_to_value(bool val) { return val ? Qtrue : Qfalse; } +static VALUE int32_to_value(int32_t val) { return INT2NUM(val); } +static VALUE uint32_to_value(uint32_t val) { return LONG2NUM(val); } +static VALUE int64_to_value(int64_t val) { return LONG2NUM(val); } +static VALUE uint64_to_value(uint64_t val) { return ULL2NUM(val); } + + +/* stringsink *****************************************************************/ + +// This should probably be factored into a common upb component. + +typedef struct { + upb_byteshandler handler; + upb_bytessink sink; + char *ptr; + size_t len, size; +} stringsink; + +static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) { + stringsink *sink = _sink; + sink->len = 0; + return sink; +} -// These are handlers for populating a Ruby protobuf message when parsing. +static size_t stringsink_string(void *_sink, const void *hd, const char *ptr, + size_t len, const upb_bufhandle *handle) { + UPB_UNUSED(hd); + UPB_UNUSED(handle); + + stringsink *sink = _sink; + size_t new_size = sink->size; + + while (sink->len + len > new_size) { + new_size *= 2; + } + + if (new_size != sink->size) { + sink->ptr = realloc(sink->ptr, new_size); + sink->size = new_size; + } + + memcpy(sink->ptr + sink->len, ptr, len); + sink->len += len; -static size_t strhandler(void *closure, const void *hd, const char *str, - size_t len, const upb_bufhandle *handle) { - rb_msg *msg = closure; - const size_t *ofs = hd; - DEREF(msg, *ofs, VALUE) = rb_str_new(str, len); return len; } -static const void *newhandlerdata(upb_handlers *h, uint32_t ofs) { - size_t *hd_ofs = ALLOC(size_t); - *hd_ofs = ofs; - upb_handlers_addcleanup(h, hd_ofs, free); - return hd_ofs; +void stringsink_init(stringsink *sink) { + upb_byteshandler_init(&sink->handler); + upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL); + upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL); + + upb_bytessink_reset(&sink->sink, &sink->handler, sink); + + sink->size = 32; + sink->ptr = malloc(sink->size); } -static void add_handlers_for_message(const void *closure, upb_handlers *h) { - // XXX: Doesn't support submessages properly yet. - const rb_msgdef *rmd = closure; - upb_msg_iter i; - for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); +void stringsink_uninit(stringsink *sink) { + free(sink->ptr); +} - if (upb_fielddef_isseq(f)) { - rb_raise(rb_eRuntimeError, "Doesn't support repeated fields yet."); - } - size_t ofs = rmd->field_offsets[upb_fielddef_index(f)]; +/* object cache ***************************************************************/ - switch (upb_fielddef_type(f)) { - case UPB_TYPE_BOOL: - case UPB_TYPE_INT32: - case UPB_TYPE_UINT32: - case UPB_TYPE_ENUM: - case UPB_TYPE_FLOAT: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT64: - case UPB_TYPE_DOUBLE: - upb_shim_set(h, f, ofs, -1); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: { - upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, ofs)); - // XXX: does't currently handle split buffers. - upb_handlers_setstring(h, f, strhandler, &attr); - upb_handlerattr_uninit(&attr); - break; - } - case UPB_TYPE_MESSAGE: - rb_raise(rb_eRuntimeError, "Doesn't support submessages yet."); - break; - } +// The object cache is a singleton mapping of void* -> Ruby Object. +// It caches Ruby objects that wrap C objects. +// +// When we are wrapping C objects it is desirable to give them identity +// semantics. In other words, if you reach the same C object via two different +// paths, it is desirable (and sometimes even required) that you get the same +// wrapper object both times. If we instead just created a new wrapper object +// every time you ask for one, we could end up with unexpected results like: +// +// f1 = msgdef.field("request_id") +// f2 = msgdef.field("request_id") +// +// # equal? tests identity equality. Returns false without a cache. +// f1.equal?(f2) +// +// We do not register the cache with Ruby's GC, so being in this map will not +// keep the object alive. This is the desired behavior, because it lets objects +// be freed if they have no references from Ruby. We do require, though, that +// objects remove themselves from the map when they are freed. In this respect +// the cache operates like a weak map where the values are weak. + +typedef VALUE createfunc(const void *obj); + +// Call to initialize the cache. Should be done once on process startup. +static void objcache_init() { + upb_inttable_init(&objcache, UPB_CTYPE_UINT64); + objcache_initialized = true; +} + +// Call to uninitialize the cache. Should be done once on process shutdown. +static void objcache_uninit(ruby_vm_t *vm) { + assert(objcache_initialized); + assert(upb_inttable_count(&objcache) == 0); + + objcache_initialized = false; + upb_inttable_uninit(&objcache); +} + +// Looks up the given object in the cache. If the corresponding Ruby wrapper +// object is found, returns it, otherwise creates the wrapper and returns that. +static VALUE objcache_getorcreate(const void *obj, createfunc *func) { + assert(objcache_initialized); + + upb_value v; + if (!upb_inttable_lookupptr(&objcache, obj, &v)) { + v = upb_value_uint64(func(obj)); + upb_inttable_insertptr(&objcache, obj, v); } + return upb_value_getuint64(v); } -// Creates upb handlers for populating a message. -static const upb_handlers *new_fill_handlers(const rb_msgdef *rmd, - const void *owner) { - return upb_handlers_newfrozen(rmd->md, owner, add_handlers_for_message, rmd); +// Removes the given object from the cache. Should only be called by the code +// that is freeing the wrapper object. +static void objcache_remove(const void *obj) { + assert(objcache_initialized); + + bool removed = upb_inttable_removeptr(&objcache, obj, NULL); + UPB_ASSERT_VAR(removed, removed); +} + +/* message layout *************************************************************/ + +// We layout Ruby messages using a raw block of C memory. We assign offsets for +// each member so that instances are laid out like a C struct instead of as +// instance variables. This saves both memory and CPU. + +typedef struct { + // The size of the block of memory we should allocate for instances. + size_t size; + + // Prototype to memcpy() onto new message instances. Size is "size" above. + void *prototype; + + // An offset for each member, indexed by upb_fielddef_index(f). + uint32_t *field_offsets; +} rb_msglayout; + +// Returns true for fields where the field value we store is a Ruby VALUE (ie. a +// direct pointer to another Ruby object) instead of storing the value directly +// in the message. +static bool is_ruby_value(const upb_fielddef *f) { + if (upb_fielddef_isseq(f)) { + // Repeated fields are pointers to arrays. + return true; + } + + if (upb_fielddef_issubmsg(f)) { + // Submessage fields are pointers to submessages. + return true; + } + + if (upb_fielddef_isstring(f)) { + // String fields are pointers to string objects. + return true; + } + + return false; } // General alignment rules are that each type needs to be stored at an address @@ -116,8 +241,12 @@ static size_t align_up(size_t val, size_t align) { } // Byte size to store each upb type. -static size_t rupb_sizeof(upb_fieldtype_t type) { - switch (type) { +static size_t rupb_sizeof(const upb_fielddef *f) { + if (is_ruby_value(f)) { + return sizeof(VALUE); + } + + switch (upb_fielddef_type(f)) { case UPB_TYPE_BOOL: return 1; case UPB_TYPE_INT32: @@ -129,15 +258,228 @@ static size_t rupb_sizeof(upb_fieldtype_t type) { case UPB_TYPE_UINT64: case UPB_TYPE_DOUBLE: return 8; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - case UPB_TYPE_MESSAGE: - return sizeof(VALUE); + default: + break; } assert(false); + return 0; } -/* msg ************************************************************************/ +// Calculates offsets for each field. +// +// This lets us pack protos like structs instead of storing them like +// dictionaries. This speeds up a parsing a lot and also saves memory +// (unless messages are very sparse). +static void assign_offsets(rb_msglayout *layout, const upb_msgdef *md) { + layout->field_offsets = ALLOC_N(uint32_t, upb_msgdef_numfields(md)); + size_t ofs = 0; + upb_msg_iter i; + + for (upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + size_t field_size = rupb_sizeof(f); + + // Align field properly. + // + // TODO(haberman): optimize layout? For example we could sort fields + // big-to-small. + ofs = align_up(ofs, field_size); + + layout->field_offsets[upb_fielddef_index(f)] = ofs; + ofs += field_size; + } + + layout->size = ofs; +} + +// Creates a prototype; a buffer we can memcpy() onto new instances to +// initialize them. +static void make_prototype(rb_msglayout *layout, const upb_msgdef *md) { + void *prototype = ALLOC_N(char, layout->size); + + // Most members default to zero, so we'll start from that and then overwrite + // more specific initialization. + memset(prototype, 0, layout->size); + + upb_msg_iter i; + for (upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + if (is_ruby_value(f)) { + size_t ofs = layout->field_offsets[upb_fielddef_index(f)]; + // Default all Ruby pointers to nil. + DEREF_RAW(prototype, ofs, VALUE) = Qnil; + } + } + + layout->prototype = prototype; +} + + +static void msglayout_init(rb_msglayout *layout, const upb_msgdef *m) { + assign_offsets(layout, m); + make_prototype(layout, m); +} + +static void msglayout_uninit(rb_msglayout *layout) { + free(layout->field_offsets); + free(layout->prototype); +} + + +/* Upb::MessageDef ************************************************************/ + +// C representation for Upb::MessageDef. +// +// Contains a reference to the underlying upb_msgdef, as well as associated data +// like a reference to the corresponding Ruby class. +struct rupb_MessageDef { + // We own refs on all of these. + + // The upb_msgdef we are wrapping. + const upb_msgdef *md; + + // A DecoderMethod for parsing a protobuf into this type. + const upb_pbdecodermethod *fill_method; + + // Handlers for serializing into a protobuf of this type. + const upb_handlers *serialize_handlers; + + // The Ruby class for instances of this type. + VALUE klass; + + // Layout for messages of this type. + rb_msglayout layout; +}; + +// Called by the Ruby GC when a Upb::MessageDef is being freed. +static void msgdef_free(void *_rmd) { + rupb_MessageDef *rmd = _rmd; + objcache_remove(rmd->md); + upb_msgdef_unref(rmd->md, &rmd->md); + if (rmd->fill_method) { + upb_pbdecodermethod_unref(rmd->fill_method, &rmd->fill_method); + } + if (rmd->serialize_handlers) { + upb_handlers_unref(rmd->serialize_handlers, &rmd->serialize_handlers); + } + msglayout_uninit(&rmd->layout); + free(rmd); +} + +// Called by the Ruby GC during the "mark" phase to decide what is still alive. +// We call rb_gc_mark on all Ruby VALUE pointers we reference. +static void msgdef_mark(void *_rmd) { + rupb_MessageDef *rmd = _rmd; + rb_gc_mark(rmd->klass); + + // Mark all submessage types. + upb_msg_iter i; + for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + if (upb_fielddef_issubmsg(f)) { + // If we were trying to be more aggressively lazy, the submessage might + // not be created and we only mark ones that are. + rb_gc_mark(msgdef_getwrapper(upb_fielddef_msgsubdef(f))); + } + } +} + +static const rb_data_type_t msgdef_type = {"Upb::MessageDef", + {msgdef_mark, msgdef_free, NULL}}; + +// TODO(haberman): do we need an alloc func? We want to prohibit dup and +// probably subclassing too. + +static rupb_MessageDef *msgdef_get(VALUE self) { + rupb_MessageDef *msgdef; + TypedData_Get_Struct(self, rupb_MessageDef, &msgdef_type, msgdef); + return msgdef; +} + +// Constructs the upb decoder method for parsing messages of this type. +const upb_pbdecodermethod *new_fillmsg_decodermethod(const rupb_MessageDef *rmd, + const void *owner) { + const upb_handlers *fill_handlers = new_fill_handlers(rmd, &fill_handlers); + upb_pbdecodermethodopts opts; + upb_pbdecodermethodopts_init(&opts, fill_handlers); + + const upb_pbdecodermethod *ret = upb_pbdecodermethod_new(&opts, owner); + upb_handlers_unref(fill_handlers, &fill_handlers); + return ret; +} + +// Constructs a new Ruby wrapper object around the given msgdef. +static VALUE make_msgdef(const void *_md) { + const upb_msgdef *md = _md; + rupb_MessageDef *rmd; + VALUE ret = + TypedData_Make_Struct(cMessageDef, rupb_MessageDef, &msgdef_type, rmd); + + upb_msgdef_ref(md, &rmd->md); + + rmd->md = md; + rmd->fill_method = NULL; + + // OPT: most of these things could be built lazily, when they are first + // needed. + msglayout_init(&rmd->layout, md); + + rmd->fill_method = NULL; + rmd->klass = new_message_class(ret); + rmd->serialize_handlers = + upb_pb_encoder_newhandlers(md, &rmd->serialize_handlers); + + return ret; +} + +// Accessor to get a decoder method for this message type. +// Constructs the decoder method lazily. +static const upb_pbdecodermethod *msgdef_decodermethod(rupb_MessageDef *rmd) { + if (!rmd->fill_method) { + rmd->fill_method = new_fillmsg_decodermethod(rmd, &rmd->fill_method); + } + + return rmd->fill_method; +} + +static VALUE msgdef_getwrapper(const upb_msgdef *md) { + return objcache_getorcreate(md, make_msgdef); +} + +static const rupb_MessageDef *get_rbmsgdef(const upb_msgdef *md) { + return msgdef_get(msgdef_getwrapper(md)); +} + + +/* Upb::Message ***************************************************************/ + +// Code to implement the Upb::Message object. +// +// A unique Ruby class is generated for each message type, but all message types +// share Upb::Message as their base class. Upb::Message contains all of the +// actual functionality; the only reason the derived class exists at all is +// for convenience. It lets Ruby users do things like: +// +// message = MyMessage.new +// if message.kind_of?(MyMessage) +// +// ... and other similar things that Ruby users expect they can do. + +// C representation of Upb::Message. +// +// Represents a message instance, laid out like a C struct in a type-specific +// layout. +// +// This will be sized according to what fields are actually present. +struct rupb_Message { + VALUE rbmsgdef; + char data[]; +}; + +// Returns the size of a message instance. +size_t msg_size(const rupb_MessageDef *rmd) { + return sizeof(rupb_Message) + rmd->layout.size; +} static void msg_free(void *msg) { free(msg); @@ -145,103 +487,170 @@ static void msg_free(void *msg) { // Invoked by the Ruby GC whenever it is doing a mark-and-sweep. static void msg_mark(void *p) { - rb_msg *msg = p; - rb_msgdef *rmd; - Data_Get_Struct(msg->data.rbmsgdef, rb_msgdef, rmd); + rupb_Message *msg = p; + rupb_MessageDef *rmd = msgdef_get(msg->rbmsgdef); // Mark the msgdef to keep it alive. - rb_gc_mark(msg->data.rbmsgdef); + rb_gc_mark(msg->rbmsgdef); // We need to mark all references to other Ruby values: strings, arrays, and - // submessages that we point to. Only strings are implemented so far. + // submessages that we point to. upb_msg_iter i; for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); - if (upb_fielddef_isstring(f)) { - size_t ofs = rmd->field_offsets[upb_fielddef_index(f)]; + if (is_ruby_value(f)) { + size_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)]; rb_gc_mark(DEREF(msg, ofs, VALUE)); } } } -static VALUE msg_new(VALUE msgdef) { - const rb_msgdef *rmd; - Data_Get_Struct(msgdef, rb_msgdef, rmd); +static const rb_data_type_t msg_type = {"Upb::Message", + {msg_mark, msg_free, NULL}}; + +static rupb_Message *msg_get(VALUE self) { + rupb_Message *msg; + TypedData_Get_Struct(self, rupb_Message, &msg_type, msg); + return msg; +} + +// Instance variable name that we use to store a reference from the Ruby class +// for a message and its Upb::MessageDef. +// +// We avoid prefixing this by "@" to make it inaccessible by Ruby. +static const char *kMessageDefMemberName = "msgdef"; + +static VALUE msg_getmsgdef(VALUE klass) { + VALUE msgdef = rb_iv_get(klass, kMessageDefMemberName); - rb_msg *msg = (rb_msg*)ALLOC_N(char, rmd->size); - memset(msg, 0, rmd->size); - msg->data.rbmsgdef = msgdef; + if (msgdef == Qnil) { + // TODO(haberman): If we want to allow subclassing, we might want to walk up + // the hierarchy looking for this member. + rb_raise(rb_eRuntimeError, + "Can't call on Upb::Message directly, only subclasses"); + } + + return msgdef; +} - VALUE ret = Data_Wrap_Struct(cMessage, msg_mark, msg_free, msg); +// Called by the Ruby VM when it wants to create a new message instance. +static VALUE msg_alloc(VALUE klass) { + VALUE msgdef = msg_getmsgdef(klass); + const rupb_MessageDef *rmd = msgdef_get(msgdef); + + rupb_Message *msg = (rupb_Message*)ALLOC_N(char, msg_size(rmd)); + msg->rbmsgdef = msgdef; + memcpy(&msg->data, rmd->layout.prototype, rmd->layout.size); + + VALUE ret = TypedData_Wrap_Struct(klass, &msg_type, msg); return ret; } -static const upb_fielddef *lookup_field(rb_msg *msg, const char *field, - size_t *ofs) { - const rb_msgdef *rmd; - Data_Get_Struct(msg->data.rbmsgdef, rb_msgdef, rmd); - const upb_fielddef *f = upb_msgdef_ntof(rmd->md, field); +// Creates a new Ruby class for the given Upb::MessageDef. The new class +// derives from Upb::Message but also stores a reference to the Upb::MessageDef. +static VALUE new_message_class(VALUE message_def) { + msgdef_get(message_def); // Check type. + VALUE klass = rb_class_new(cMessage); + rb_iv_set(klass, kMessageDefMemberName, message_def); + + // This shouldn't be necessary because we should inherit the alloc func from + // the base class of Message. For some reason this is not working properly + // and we are having to define it manually. + rb_define_alloc_func(klass, msg_alloc); + + return klass; +} + +// Call to create a new Message instance. +static VALUE msg_new(VALUE msgdef) { + return rb_class_new_instance(0, NULL, get_message_class(Qnil, msgdef)); +} + +// Looks up the given field. On success returns the upb_fielddef and stores the +// offset in *ofs. Otherwise raises a Ruby exception. +static const upb_fielddef *lookup_field(rupb_Message *msg, const char *field, + size_t len, size_t *ofs) { + const rupb_MessageDef *rmd = msgdef_get(msg->rbmsgdef); + const upb_fielddef *f = upb_msgdef_ntof(rmd->md, field, len); + if (!f) { - rb_raise(rb_eArgError, "No such field: %s", field); + rb_raise(rb_eArgError, "Message %s does not contain field %s", + upb_msgdef_fullname(rmd->md), field); } - *ofs = rmd->field_offsets[upb_fielddef_index(f)]; + + *ofs = rmd->layout.field_offsets[upb_fielddef_index(f)]; return f; } -static VALUE msg_setter(rb_msg *msg, VALUE field, VALUE val) { +// Sets the given field to the given value. +static void setprimitive(rupb_Message *m, size_t ofs, const upb_fielddef *f, + VALUE val) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_FLOAT: DEREF(m, ofs, float) = value_to_float(val); break; + case UPB_TYPE_DOUBLE: DEREF(m, ofs, double) = value_to_double(val); break; + case UPB_TYPE_BOOL: DEREF(m, ofs, bool) = value_to_bool(val); break; + case UPB_TYPE_ENUM: + case UPB_TYPE_INT32: DEREF(m, ofs, int32_t) = value_to_int32(val); break; + case UPB_TYPE_UINT32: DEREF(m, ofs, uint32_t) = value_to_uint32(val); break; + case UPB_TYPE_INT64: DEREF(m, ofs, int64_t) = value_to_int64(val); break; + case UPB_TYPE_UINT64: DEREF(m, ofs, uint64_t) = value_to_uint64(val); break; + default: rb_bug("Unexpected type"); + } +} + +// Returns the Ruby VALUE for the given field. +static VALUE getprimitive(rupb_Message *m, size_t ofs, const upb_fielddef *f) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_FLOAT: return float_to_value(DEREF(m, ofs, float)); + case UPB_TYPE_DOUBLE: return double_to_value(DEREF(m, ofs, double)); + case UPB_TYPE_BOOL: return bool_to_value(DEREF(m, ofs, bool)); + case UPB_TYPE_ENUM: + case UPB_TYPE_INT32: return int32_to_value(DEREF(m, ofs, int32_t)); + case UPB_TYPE_UINT32: return uint32_to_value(DEREF(m, ofs, uint32_t)); + case UPB_TYPE_INT64: return int64_to_value(DEREF(m, ofs, int64_t)); + case UPB_TYPE_UINT64: return uint64_to_value(DEREF(m, ofs, uint64_t)); + default: rb_bug("Unexpected type"); + } +} + +static VALUE msg_setter(rupb_Message *msg, VALUE field, VALUE val) { size_t ofs; - char *fieldp = RSTRING_PTR(field); - size_t field_last = RSTRING_LEN(field) - 1; // fieldp is a string like "id=". But we want to look up "id". - // We take the liberty of temporarily setting the "=" to NULL. - assert(fieldp[field_last] == '='); - fieldp[field_last] = '\0'; - const upb_fielddef *f = lookup_field(msg, fieldp, &ofs); - fieldp[field_last] = '='; + const upb_fielddef *f = + lookup_field(msg, RSTRING_PTR(field), RSTRING_LEN(field) - 1, &ofs); // Possibly introduce stricter type checking. - switch (upb_fielddef_type(f)) { - case UPB_TYPE_FLOAT: DEREF(msg, ofs, float) = NUM2DBL(val); - case UPB_TYPE_DOUBLE: DEREF(msg, ofs, double) = NUM2DBL(val); - case UPB_TYPE_BOOL: DEREF(msg, ofs, bool) = RTEST(val); - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: DEREF(msg, ofs, VALUE) = val; - case UPB_TYPE_MESSAGE: return Qnil; - case UPB_TYPE_ENUM: - case UPB_TYPE_INT32: DEREF(msg, ofs, int32_t) = NUM2INT(val); - case UPB_TYPE_UINT32: DEREF(msg, ofs, uint32_t) = NUM2LONG(val); - case UPB_TYPE_INT64: DEREF(msg, ofs, int64_t) = NUM2LONG(val); - case UPB_TYPE_UINT64: DEREF(msg, ofs, uint64_t) = NUM2ULL(val); + if (is_ruby_value(f)) { + DEREF(msg, ofs, VALUE) = val; + } else { + setprimitive(msg, ofs, f, val); } return val; } -static VALUE msg_getter(rb_msg *msg, VALUE field) { +static VALUE msg_getter(rupb_Message *msg, VALUE field) { size_t ofs; - const upb_fielddef *f = lookup_field(msg, RSTRING_PTR(field), &ofs); + const upb_fielddef *f = + lookup_field(msg, RSTRING_PTR(field), RSTRING_LEN(field), &ofs); - switch (upb_fielddef_type(f)) { - case UPB_TYPE_FLOAT: return rb_float_new(DEREF(msg, ofs, float)); - case UPB_TYPE_DOUBLE: return rb_float_new(DEREF(msg, ofs, double)); - case UPB_TYPE_BOOL: return DEREF(msg, ofs, bool) ? Qtrue : Qfalse; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: return DEREF(msg, ofs, VALUE); - case UPB_TYPE_MESSAGE: return Qnil; - case UPB_TYPE_ENUM: - case UPB_TYPE_INT32: return INT2NUM(DEREF(msg, ofs, int32_t)); - case UPB_TYPE_UINT32: return LONG2NUM(DEREF(msg, ofs, uint32_t)); - case UPB_TYPE_INT64: return LONG2NUM(DEREF(msg, ofs, int64_t)); - case UPB_TYPE_UINT64: return ULL2NUM(DEREF(msg, ofs, uint64_t)); + if (is_ruby_value(f)) { + return DEREF(msg, ofs, VALUE); + } else { + return getprimitive(msg, ofs, f); } - - rb_bug("Unexpected type"); } +// This is the Message object's "method_missing" method, so it receives calls +// for any method whose name was not recognized. We use it to implement getters +// and setters for every field +// +// call-seq: +// message.field -> current value of "field" +// message.field = new_value static VALUE msg_accessor(int argc, VALUE *argv, VALUE obj) { - rb_msg *msg; - Data_Get_Struct(obj, rb_msg, msg); + rupb_Message *msg = msg_get(obj); // method_missing protocol: (method [, arg1, arg2, ...]) assert(argc >= 1 && SYMBOL_P(argv[0])); @@ -270,72 +679,106 @@ static VALUE msg_accessor(int argc, VALUE *argv, VALUE obj) { } } -/* msgdef *********************************************************************/ - -static void msgdef_free(void *_rmd) { - rb_msgdef *rmd = _rmd; - upb_msgdef_unref(rmd->md, &rmd->md); - if (rmd->fill_method) { - upb_pbdecodermethod_unref(rmd->fill_method, &rmd->fill_method); - } - free(rmd->field_offsets); +// Called when Ruby wants to turn this value into a string. +// TODO(haberman): implement. +static VALUE msg_tostring(VALUE self) { + return rb_str_new2("tostring!"); } -const upb_pbdecodermethod *new_fillmsg_decodermethod(const rb_msgdef *rmd, - const void *owner) { - const upb_handlers *fill_handlers = new_fill_handlers(rmd, &fill_handlers); - upb_pbdecodermethodopts opts; - upb_pbdecodermethodopts_init(&opts, fill_handlers); +// call-seq: +// MessageClass.parse(binary_protobuf) -> message instance +// +// Parses a binary protobuf according to this message class and returns a new +// message instance of this class type. +static VALUE msg_parse(VALUE klass, VALUE binary_protobuf) { + Check_Type(binary_protobuf, T_STRING); + rupb_MessageDef *rmd = msgdef_get(msg_getmsgdef(klass)); - const upb_pbdecodermethod *ret = upb_pbdecodermethod_new(&opts, owner); - upb_handlers_unref(fill_handlers, &fill_handlers); - return ret; + VALUE msg = rb_class_new_instance(0, NULL, klass); + rupb_Message *msgp = msg_get(msg); + + const upb_pbdecodermethod *method = msgdef_decodermethod(rmd); + const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); + upb_pbdecoder decoder; + upb_sink sink; + upb_status status = UPB_STATUS_INIT; + + upb_pbdecoder_init(&decoder, method, &status); + upb_sink_reset(&sink, h, msgp); + upb_pbdecoder_resetoutput(&decoder, &sink); + upb_bufsrc_putbuf(RSTRING_PTR(binary_protobuf), + RSTRING_LEN(binary_protobuf), + upb_pbdecoder_input(&decoder)); + + // TODO(haberman): make uninit optional if custom allocator for parsing + // returns GC-rooted memory. That will make decoding longjmp-safe (required + // if parsing triggers any VM errors like OOM or errors in user handlers). + upb_pbdecoder_uninit(&decoder); + rupb_checkstatus(&status); + + return msg; } -// Calculates offsets for each field. +// call-seq: +// Message.serialize(message instance) -> serialized string // -// This lets us pack protos like structs instead of storing them like -// dictionaries. This speeds up a parsing a lot and also saves memory -// (unless messages are very sparse). -static void assign_offsets(rb_msgdef *rmd) { - size_t ofs = sizeof(rb_msg); // Msg starts with predeclared members. - upb_msg_iter i; - for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - size_t field_size = rupb_sizeof(upb_fielddef_type(f)); - ofs = align_up(ofs, field_size); // Align field properly. - rmd->field_offsets[upb_fielddef_index(f)] = ofs; - ofs += field_size; - } - rmd->size = ofs; -} +// Serializes the given message instance to a string. +static VALUE msg_serialize(VALUE klass, VALUE message) { + rupb_Message *msg = msg_get(message); + const rupb_MessageDef *rmd = msgdef_get(msg->rbmsgdef); -// Constructs a new Ruby wrapper object around the given msgdef. -static VALUE make_msgdef(const upb_msgdef *md) { - rb_msgdef *rmd; - VALUE ret = Data_Make_Struct(cMessageDef, rb_msgdef, NULL, msgdef_free, rmd); + stringsink sink; + stringsink_init(&sink); - upb_msgdef_ref(md, &rmd->md); + upb_pb_encoder encoder; + upb_pb_encoder_init(&encoder, rmd->serialize_handlers); + upb_pb_encoder_resetoutput(&encoder, &sink.sink); - rmd->md = md; - rmd->field_offsets = ALLOC_N(uint32_t, upb_msgdef_numfields(md)); - rmd->fill_method = NULL; + putmsg(msg, rmd, upb_pb_encoder_input(&encoder)); - assign_offsets(rmd); + VALUE ret = rb_str_new(sink.ptr, sink.len); - rmd->fill_method = new_fillmsg_decodermethod(rmd, &rmd->fill_method); + upb_pb_encoder_uninit(&encoder); + stringsink_uninit(&sink); return ret; } -// Loads a descriptor and constructs a MessageDef to the named message. -static VALUE msgdef_load(VALUE klass, VALUE descriptor, VALUE message_name) { + +/* Upb::SymbolTable ***********************************************************/ + +// Ruby wrapper around a SymbolTable. Allows loading of descriptors and turning +// them into MessageDef objects. + +void symtab_free(void *s) { + upb_symtab_unref(s, UPB_UNTRACKED_REF); +} + +static const rb_data_type_t symtab_type = {"Upb::SymbolTable", + {NULL, symtab_free, NULL}}; + +// Called by the Ruby VM to allocate a SymbolTable object. +static VALUE symtab_alloc(VALUE klass) { upb_symtab *symtab = upb_symtab_new(UPB_UNTRACKED_REF); + VALUE ret = TypedData_Wrap_Struct(klass, &symtab_type, symtab); - // Wrap the symtab in a Ruby object so it gets GC'd. - // In a real wrapper we would wrap this object more fully (ie. expose its - // methods to Ruby callers). - Data_Wrap_Struct(rb_cObject, NULL, symtab_free, symtab); + return ret; +} + +static upb_symtab *symtab_get(VALUE self) { + upb_symtab *symtab; + TypedData_Get_Struct(self, upb_symtab, &symtab_type, symtab); + return symtab; +} + +// call-seq: +// symtab.load_descriptor(descriptor) +// +// Parses a FileDescriptorSet from the given string and adds the defs to the +// SymbolTable. Raises if there was an error. +static VALUE symtab_load_descriptor(VALUE self, VALUE descriptor) { + upb_symtab *symtab = symtab_get(self); + Check_Type(descriptor, T_STRING); upb_status status = UPB_STATUS_INIT; upb_load_descriptor_into_symtab( @@ -346,51 +789,377 @@ static VALUE msgdef_load(VALUE klass, VALUE descriptor, VALUE message_name) { "Error loading descriptor: %s", upb_status_errmsg(&status)); } - const char *name = RSTRING_PTR(message_name); - const upb_msgdef *m = upb_symtab_lookupmsg(symtab, name); + return Qnil; +} + +// call-seq: +// symtab.lookup(name) +// +// Returns the def for this name, or nil if none. +// TODO(haberman): only support messages right now, not enums. +static VALUE symtab_lookup(VALUE self, VALUE name) { + upb_symtab *symtab = symtab_get(self); + Check_Type(name, T_STRING); + + const char *cname = RSTRING_PTR(name); + const upb_msgdef *m = upb_symtab_lookupmsg(symtab, cname); if (!m) { - rb_raise(rb_eRuntimeError, "Message name '%s' not found", name); + rb_raise(rb_eRuntimeError, "Message name '%s' not found", cname); } - return make_msgdef(m); + return msgdef_getwrapper(m); } -static VALUE msgdef_parse(VALUE self, VALUE binary_protobuf) { - const rb_msgdef *rmd; - Data_Get_Struct(self, rb_msgdef, rmd); - VALUE msg = msg_new(self); - rb_msg *msgp; - Data_Get_Struct(msg, rb_msg, msgp); +/* handlers *******************************************************************/ - const upb_handlers *h = upb_pbdecodermethod_desthandlers(rmd->fill_method); - upb_pbdecoder decoder; - upb_sink sink; - upb_status status = UPB_STATUS_INIT; +// These are handlers for populating a Ruby protobuf message (rupb_Message) when +// parsing. - upb_pbdecoder_init(&decoder, rmd->fill_method, &status); - upb_sink_reset(&sink, h, msgp); - upb_pbdecoder_resetoutput(&decoder, &sink); - upb_bufsrc_putbuf(RSTRING_PTR(binary_protobuf), - RSTRING_LEN(binary_protobuf), - upb_pbdecoder_input(&decoder)); - // TODO(haberman): make uninit optional if custom allocator for parsing - // returns GC-rooted memory. That will make decoding longjmp-safe (required - // if parsing triggers any VM errors like OOM or errors in user handlers). - upb_pbdecoder_uninit(&decoder); - rupb_checkstatus(&status); +// Creates a handlerdata that simply contains the offset for this field. +static const void *newhandlerdata(upb_handlers *h, uint32_t ofs) { + size_t *hd_ofs = ALLOC(size_t); + *hd_ofs = ofs; + upb_handlers_addcleanup(h, hd_ofs, free); + return hd_ofs; +} - return msg; +typedef struct { + size_t ofs; + const upb_msgdef *md; +} submsg_handlerdata_t; + +// Creates a handlerdata that contains offset and submessage type information. +static const void *newsubmsghandlerdata(upb_handlers *h, uint32_t ofs, + const upb_fielddef *f) { + submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t); + hd->ofs = ofs; + hd->md = upb_fielddef_msgsubdef(f); + upb_handlers_addcleanup(h, hd, free); + return hd; +} + +// A handler that starts a repeated field. Gets or creates a Ruby array for the +// field. +static void *startseq_handler(void *closure, const void *hd) { + rupb_Message *msg = closure; + const size_t *ofs = hd; + + if (DEREF(msg, *ofs, VALUE) == Qnil) { + DEREF(msg, *ofs, VALUE) = rb_ary_new(); + } + + return (void*)DEREF(msg, *ofs, VALUE); +} + +// Handlers that append primitive values to a repeated field (a regular Ruby +// array for now). +#define DEFINE_APPEND_HANDLER(type, ctype) \ + static bool append##type##_handler(void *closure, const void *hd, \ + ctype val) { \ + VALUE ary = (VALUE)closure; \ + rb_ary_push(ary, type##_to_value(val)); \ + return true; \ + } + +DEFINE_APPEND_HANDLER(bool, bool) +DEFINE_APPEND_HANDLER(int32, int32_t) +DEFINE_APPEND_HANDLER(uint32, uint32_t) +DEFINE_APPEND_HANDLER(float, float) +DEFINE_APPEND_HANDLER(int64, int64_t) +DEFINE_APPEND_HANDLER(uint64, uint64_t) +DEFINE_APPEND_HANDLER(double, double) + +// Appends a string to a repeated field (a regular Ruby array for now). +static size_t appendstr_handler(void *closure, const void *hd, const char *str, + size_t len, const upb_bufhandle *handle) { + VALUE ary = (VALUE)closure; + rb_ary_push(ary, rb_str_new(str, len)); + return len; +} + +// Sets a non-repeated string field in a message. +static size_t str_handler(void *closure, const void *hd, const char *str, + size_t len, const upb_bufhandle *handle) { + rupb_Message *msg = closure; + const size_t *ofs = hd; + DEREF(msg, *ofs, VALUE) = rb_str_new(str, len); + return len; +} + +// Appends a submessage to a repeated field (a regular Ruby array for now). +static void *appendsubmsg_handler(void *closure, const void *hd) { + VALUE ary = (VALUE)closure; + const submsg_handlerdata_t *submsgdata = hd; + VALUE submsg = msg_new(msgdef_getwrapper(submsgdata->md)); + rb_ary_push(ary, submsg); + return msg_get(submsg); +} + +// Sets a non-repeated submessage field in a message. +static void *submsg_handler(void *closure, const void *hd) { + rupb_Message *msg = closure; + const submsg_handlerdata_t *submsgdata = hd; + + if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) { + DEREF(msg, submsgdata->ofs, VALUE) = msg_new(msgdef_getwrapper(submsgdata->md)); + } + + VALUE submsg = DEREF(msg, submsgdata->ofs, VALUE); + return msg_get(submsg); +} + +static void add_handlers_for_message(const void *closure, upb_handlers *h) { + const rupb_MessageDef *rmd = get_rbmsgdef(upb_handlers_msgdef(h)); + upb_msg_iter i; + + for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + size_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)]; + + if (upb_fielddef_isseq(f)) { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, ofs)); + upb_handlers_setstartseq(h, f, startseq_handler, &attr); + upb_handlerattr_uninit(&attr); + + switch (upb_fielddef_type(f)) { + +#define SET_HANDLER(utype, ltype) \ + case utype: \ + upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \ + break; + + SET_HANDLER(UPB_TYPE_BOOL, bool); + SET_HANDLER(UPB_TYPE_INT32, int32); + SET_HANDLER(UPB_TYPE_UINT32, uint32); + SET_HANDLER(UPB_TYPE_ENUM, int32); + SET_HANDLER(UPB_TYPE_FLOAT, float); + SET_HANDLER(UPB_TYPE_INT64, int64); + SET_HANDLER(UPB_TYPE_UINT64, uint64); + SET_HANDLER(UPB_TYPE_DOUBLE, double); + +#undef SET_HANDLER + + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + // XXX: does't currently handle split buffers. + upb_handlers_setstring(h, f, appendstr_handler, NULL); + break; + case UPB_TYPE_MESSAGE: { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f)); + upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr); + upb_handlerattr_uninit(&attr); + break; + } + } + } + + switch (upb_fielddef_type(f)) { + case UPB_TYPE_BOOL: + case UPB_TYPE_INT32: + case UPB_TYPE_UINT32: + case UPB_TYPE_ENUM: + case UPB_TYPE_FLOAT: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT64: + case UPB_TYPE_DOUBLE: + // The shim writes directly at the given offset (instead of using + // DEREF()) so we need to add the msg overhead. + upb_shim_set(h, f, ofs + sizeof(rupb_Message), -1); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, ofs)); + // XXX: does't currently handle split buffers. + upb_handlers_setstring(h, f, str_handler, &attr); + upb_handlerattr_uninit(&attr); + break; + } + case UPB_TYPE_MESSAGE: { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, ofs, f)); + upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr); + upb_handlerattr_uninit(&attr); + break; + } + } + } +} + +// Creates upb handlers for populating a message. +static const upb_handlers *new_fill_handlers(const rupb_MessageDef *rmd, + const void *owner) { + return upb_handlers_newfrozen(rmd->md, owner, add_handlers_for_message, NULL); +} + + +/* msgvisitor *****************************************************************/ + +// This is code to push the contents of a Ruby message (rupb_Message) to a upb +// sink. + +static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) { + upb_selector_t ret; + bool ok = upb_handlers_getselector(f, type, &ret); + UPB_ASSERT_VAR(ok, ok); + return ret; +} + +static void putstr(VALUE str, const upb_fielddef *f, upb_sink *sink) { + if (str == Qnil) return; + + assert(BUILTIN_TYPE(str) == RUBY_T_STRING); + upb_sink subsink; + + upb_sink_startstr(sink, getsel(f, UPB_HANDLER_STARTSTR), RSTRING_LEN(str), + &subsink); + upb_sink_putstring(&subsink, getsel(f, UPB_HANDLER_STRING), RSTRING_PTR(str), + RSTRING_LEN(str), NULL); + upb_sink_endstr(sink, getsel(f, UPB_HANDLER_ENDSTR)); +} + +static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink) { + if (submsg == Qnil) return; + + upb_sink subsink; + const rupb_MessageDef *sub_rmd = get_rbmsgdef(upb_fielddef_msgsubdef(f)); + + upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &subsink); + putmsg(msg_get(submsg), sub_rmd, &subsink); + upb_sink_endsubmsg(sink, getsel(f, UPB_HANDLER_ENDSUBMSG)); +} + +static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink) { + if (ary == Qnil) return; + + assert(BUILTIN_TYPE(ary) == RUBY_T_ARRAY); + upb_sink subsink; + + upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink); + + upb_fieldtype_t type = upb_fielddef_type(f); + upb_selector_t sel = 0; + if (upb_fielddef_isprimitive(f)) { + sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); + } + + int i; + for (i = 0; i < RARRAY_LEN(ary); i++) { + VALUE val = rb_ary_entry(ary, i); + switch (type) { + +#define T(upbtypeconst, upbtype, ctype) \ + case upbtypeconst: \ + upb_sink_put##upbtype(&subsink, sel, value_to_##upbtype(val)); \ + break; + + T(UPB_TYPE_FLOAT, float, float) + T(UPB_TYPE_DOUBLE, double, double) + T(UPB_TYPE_BOOL, bool, bool) + case UPB_TYPE_ENUM: + T(UPB_TYPE_INT32, int32, int32_t) + T(UPB_TYPE_UINT32, uint32, uint32_t) + T(UPB_TYPE_INT64, int64, int64_t) + T(UPB_TYPE_UINT64, uint64, uint64_t) + + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + putstr(val, f, &subsink); + break; + case UPB_TYPE_MESSAGE: + putsubmsg(val, f, &subsink); + break; + +#undef T + + } + } + upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ)); +} + +static void putmsg(rupb_Message *msg, const rupb_MessageDef *rmd, + upb_sink *sink) { + upb_sink_startmsg(sink); + + upb_msg_iter i; + for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + uint32_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)]; + + if (upb_fielddef_isseq(f)) { + VALUE ary = DEREF(msg, ofs, VALUE); + if (ary != Qnil) { + putary(ary, f, sink); + } + } else if (upb_fielddef_isstring(f)) { + putstr(DEREF(msg, ofs, VALUE), f, sink); + } else if (upb_fielddef_issubmsg(f)) { + putsubmsg(DEREF(msg, ofs, VALUE), f, sink); + } else { + upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); + +#define T(upbtypeconst, upbtype, ctype) \ + case upbtypeconst: \ + upb_sink_put##upbtype(sink, sel, DEREF(msg, ofs, ctype)); \ + break; + + switch (upb_fielddef_type(f)) { + T(UPB_TYPE_FLOAT, float, float) + T(UPB_TYPE_DOUBLE, double, double) + T(UPB_TYPE_BOOL, bool, bool) + case UPB_TYPE_ENUM: + T(UPB_TYPE_INT32, int32, int32_t) + T(UPB_TYPE_UINT32, uint32, uint32_t) + T(UPB_TYPE_INT64, int64, int64_t) + T(UPB_TYPE_UINT64, uint64, uint64_t) + + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + case UPB_TYPE_MESSAGE: rb_raise(rb_eRuntimeError, "Internal error."); + } + +#undef T + + } + } + + upb_status status; + upb_sink_endmsg(sink, &status); +} + + +/* top level ******************************************************************/ + +static VALUE get_message_class(VALUE klass, VALUE message) { + rupb_MessageDef *rmd = msgdef_get(message); + return rmd->klass; } void Init_upb() { VALUE upb = rb_define_module("Upb"); + rb_define_singleton_method(upb, "get_message_class", get_message_class, 1); + rb_gc_register_address(&message_map); + + cSymbolTable = rb_define_class_under(upb, "SymbolTable", rb_cObject); + rb_define_alloc_func(cSymbolTable, symtab_alloc); + rb_define_method(cSymbolTable, "load_descriptor", symtab_load_descriptor, 1); + rb_define_method(cSymbolTable, "lookup", symtab_lookup, 1); cMessageDef = rb_define_class_under(upb, "MessageDef", rb_cObject); - rb_define_singleton_method(cMessageDef, "load", msgdef_load, 2); - rb_define_method(cMessageDef, "parse", msgdef_parse, 1); cMessage = rb_define_class_under(upb, "Message", rb_cObject); + rb_define_alloc_func(cMessage, msg_alloc); rb_define_method(cMessage, "method_missing", msg_accessor, -1); + rb_define_method(cMessage, "to_s", msg_tostring, 0); + rb_define_singleton_method(cMessage, "parse", msg_parse, 1); + rb_define_singleton_method(cMessage, "serialize", msg_serialize, 1); + + objcache_init(); + + // This causes atexit crashes for unknown reasons. :( + // ruby_vm_at_exit(objcache_uninit); } -- cgit v1.2.3