From 0fd2f830882402979a83010e89650e7245960d39 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Tue, 21 Jan 2014 18:38:49 -0800 Subject: Sync to internal Google development. --- upb/bindings/googlepb/README | 20 + upb/bindings/googlepb/bridge.cc | 279 ++++++++++++++ upb/bindings/googlepb/bridge.h | 205 ++++++++++ upb/bindings/googlepb/proto1.cc | 483 ++++++++++++++++++++++++ upb/bindings/googlepb/proto1.h | 51 +++ upb/bindings/googlepb/proto2.cc | 816 ++++++++++++++++++++++++++++++++++++++++ upb/bindings/googlepb/proto2.h | 61 +++ 7 files changed, 1915 insertions(+) create mode 100644 upb/bindings/googlepb/README create mode 100644 upb/bindings/googlepb/bridge.cc create mode 100644 upb/bindings/googlepb/bridge.h create mode 100644 upb/bindings/googlepb/proto1.cc create mode 100644 upb/bindings/googlepb/proto1.h create mode 100644 upb/bindings/googlepb/proto2.cc create mode 100644 upb/bindings/googlepb/proto2.h (limited to 'upb/bindings/googlepb') diff --git a/upb/bindings/googlepb/README b/upb/bindings/googlepb/README new file mode 100644 index 0000000..e3140f4 --- /dev/null +++ b/upb/bindings/googlepb/README @@ -0,0 +1,20 @@ +This directory contains code to interoperate with Google's official +Protocol Buffers release. Since it doesn't really have a name +besides "protobuf," calling this directory "googlepb" seems like the +least confusing option, since it lives in the google::protobuf +namespace. + +We support writing into protobuf's generated classes (and hopefully +reading too, before long). We support both the open source protobuf +release and the Google-internal version (which is mostly the same +code, just in a different namespace). A single compile of upb can +support both (there are no conflicts thanks to function overloading). + +The internal version supports some features that are not supported in +the open-source release. 
Also, the internal version includes the +legacy "proto1" classes which we must support; thankfully this is +mostly relegated to its own separate file. + +Our functionality requires the full google::protobuf::Message +interface; we rely on reflection so we know what fields to read/write +and where to put them, so we can't support MessageLite. diff --git a/upb/bindings/googlepb/bridge.cc b/upb/bindings/googlepb/bridge.cc new file mode 100644 index 0000000..a125249 --- /dev/null +++ b/upb/bindings/googlepb/bridge.cc @@ -0,0 +1,279 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// IMPORTANT NOTE! Inside Google, this file is compiled TWICE, once with +// UPB_GOOGLE3 defined and once without! This allows us to provide +// functionality against proto2 and protobuf opensource both in a single binary +// without the two conflicting. However we must be careful not to violate the +// ODR. 
+ +#include "upb/bindings/googlepb/bridge.h" + +#include +#include +#include +#include "upb/def.h" +#include "upb/bindings/googlepb/proto1.h" +#include "upb/bindings/googlepb/proto2.h" +#include "upb/handlers.h" + +#define ASSERT_STATUS(status) do { \ + if (!upb_ok(status)) { \ + fprintf(stderr, "upb status failure: %s\n", upb_status_errmsg(status)); \ + assert(upb_ok(status)); \ + } \ + } while (0) + +#ifdef UPB_GOOGLE3 +#include "net/proto2/public/descriptor.h" +#include "net/proto2/public/message.h" +#include "net/proto2/proto/descriptor.pb.h" +namespace goog = ::proto2; +#else +#include "google/protobuf/descriptor.h" +#include "google/protobuf/message.h" +#include "google/protobuf/descriptor.pb.h" +namespace goog = ::google::protobuf; +#endif + +namespace { + +const goog::Message* GetPrototype(const goog::Message& m, + const goog::FieldDescriptor* f) { + const goog::Message* ret = NULL; +#ifdef UPB_GOOGLE3 + ret = upb::google::GetProto1WeakPrototype(m, f); + if (ret) return ret; +#endif + + if (f->cpp_type() == goog::FieldDescriptor::CPPTYPE_MESSAGE) { + ret = upb::google::GetFieldPrototype(m, f); +#ifdef UPB_GOOGLE3 + if (!ret) ret = upb::google::GetProto1FieldPrototype(m, f); +#endif + assert(ret); + } + return ret; +} + +} // namespace + +namespace upb { +namespace googlepb { + + +/* DefBuilder ****************************************************************/ + +const EnumDef* DefBuilder::GetEnumDef(const goog::EnumDescriptor* ed) { + const EnumDef* cached = FindInCache(ed); + if (cached) return cached; + + EnumDef* e = AddToCache(ed, EnumDef::New()); + + Status status; + e->set_full_name(ed->full_name(), &status); + for (int i = 0; i < ed->value_count(); i++) { + const goog::EnumValueDescriptor* val = ed->value(i); + bool success = e->AddValue(val->name(), val->number(), &status); + UPB_ASSERT_VAR(success, success); + } + + e->Freeze(&status); + + ASSERT_STATUS(&status); + return e; +} + +const MessageDef* DefBuilder::GetMaybeUnfrozenMessageDef( + const 
goog::Descriptor* d, const goog::Message* m) { + const MessageDef* cached = FindInCache(d); + if (cached) return cached; + + MessageDef* md = AddToCache(d, MessageDef::New()); + to_freeze_.push_back(upb::upcast(md)); + + Status status; + md->set_full_name(d->full_name(), &status); + ASSERT_STATUS(&status); + + // Find all regular fields and extensions for this message. + std::vector fields; + d->file()->pool()->FindAllExtensions(d, &fields); + for (int i = 0; i < d->field_count(); i++) { + fields.push_back(d->field(i)); + } + + for (int i = 0; i < fields.size(); i++) { + const goog::FieldDescriptor* proto2_f = fields[i]; + assert(proto2_f); +#ifdef UPB_GOOGLE3 + // Skip lazy fields for now since we can't properly handle them. + if (proto2_f->options().lazy()) continue; +#endif + md->AddField(NewFieldDef(proto2_f, m), &status); + } + ASSERT_STATUS(&status); + return md; +} + +reffed_ptr DefBuilder::NewFieldDef(const goog::FieldDescriptor* f, + const goog::Message* m) { + const goog::Message* subm = NULL; + const goog::Message* weak_prototype = NULL; + + if (m) { +#ifdef UPB_GOOGLE3 + weak_prototype = upb::google::GetProto1WeakPrototype(*m, f); +#endif + subm = GetPrototype(*m, f); + } + + reffed_ptr upb_f(FieldDef::New()); + Status status; + upb_f->set_number(f->number(), &status); + upb_f->set_label(FieldDef::ConvertLabel(f->label())); + + if (f->is_extension()) { + upb_f->set_name(f->full_name(), &status); + upb_f->set_is_extension(true); + } else { + upb_f->set_name(f->name(), &status); + } + + // For weak fields, weak_prototype will be non-NULL even though the proto2 + // descriptor does not indicate a submessage field. + upb_f->set_descriptor_type(weak_prototype + ? 
UPB_DESCRIPTOR_TYPE_MESSAGE + : FieldDef::ConvertDescriptorType(f->type())); + + switch (upb_f->type()) { + case UPB_TYPE_INT32: + upb_f->set_default_int32(f->default_value_int32()); + break; + case UPB_TYPE_INT64: + upb_f->set_default_int64(f->default_value_int64()); + break; + case UPB_TYPE_UINT32: + upb_f->set_default_uint32(f->default_value_uint32()); + break; + case UPB_TYPE_UINT64: + upb_f->set_default_uint64(f->default_value_uint64()); + break; + case UPB_TYPE_DOUBLE: + upb_f->set_default_double(f->default_value_double()); + break; + case UPB_TYPE_FLOAT: + upb_f->set_default_float(f->default_value_float()); + break; + case UPB_TYPE_BOOL: + upb_f->set_default_bool(f->default_value_bool()); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + upb_f->set_default_string(f->default_value_string(), &status); + break; + case UPB_TYPE_MESSAGE: { + const goog::Descriptor* subd = + subm ? subm->GetDescriptor() : f->message_type(); + upb_f->set_message_subdef(GetMaybeUnfrozenMessageDef(subd, subm), + &status); + break; + } + case UPB_TYPE_ENUM: + // We set the enum default numerically. 
+ upb_f->set_default_int32(f->default_value_enum()->number()); + upb_f->set_enum_subdef(GetEnumDef(f->enum_type()), &status); + break; + } + + ASSERT_STATUS(&status); + return upb_f; +} + +void DefBuilder::Freeze() { + upb::Status status; + upb::Def::Freeze(to_freeze_, &status); + ASSERT_STATUS(&status); + to_freeze_.clear(); +} + +const MessageDef* DefBuilder::GetMessageDef(const goog::Descriptor* d) { + const MessageDef* ret = GetMaybeUnfrozenMessageDef(d, NULL); + Freeze(); + return ret; +} + +const MessageDef* DefBuilder::GetMessageDefExpandWeak( + const goog::Message& m) { + const MessageDef* ret = GetMaybeUnfrozenMessageDef(m.GetDescriptor(), &m); + Freeze(); + return ret; +} + + +/* CodeCache *****************************************************************/ + +const Handlers* CodeCache::GetMaybeUnfrozenWriteHandlers( + const MessageDef* md, const goog::Message& m) { + const Handlers* cached = FindInCache(md); + if (cached) return cached; + + Handlers* h = AddToCache(md, upb::Handlers::New(md)); + to_freeze_.push_back(h); + const goog::Descriptor* d = m.GetDescriptor(); + + for (upb::MessageDef::const_iterator i = md->begin(); i != md->end(); ++i) { + const FieldDef* upb_f = *i; + + const goog::FieldDescriptor* proto2_f = + d->FindFieldByNumber(upb_f->number()); + if (!proto2_f) { + proto2_f = d->file()->pool()->FindExtensionByNumber(d, upb_f->number()); + } + assert(proto2_f); + + if (!upb::google::TrySetWriteHandlers(proto2_f, m, upb_f, h) +#ifdef UPB_GOOGLE3 + && !upb::google::TrySetProto1WriteHandlers(proto2_f, m, upb_f, h) +#endif + ) { + // Unsupported reflection class. + // + // Should we fall back to using the public Reflection interface in this + // case? It's unclear whether it's supported behavior for users to + // create their own Reflection classes. 
+ assert(false); + } + + if (upb_f->type() == UPB_TYPE_MESSAGE) { + const goog::Message* prototype = GetPrototype(m, proto2_f); + assert(prototype); + const upb::Handlers* sub_handlers = + GetMaybeUnfrozenWriteHandlers(upb_f->message_subdef(), *prototype); + h->SetSubHandlers(upb_f, sub_handlers); + } + } + + return h; +} + +const Handlers* CodeCache::GetWriteHandlers(const goog::Message& m) { + const MessageDef* md = def_builder_.GetMessageDefExpandWeak(m); + const Handlers* ret = GetMaybeUnfrozenWriteHandlers(md, m); + upb::Status status; + upb::Handlers::Freeze(to_freeze_, &status); + ASSERT_STATUS(&status); + to_freeze_.clear(); + return ret; +} + +upb::reffed_ptr NewWriteHandlers(const goog::Message& m) { + CodeCache cache; + return upb::reffed_ptr(cache.GetWriteHandlers(m)); +} + +} // namespace googlepb +} // namespace upb diff --git a/upb/bindings/googlepb/bridge.h b/upb/bindings/googlepb/bridge.h new file mode 100644 index 0000000..9eed51b --- /dev/null +++ b/upb/bindings/googlepb/bridge.h @@ -0,0 +1,205 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// This file contains functionality for constructing upb Defs and Handlers +// corresponding to proto2 messages. Using this functionality, you can use upb +// to dynamically generate parsing code that can behave exactly like proto2's +// generated parsing code. Alternatively, you can configure things to +// read/write only a subset of the fields for higher performance when only some +// fields are needed. +// +// Example usage: +// +// // JIT the parser; should only be done once ahead-of-time. +// upb::reffed_ptr write_myproto( +// upb::google::NewWriteHandlers(MyProto())); +// upb::reffed_ptr parse_myproto( +// upb::Decoder::NewDecoderHandlers(write_myproto.get(), true)); +// +// // The actual parsing. 
+// MyProto proto; +// upb::SeededPipeline<8192> pipeline(upb_realloc, NULL); +// upb::Sink* write_sink = pipeline.NewSink(write_myproto.get()); +// upb::Sink* parse_sink = pipeline.NewSink(parse_myproto.get()); +// upb::pb::Decoder* decoder = parse_sink->GetObject(); +// upb::pb::ResetDecoderSink(decoder, write_sink); +// write_sink->Reset(&proto); +// +// Note that there is currently no support for +// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate +// DescriptorPool and MessageFactory for extensions. Since this is a property +// of the input in proto2, it's difficult to build a plan ahead-of-time that +// can properly support this. If it's an important use case, the caller should +// probably build a upb plan explicitly. + +#ifndef UPB_GOOGLE_BRIDGE_H_ +#define UPB_GOOGLE_BRIDGE_H_ + +#include +#include +#include "upb/handlers.h" +#include "upb/upb.h" + +namespace google { +namespace protobuf { +class FieldDescriptor; +class Descriptor; +class EnumDescriptor; +class Message; +} // namespace protobuf +} // namespace google + +namespace proto2 { +class FieldDescriptor; +class Descriptor; +class EnumDescriptor; +class Message; +} + +namespace upb { + +namespace googlepb { + +// Returns a upb::Handlers object that can be used to populate a proto2::Message +// object of the same type as "m." For more control over handler caching and +// reuse, instantiate a CodeCache object below. +upb::reffed_ptr NewWriteHandlers(const proto2::Message& m); +upb::reffed_ptr NewWriteHandlers( + const ::google::protobuf::Message& m); + +// Builds upb::Defs from proto2::Descriptors, and caches all built Defs for +// reuse. CodeCache (below) uses this internally; there is no need to use this +// class directly unless you only want Defs without corresponding Handlers. +// +// This class is NOT thread-safe. +class DefBuilder { + public: + // Functions to get or create a Def from a corresponding proto2 Descriptor. + // The returned def will be frozen. 
+ // + // The caller must take a ref on the returned value if it needs it long-term. + // The DefBuilder will retain a ref so it can keep the Def cached, but + // garbage-collection functionality may be added to DefBuilder later that + // could unref the returned pointer. + const EnumDef* GetEnumDef(const proto2::EnumDescriptor* d); + const EnumDef* GetEnumDef(const ::google::protobuf::EnumDescriptor* d); + const MessageDef* GetMessageDef(const proto2::Descriptor* d); + const MessageDef* GetMessageDef(const ::google::protobuf::Descriptor* d); + + // Gets or creates a frozen MessageDef, properly expanding weak fields. + // + // Weak fields are only represented as BYTES fields in the Descriptor (unless + // you construct your descriptors in a somewhat complicated way; see + // https://goto.google.com/weak-field-descriptor), but we can get their true + // definitions relatively easily from the proto Message class. + const MessageDef* GetMessageDefExpandWeak(const proto2::Message& m); + const MessageDef* GetMessageDefExpandWeak( + const ::google::protobuf::Message& m); + + // Static methods for converting a def without building a DefBuilder. + static reffed_ptr NewMessageDef( + const proto2::Descriptor* d) { + DefBuilder builder; + return reffed_ptr(builder.GetMessageDef(d)); + } + + private: + // Like GetMessageDef*(), except the returned def might not be frozen. + // We need this function because circular graphs of MessageDefs need to all + // be frozen together, so we have to create the graphs of defs in an unfrozen + // state first. + // + // If m is non-NULL, expands weak message fields. + const MessageDef* GetMaybeUnfrozenMessageDef(const proto2::Descriptor* d, + const proto2::Message* m); + const MessageDef* GetMaybeUnfrozenMessageDef( + const ::google::protobuf::Descriptor* d, + const ::google::protobuf::Message* m); + + // Returns a new, unfrozen FieldDef corresponding to this FieldDescriptor. 
+ // The return value is always newly created (never cached) and the returned + // pointer is the only owner of it. + // + // If "m" is non-NULL, expands the weak field if it is one, and uses "m" to + // look up a prototype of the submessage if this is a weak or non-weak + // MESSAGE or GROUP field. + reffed_ptr NewFieldDef(const proto2::FieldDescriptor* f, + const proto2::Message* m); + reffed_ptr NewFieldDef(const ::google::protobuf::FieldDescriptor* f, + const ::google::protobuf::Message* m); + + // Freeze all defs that haven't been frozen yet. + void Freeze(); + + template + T* AddToCache(const void *proto2_descriptor, reffed_ptr def) { + assert(def_cache_.find(proto2_descriptor) == def_cache_.end()); + def_cache_[proto2_descriptor] = def; + return def.get(); // Continued lifetime is guaranteed by cache. + } + + template + const T* FindInCache(const void *proto2_descriptor) { + DefCache::iterator iter = def_cache_.find(proto2_descriptor); + return iter == def_cache_.end() ? NULL : + upb::down_cast(iter->second.get()); + } + + private: + // Maps a proto2 descriptor to the corresponding upb Def we have constructed. + // The proto2 descriptor is void* because the proto2 descriptor types do not + // share a common base. + typedef std::map > DefCache; + DefCache def_cache_; + + // Defs that have not been frozen yet. + std::vector to_freeze_; +}; + +// Builds and caches upb::Handlers for populating proto2 generated classes. +// +// This class is NOT thread-safe. +class CodeCache { + public: + // Gets or creates handlers for populating messages of the given message type. + // + // The caller must take a ref on the returned value if it needs it long-term. + // The CodeCache will retain a ref so it can keep the Handlers cached, but + // garbage-collection functionality may be added to CodeCache later that could + // unref the returned pointer. 
+ const Handlers* GetWriteHandlers(const proto2::Message& m); + const Handlers* GetWriteHandlers(const ::google::protobuf::Message& m); + + private: + const Handlers* GetMaybeUnfrozenWriteHandlers(const MessageDef* md, + const proto2::Message& m); + const Handlers* GetMaybeUnfrozenWriteHandlers( + const MessageDef* md, const ::google::protobuf::Message& m); + + Handlers* AddToCache(const MessageDef* md, reffed_ptr handlers) { + assert(handlers_cache_.find(md) == handlers_cache_.end()); + handlers_cache_[md] = handlers; + return handlers.get(); // Continued lifetime is guaranteed by the cache. + } + + const Handlers* FindInCache(const MessageDef* md) { + HandlersCache::iterator iter = handlers_cache_.find(md); + return iter == handlers_cache_.end() ? NULL : iter->second.get(); + } + + DefBuilder def_builder_; + + typedef std::map > + HandlersCache; + HandlersCache handlers_cache_; + + std::vector to_freeze_; +}; + +} // namespace googlepb +} // namespace upb + +#endif // UPB_GOOGLE_BRIDGE_H_ diff --git a/upb/bindings/googlepb/proto1.cc b/upb/bindings/googlepb/proto1.cc new file mode 100644 index 0000000..c317cdf --- /dev/null +++ b/upb/bindings/googlepb/proto1.cc @@ -0,0 +1,483 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// This set of handlers can write into a proto2::Message whose reflection class +// is _pi::Proto2Reflection (i.e. proto1 messages; while slightly confusing, the +// name "Proto2Reflection" indicates that it is a reflection class implementing +// the proto2 reflection interface, but is used for proto1 generated messages). +// +// Like FieldAccessor this depends on breaking encapsulation, and will need to +// be changed if and when the details of _pi::Proto2Reflection change. 
+// +// Note that we have received an exception from c-style-arbiters regarding +// dynamic_cast<> in this file: +// https://groups.google.com/a/google.com/d/msg/c-style/7Zp_XCX0e7s/I6dpzno4l-MJ + +#include "upb/bindings/googlepb/proto1.h" + +#include + +#include "net/proto2/public/repeated_field.h" +#include "net/proto/internal_layout.h" +#include "net/proto/proto2_reflection.h" +#include "upb/def.h" +#include "upb/handlers.h" +#include "upb/shim/shim.h" +#include "upb/sink.h" + +// Unconditionally evaluate, but also assert in debug mode. +#define CHKRET(x) do { bool ok = (x); UPB_UNUSED(ok); assert(ok); } while (0) + +template static T* GetPointer(void* message, size_t offset) { + return reinterpret_cast(static_cast(message) + offset); +} + +namespace upb { +namespace google { + +class P2R_Handlers { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, const upb::FieldDef* upb_f, + upb::Handlers* h) { + const proto2::Reflection* base_r = m.GetReflection(); + // See file comment re: dynamic_cast. + const _pi::Proto2Reflection* r = + dynamic_cast(base_r); + if (!r) return false; + // Extensions don't exist in proto1. 
+ assert(!proto2_f->is_extension()); + +#define PRIMITIVE(name, type_name) \ + case _pi::CREP_REQUIRED_##name: \ + case _pi::CREP_OPTIONAL_##name: \ + case _pi::CREP_REPEATED_##name: \ + SetPrimitiveHandlers(proto2_f, r, upb_f, h); \ + return true; + + switch (r->GetFieldLayout(proto2_f)->crep) { + PRIMITIVE(DOUBLE, double); + PRIMITIVE(FLOAT, float); + PRIMITIVE(INT64, int64_t); + PRIMITIVE(UINT64, uint64_t); + PRIMITIVE(INT32, int32_t); + PRIMITIVE(FIXED64, uint64_t); + PRIMITIVE(FIXED32, uint32_t); + PRIMITIVE(BOOL, bool); + case _pi::CREP_REQUIRED_STRING: + case _pi::CREP_OPTIONAL_STRING: + case _pi::CREP_REPEATED_STRING: + SetStringHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_OUTOFLINE_STRING: + SetOutOfLineStringHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_REQUIRED_CORD: + case _pi::CREP_OPTIONAL_CORD: + case _pi::CREP_REPEATED_CORD: + SetCordHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_REQUIRED_GROUP: + case _pi::CREP_REQUIRED_FOREIGN: + case _pi::CREP_REQUIRED_FOREIGN_PROTO2: + SetRequiredMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_GROUP: + case _pi::CREP_REPEATED_GROUP: + case _pi::CREP_OPTIONAL_FOREIGN: + case _pi::CREP_REPEATED_FOREIGN: + case _pi::CREP_OPTIONAL_FOREIGN_PROTO2: + case _pi::CREP_REPEATED_FOREIGN_PROTO2: + SetMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_FOREIGN_WEAK: + case _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2: + SetWeakMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + default: + assert(false); + return false; + } + } + +#undef PRIMITIVE + + // If the field "f" in the message "m" is a weak field, returns the prototype + // of the submessage (which may be a specific type or may be OpaqueMessage). + // Otherwise returns NULL. + static const proto2::Message* GetWeakPrototype( + const proto2::Message& m, const proto2::FieldDescriptor* f) { + // See file comment re: dynamic_cast. 
+ const _pi::Proto2Reflection* r = + dynamic_cast(m.GetReflection()); + if (!r) return NULL; + + const _pi::Field* field = r->GetFieldLayout(f); + if (field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) { + return static_cast( + field->weak_layout()->default_instance); + } else if (field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2) { + return field->proto2_weak_default_instance(); + } else { + return NULL; + } + } + + // If "m" is a message that uses Proto2Reflection, returns the prototype of + // the submessage (which may be OpaqueMessage for a weak field that is not + // linked in). Otherwise returns NULL. + static const proto2::Message* GetFieldPrototype( + const proto2::Message& m, const proto2::FieldDescriptor* f) { + // See file comment re: dynamic_cast. + const proto2::Message* ret = GetWeakPrototype(m, f); + if (ret) { + return ret; + } else if (dynamic_cast(m.GetReflection())) { + // Since proto1 has no dynamic message, it must be from the generated + // factory. + assert(f->cpp_type() == proto2::FieldDescriptor::CPPTYPE_MESSAGE); + ret = proto2::MessageFactory::generated_factory()->GetPrototype( + f->message_type()); + assert(ret); + return ret; + } else { + return NULL; + } + } + + private: + class FieldOffset { + public: + FieldOffset(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) + : offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) { + if (!is_repeated_) { + int64_t hasbit = GetHasbit(f, r); + hasbyte_ = hasbit / 8; + mask_ = 1 << (hasbit % 8); + } + } + + template T* GetFieldPointer(proto2::Message* message) const { + return GetPointer(message, offset_); + } + + void SetHasbit(void* message) const { + assert(!is_repeated_); + uint8_t* byte = GetPointer(message, hasbyte_); + *byte |= mask_; + } + + private: + const size_t offset_; + bool is_repeated_; + + // Only for non-repeated fields. 
+ int32_t hasbyte_; + int8_t mask_; + }; + + static upb_selector_t GetSelector(const upb::FieldDef* f, + upb::Handlers::Type type) { + upb::Handlers::Selector selector; + bool ok = upb::Handlers::GetSelector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; + } + + static int16_t GetHasbit(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + assert(!f->is_repeated()); + return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index; + } + + static uint16_t GetOffset(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + return r->GetFieldLayout(f)->offset; + } + + // StartSequence ///////////////////////////////////////////////////////////// + + template + static void SetStartRepeatedField( + const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + CHKRET(h->SetStartSequenceHandler( + f, UpbBindT(PushOffset >, + new FieldOffset(proto2_f, r)))); + } + + template + static void SetStartRepeatedPtrField( + const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + CHKRET(h->SetStartSequenceHandler( + f, UpbBindT(PushOffset >, + new FieldOffset(proto2_f, r)))); + } + + static void SetStartRepeatedSubmessageField( + const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + CHKRET(h->SetStartSequenceHandler( + f, UpbBind(PushOffset, + new FieldOffset(proto2_f, r)))); + } + + template + static T* PushOffset(proto2::Message* m, const FieldOffset* offset) { + return offset->GetFieldPointer(m); + } + + // Primitive Value (numeric, enum, bool) ///////////////////////////////////// + + template + static void SetPrimitiveHandlers(const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartRepeatedField(proto2_f, r, f, h); + 
CHKRET(h->SetValueHandler(f, UpbMakeHandlerT(Append))); + } else { + CHKRET( + upb::Shim::Set(h, f, GetOffset(proto2_f, r), GetHasbit(proto2_f, r))); + } + } + + template + static void Append(proto2::RepeatedField* r, T val) { + // Proto1's ProtoArray class derives from proto2::RepeatedField. + r->Add(val); + } + + // String //////////////////////////////////////////////////////////////////// + + static void SetStringHandlers(const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, UpbMakeHandler(OnStringBuf)); + if (f->IsSequence()) { + SetStartRepeatedPtrField(proto2_f, r, f, h); + CHKRET(h->SetStartStringHandler(f, UpbMakeHandler(StartRepeatedString))); + } else { + CHKRET(h->SetStartStringHandler( + f, UpbBind(StartString, new FieldOffset(proto2_f, r)))); + } + } + + static string* StartString(proto2::Message* m, const FieldOffset* info, + size_t size_hint) { + info->SetHasbit(m); + string* str = info->GetFieldPointer(m); + str->clear(); + // reserve() here appears to hurt performance rather than help. + return str; + } + + static void OnStringBuf(string* s, const char* buf, size_t n) { + s->append(buf, n); + } + + static string* StartRepeatedString(proto2::RepeatedPtrField* r, + size_t size_hint) { + string* str = r->Add(); + // reserve() here appears to hurt performance rather than help. + return str; + } + + // Out-of-line string //////////////////////////////////////////////////////// + + static void SetOutOfLineStringHandlers( + const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + // This type is only used for non-repeated string fields. 
+ assert(!f->IsSequence()); + CHKRET(h->SetStartStringHandler( + f, UpbBind(StartOutOfLineString, new FieldOffset(proto2_f, r)))); + CHKRET(h->SetStringHandler(f, UpbMakeHandler(OnStringBuf))); + } + + static string* StartOutOfLineString(proto2::Message* m, + const FieldOffset* info, + size_t size_hint) { + info->SetHasbit(m); + string** str = info->GetFieldPointer(m); + if (*str == &::proto2::internal::GetEmptyString()) + *str = new string(); + (*str)->clear(); + // reserve() here appears to hurt performance rather than help. + return *str; + } + + // Cord ////////////////////////////////////////////////////////////////////// + + static void SetCordHandlers(const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartRepeatedField(proto2_f, r, f, h); + CHKRET(h->SetStartStringHandler(f, UpbMakeHandler(StartRepeatedCord))); + } else { + CHKRET(h->SetStartStringHandler( + f, UpbBind(StartCord, new FieldOffset(proto2_f, r)))); + } + CHKRET(h->SetStringHandler(f, UpbMakeHandler(OnCordBuf))); + } + + static Cord* StartCord(proto2::Message* m, const FieldOffset* offset, + size_t size_hint) { + UPB_UNUSED(size_hint); + offset->SetHasbit(m); + Cord* field = offset->GetFieldPointer(m); + field->Clear(); + return field; + } + + static void OnCordBuf(Cord* c, const char* buf, size_t n) { + c->Append(StringPiece(buf, n)); + } + + static Cord* StartRepeatedCord(proto2::RepeatedField* r, + size_t size_hint) { + UPB_UNUSED(size_hint); + return r->Add(); + } + + // SubMessage //////////////////////////////////////////////////////////////// + + class SubMessageHandlerData : public FieldOffset { + public: + SubMessageHandlerData(const proto2::Message& prototype, + const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) + : FieldOffset(f, r) { + prototype_ = GetWeakPrototype(prototype, f); + if (!prototype_) prototype_ = GetFieldPrototype(prototype, f); + } + + const 
proto2::Message* prototype() const { return prototype_; } + + private: + const proto2::Message* prototype_; + }; + + static void SetRequiredMessageHandlers( + const proto2::FieldDescriptor* proto2_f, const proto2::Message& m, + const _pi::Proto2Reflection* r, const upb::FieldDef* f, + upb::Handlers* h) { + if (f->IsSequence()) { + SetStartRepeatedSubmessageField(proto2_f, r, f, h); + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartRepeatedSubMessage, + new SubMessageHandlerData(m, proto2_f, r)))); + } else { + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartRequiredSubMessage, new FieldOffset(proto2_f, r)))); + } + } + + static proto2::Message* StartRequiredSubMessage(proto2::Message* m, + const FieldOffset* offset) { + offset->SetHasbit(m); + return offset->GetFieldPointer(m); + } + + static void SetMessageHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + std::unique_ptr data( + new SubMessageHandlerData(m, proto2_f, r)); + if (f->IsSequence()) { + SetStartRepeatedSubmessageField(proto2_f, r, f, h); + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartRepeatedSubMessage, data.release()))); + } else { + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartSubMessage, data.release()))); + } + } + + static void SetWeakMessageHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + std::unique_ptr data( + new SubMessageHandlerData(m, proto2_f, r)); + if (f->IsSequence()) { + SetStartRepeatedSubmessageField(proto2_f, r, f, h); + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartRepeatedSubMessage, data.release()))); + } else { + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartWeakSubMessage, data.release()))); + } + } + + static void* StartSubMessage(proto2::Message* m, + const SubMessageHandlerData* info) { + 
info->SetHasbit(m); + proto2::Message** subm = info->GetFieldPointer(m); + if (*subm == info->prototype()) *subm = (*subm)->New(); + return *subm; + } + + static void* StartWeakSubMessage(proto2::Message* m, + const SubMessageHandlerData* info) { + info->SetHasbit(m); + proto2::Message** subm = info->GetFieldPointer(m); + if (*subm == NULL) { + *subm = info->prototype()->New(); + } + return *subm; + } + + class RepeatedMessageTypeHandler { + public: + typedef proto2::Message Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + UPB_UNUSED(t); + assert(false); + } + }; + + // Closure is a RepeatedPtrField*, but we access it through + // its base class RepeatedPtrFieldBase*. + static proto2::Message* StartRepeatedSubMessage( + proto2::internal::RepeatedPtrFieldBase* r, + const SubMessageHandlerData* info) { + proto2::Message* submsg = r->AddFromCleared(); + if (!submsg) { + submsg = info->prototype()->New(); + r->AddAllocated(submsg); + } + return submsg; + } +}; + +bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const upb::FieldDef* upb_f, upb::Handlers* h) { + return P2R_Handlers::TrySet(proto2_f, m, upb_f, h); +} + +const proto2::Message* GetProto1WeakPrototype( + const proto2::Message& m, const proto2::FieldDescriptor* f) { + return P2R_Handlers::GetWeakPrototype(m, f); +} + +const proto2::Message* GetProto1FieldPrototype( + const proto2::Message& m, const proto2::FieldDescriptor* f) { + return P2R_Handlers::GetFieldPrototype(m, f); +} + +} // namespace google +} // namespace upb diff --git a/upb/bindings/googlepb/proto1.h b/upb/bindings/googlepb/proto1.h new file mode 100644 index 0000000..eb550ac --- /dev/null +++ b/upb/bindings/googlepb/proto1.h @@ -0,0 +1,51 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. 
+// Author: Josh Haberman +// +// Support for registering field handlers that can write into a legacy proto1 +// message. This functionality is only needed inside Google. +// +// This is a low-level interface; the high-level interface in google.h is +// more user-friendly. + +#ifndef UPB_GOOGLE_PROTO1_H_ +#define UPB_GOOGLE_PROTO1_H_ + +namespace proto2 { +class FieldDescriptor; +class Message; +} + +namespace upb { +class FieldDef; +class Handlers; +} + +namespace upb { +namespace google { + +// Sets field handlers in the given Handlers object for writing to a single +// field (as described by "proto2_f" and "upb_f") into a message constructed +// by the same factory as "prototype." Returns true if this was successful +// (this will fail if "prototype" is not a proto1 message, or if we can't +// handle it for some reason). +bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); + +// Returns a prototype for the given field in "m", if it is weak. The returned +// message could be the linked-in message type or OpaqueMessage, if the weak +// message is *not* linked in. Otherwise returns NULL. +const proto2::Message* GetProto1WeakPrototype(const proto2::Message& m, + const proto2::FieldDescriptor* f); + +// Returns a prototype for the given non-weak field in "m". +const proto2::Message* GetProto1FieldPrototype( + const proto2::Message& m, const proto2::FieldDescriptor* f); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_PROTO1_H_ diff --git a/upb/bindings/googlepb/proto2.cc b/upb/bindings/googlepb/proto2.cc new file mode 100644 index 0000000..c0b4907 --- /dev/null +++ b/upb/bindings/googlepb/proto2.cc @@ -0,0 +1,816 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. 
+// Author: Josh Haberman +// +// Note that we have received an exception from c-style-arbiters regarding +// dynamic_cast<> in this file: +// https://groups.google.com/a/google.com/d/msg/c-style/7Zp_XCX0e7s/I6dpzno4l-MJ +// +// IMPORTANT NOTE! This file is compiled TWICE, once with UPB_GOOGLE3 defined +// and once without! This allows us to provide functionality against proto2 +// and protobuf opensource both in a single binary without the two conflicting. +// However we must be careful not to violate the ODR. + +#include "upb/bindings/googlepb/proto2.h" + +#include "upb/def.h" +#include "upb/bindings/googlepb/proto1.h" +#include "upb/handlers.h" +#include "upb/shim/shim.h" +#include "upb/sink.h" + +namespace { + +template To CheckDownCast(From* f) { + assert(f == NULL || dynamic_cast(f) != NULL); + return static_cast(f); +} + +} + +// Unconditionally evaluate, but also assert in debug mode. +#define CHKRET(x) do { bool ok = (x); UPB_UNUSED(ok); assert(ok); } while (0) + +namespace upb { +namespace google_google3 { class GMR_Handlers; } +namespace google_opensource { class GMR_Handlers; } +} // namespace upb + +// BEGIN DOUBLE COMPILATION TRICKERY. ////////////////////////////////////////// + +#ifdef UPB_GOOGLE3 + +#include "net/proto2/proto/descriptor.pb.h" +#include "net/proto2/public/descriptor.h" +#include "net/proto2/public/extension_set.h" +#include "net/proto2/public/generated_message_reflection.h" +#include "net/proto2/public/lazy_field.h" +#include "net/proto2/public/message.h" +#include "net/proto2/public/repeated_field.h" +#include "net/proto2/public/string_piece_field_support.h" + +namespace goog = ::proto2; +namespace me = ::upb::google_google3; + +#else + +// TODO(haberman): remove these once new versions of protobuf that "friend" +// upb are pervasive in the wild. 
+#define protected public +#include "google/protobuf/repeated_field.h" +#undef protected + +#define private public +#include "google/protobuf/generated_message_reflection.h" +#undef private + +#include "google/protobuf/descriptor.h" +#include "google/protobuf/descriptor.pb.h" +#include "google/protobuf/extension_set.h" +#include "google/protobuf/message.h" + +namespace goog = ::google::protobuf; +namespace me = ::upb::google_opensource; + +using goog::int32; +using goog::int64; +using goog::uint32; +using goog::uint64; +using goog::scoped_ptr; + +#endif // ifdef UPB_GOOGLE3 + +// END DOUBLE COMPILATION TRICKERY. //////////////////////////////////////////// + +// Have to define this manually since older versions of proto2 didn't define +// an enum value for STRING. +#define UPB_CTYPE_STRING 0 + +template static T* GetPointer(void* message, size_t offset) { + return reinterpret_cast(static_cast(message) + offset); +} +template +static const T* GetConstPointer(const void* message, size_t offset) { + return reinterpret_cast(static_cast(message) + offset); +} + +// This class contains handlers that can write into a proto2 class whose +// reflection class is GeneratedMessageReflection. (Despite the name, even +// DynamicMessage uses GeneratedMessageReflection, so this covers all proto2 +// messages generated by the compiler.) To do this it must break the +// encapsulation of GeneratedMessageReflection and therefore depends on +// internal interfaces that are not guaranteed to be stable. This class will +// need to be updated if any non-backward-compatible changes are made to +// GeneratedMessageReflection. +class me::GMR_Handlers { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. 
+ static bool TrySet(const goog::FieldDescriptor* proto2_f, + const goog::Message& m, const upb::FieldDef* upb_f, + upb::Handlers* h) { + const goog::Reflection* base_r = m.GetReflection(); + // See file comment re: dynamic_cast. + const goog::internal::GeneratedMessageReflection* r = + dynamic_cast(base_r); + if (!r) return false; + +#define PRIMITIVE_TYPE(cpptype, cident) \ +case goog::FieldDescriptor::cpptype: \ + SetPrimitiveHandlers(proto2_f, r, upb_f, h); \ + return true; + + switch (proto2_f->cpp_type()) { + PRIMITIVE_TYPE(CPPTYPE_INT32, int32); + PRIMITIVE_TYPE(CPPTYPE_INT64, int64); + PRIMITIVE_TYPE(CPPTYPE_UINT32, uint32); + PRIMITIVE_TYPE(CPPTYPE_UINT64, uint64); + PRIMITIVE_TYPE(CPPTYPE_DOUBLE, double); + PRIMITIVE_TYPE(CPPTYPE_FLOAT, float); + PRIMITIVE_TYPE(CPPTYPE_BOOL, bool); + case goog::FieldDescriptor::CPPTYPE_ENUM: + if (proto2_f->is_extension()) { + SetEnumExtensionHandlers(proto2_f, r, upb_f, h); + } else { + SetEnumHandlers(proto2_f, r, upb_f, h); + } + return true; + case goog::FieldDescriptor::CPPTYPE_STRING: { + if (proto2_f->is_extension()) { +#ifdef UPB_GOOGLE3 + SetStringExtensionHandlers(proto2_f, r, upb_f, h); +#else + SetStringExtensionHandlers(proto2_f, r, upb_f, h); +#endif + return true; + } + + // Old versions of the open-source protobuf release erroneously default + // to Cord even though that has never been supported in the open-source + // release. + int32_t ctype = proto2_f->options().has_ctype() ? 
+ proto2_f->options().ctype() + : UPB_CTYPE_STRING; + switch (ctype) { +#ifdef UPB_GOOGLE3 + case goog::FieldOptions::STRING: + SetStringHandlers(proto2_f, r, upb_f, h); + return true; + case goog::FieldOptions::CORD: + SetCordHandlers(proto2_f, r, upb_f, h); + return true; + case goog::FieldOptions::STRING_PIECE: + SetStringPieceHandlers(proto2_f, r, upb_f, h); + return true; +#else + case UPB_CTYPE_STRING: + SetStringHandlers(proto2_f, r, upb_f, h); + return true; +#endif + default: + return false; + } + } + case goog::FieldDescriptor::CPPTYPE_MESSAGE: +#ifdef UPB_GOOGLE3 + if (proto2_f->options().lazy()) { + assert(false); + return false; // Not yet implemented. + } +#endif + if (proto2_f->is_extension()) { + SetSubMessageExtensionHandlers(proto2_f, m, r, upb_f, h); + return true; + } + SetSubMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + default: + return false; + } + } + +#undef PRIMITIVE_TYPE + + static const goog::Message* GetFieldPrototype( + const goog::Message& m, const goog::FieldDescriptor* f) { + // We assume that all submessages (and extensions) will be constructed + // using the same MessageFactory as this message. This doesn't cover the + // case of CodedInputStream::SetExtensionRegistry(). + // See file comment re: dynamic_cast. + const goog::internal::GeneratedMessageReflection* r = + dynamic_cast( + m.GetReflection()); + if (!r) return NULL; + return r->message_factory_->GetPrototype(f->message_type()); + } + + private: + static upb_selector_t GetSelector(const upb::FieldDef* f, + upb::Handlers::Type type) { + upb::Handlers::Selector selector; + bool ok = upb::Handlers::GetSelector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; + } + + static int64_t GetHasbit( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) { + // proto2 does not store hasbits for repeated fields. 
+ assert(!f->is_repeated()); + return (r->has_bits_offset_ * 8) + f->index(); + } + + static uint16_t GetOffset( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) { + return r->offsets_[f->index()]; + } + + class FieldOffset { + public: + FieldOffset(const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) + : offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) { + if (!is_repeated_) { + int64_t hasbit = GetHasbit(f, r); + hasbyte_ = hasbit / 8; + mask_ = 1 << (hasbit % 8); + } + } + + template T* GetFieldPointer(goog::Message* message) const { + return GetPointer(message, offset_); + } + + void SetHasbit(void* m) const { + assert(!is_repeated_); + uint8_t* byte = GetPointer(m, hasbyte_); + *byte |= mask_; + } + + private: + const size_t offset_; + bool is_repeated_; + + // Only for non-repeated fields. + int32_t hasbyte_; + int8_t mask_; + }; + + class ExtensionFieldData { + public: + ExtensionFieldData( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r) + : offset_(r->extensions_offset_), + number_(proto2_f->number()), + type_(proto2_f->type()) { + } + + int number() const { return number_; } + goog::internal::FieldType type() const { return type_; } + + goog::internal::ExtensionSet* GetExtensionSet(goog::Message* m) const { + return GetPointer(m, offset_); + } + + private: + const size_t offset_; + int number_; + goog::internal::FieldType type_; + }; + + // StartSequence ///////////////////////////////////////////////////////////// + + template + static void SetStartRepeatedField( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + CHKRET(h->SetStartSequenceHandler( + f, UpbBindT(&PushOffset >, + new FieldOffset(proto2_f, r)))); + } + + template + static void SetStartRepeatedPtrField( + const goog::FieldDescriptor* proto2_f, + const 
goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + CHKRET(h->SetStartSequenceHandler( + f, UpbBindT(&PushOffset >, + new FieldOffset(proto2_f, r)))); + } + + static void SetStartRepeatedSubmessageField( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + CHKRET(h->SetStartSequenceHandler( + f, UpbBind(&PushOffset, + new FieldOffset(proto2_f, r)))); + } + + template + static T* PushOffset(goog::Message* message, const FieldOffset* offset) { + return offset->GetFieldPointer(message); + } + + // Primitive Value (numeric, bool) /////////////////////////////////////////// + + template static void SetPrimitiveHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (proto2_f->is_extension()) { + scoped_ptr data(new ExtensionFieldData(proto2_f, r)); + if (f->IsSequence()) { + CHKRET(h->SetValueHandler( + f, UpbBindT(AppendPrimitiveExtension, data.release()))); + } else { + CHKRET(h->SetValueHandler( + f, UpbBindT(SetPrimitiveExtension, data.release()))); + } + } else { + if (f->IsSequence()) { + SetStartRepeatedField(proto2_f, r, f, h); + CHKRET(h->SetValueHandler(f, UpbMakeHandlerT(AppendPrimitive))); + } else { + CHKRET(upb::Shim::Set(h, f, GetOffset(proto2_f, r), + GetHasbit(proto2_f, r))); + } + } + } + + template + static void AppendPrimitive(goog::RepeatedField* r, T val) { r->Add(val); } + + template + static void AppendPrimitiveExtension(goog::Message* m, + const ExtensionFieldData* data, T val) { + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + // TODO(haberman): give an accurate value for "packed" + goog::internal::RepeatedPrimitiveTypeTraits::Add( + data->number(), data->type(), true, val, set); + } + + template + static void SetPrimitiveExtension(goog::Message* m, + const ExtensionFieldData* data, T val) { + 
goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + goog::internal::PrimitiveTypeTraits::Set(data->number(), data->type(), + val, set); + } + + // Enum ////////////////////////////////////////////////////////////////////// + + class EnumHandlerData : public FieldOffset { + public: + EnumHandlerData(const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f) + : FieldOffset(proto2_f, r), + field_number_(f->number()), + unknown_fields_offset_(r->unknown_fields_offset_), + enum_(upb_downcast_enumdef(f->subdef())) {} + + bool IsValidValue(int32_t val) const { + return enum_->FindValueByNumber(val) != NULL; + } + + int32_t field_number() const { return field_number_; } + + goog::UnknownFieldSet* mutable_unknown_fields(goog::Message* m) const { + return GetPointer(m, unknown_fields_offset_); + } + + private: + int32_t field_number_; + size_t unknown_fields_offset_; + const upb::EnumDef* enum_; + }; + + static void SetEnumHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(!proto2_f->is_extension()); + scoped_ptr data(new EnumHandlerData(proto2_f, r, f)); + if (f->IsSequence()) { + CHKRET(h->SetInt32Handler(f, UpbBind(AppendEnum, data.release()))); + } else { + CHKRET(h->SetInt32Handler(f, UpbBind(SetEnum, data.release()))); + } + } + + static void SetEnum(goog::Message* m, const EnumHandlerData* data, + int32_t val) { + if (data->IsValidValue(val)) { + int32_t* message_val = data->GetFieldPointer(m); + *message_val = val; + data->SetHasbit(m); + } else { + data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + } + } + + static void AppendEnum(goog::Message* m, const EnumHandlerData* data, + int32_t val) { + // Closure is the enclosing message. 
We can't use the RepeatedField<> as + // the closure because we need to go back to the message for unrecognized + // enum values, which go into the unknown field set. + if (data->IsValidValue(val)) { + goog::RepeatedField* r = + data->GetFieldPointer >(m); + r->Add(val); + } else { + data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + } + } + + // EnumExtension ///////////////////////////////////////////////////////////// + + static void SetEnumExtensionHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(proto2_f->is_extension()); + scoped_ptr data(new ExtensionFieldData(proto2_f, r)); + if (f->IsSequence()) { + CHKRET( + h->SetInt32Handler(f, UpbBind(AppendEnumExtension, data.release()))); + } else { + CHKRET(h->SetInt32Handler(f, UpbBind(SetEnumExtension, data.release()))); + } + } + + static void SetEnumExtension(goog::Message* m, const ExtensionFieldData* data, + int32_t val) { + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + set->SetEnum(data->number(), data->type(), val, NULL); + } + + static void AppendEnumExtension(goog::Message* m, + const ExtensionFieldData* data, int32_t val) { + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + // TODO(haberman): give an accurate value for "packed" + set->AddEnum(data->number(), data->type(), true, val, NULL); + } + + // String //////////////////////////////////////////////////////////////////// + + // For scalar (non-repeated) string fields. 
+ template class StringHandlerData : public FieldOffset { + public: + StringHandlerData(const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r) + : FieldOffset(proto2_f, r), + prototype_(*GetConstPointer(r->default_instance_, + GetOffset(proto2_f, r))) {} + + const T* prototype() const { return prototype_; } + + T** GetStringPointer(goog::Message* message) const { + return GetFieldPointer(message); + } + + private: + const T* prototype_; + }; + + template static void SetStringHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + assert(!proto2_f->is_extension()); + CHKRET(h->SetStringHandler(f, UpbMakeHandlerT(&OnStringBuf))); + if (f->IsSequence()) { + SetStartRepeatedPtrField(proto2_f, r, f, h); + CHKRET( + h->SetStartStringHandler(f, UpbMakeHandlerT(StartRepeatedString))); + } else { + CHKRET(h->SetStartStringHandler( + f, UpbBindT(StartString, new StringHandlerData(proto2_f, r)))); + } + } + + // This needs to be templated because google3 string is not std::string. + template + static T* StartString(goog::Message* m, const StringHandlerData* data, + size_t size_hint) { + UPB_UNUSED(size_hint); + T** str = data->GetStringPointer(m); + data->SetHasbit(m); + // If it points to the default instance, we must create a new instance. + if (*str == data->prototype()) *str = new T(); + (*str)->clear(); + // reserve() here appears to hurt performance rather than help. + return *str; + } + + template + static void OnStringBuf(T* str, const char* buf, size_t n) { + str->append(buf, n); + } + + template + static T* StartRepeatedString(goog::RepeatedPtrField* r, + size_t size_hint) { + UPB_UNUSED(size_hint); + T* str = r->Add(); + str->clear(); + // reserve() here appears to hurt performance rather than help. 
+ return str; + } + + // StringExtension /////////////////////////////////////////////////////////// + + template + static void SetStringExtensionHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(proto2_f->is_extension()); + CHKRET(h->SetStringHandler(f, UpbMakeHandlerT(OnStringBuf))); + scoped_ptr data(new ExtensionFieldData(proto2_f, r)); + if (f->IsSequence()) { + CHKRET(h->SetStartStringHandler( + f, UpbBindT(StartRepeatedStringExtension, data.release()))); + } else { + CHKRET(h->SetStartStringHandler( + f, UpbBindT(StartStringExtension, data.release()))); + } + } + + // Templated because google3 string is not std::string. + template + static T* StartStringExtension(goog::Message* m, + const ExtensionFieldData* data, + size_t size_hint) { + UPB_UNUSED(size_hint); + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + return set->MutableString(data->number(), data->type(), NULL); + } + + template + static T* StartRepeatedStringExtension(goog::Message* m, + const ExtensionFieldData* data, + size_t size_hint) { + UPB_UNUSED(size_hint); + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + return set->AddString(data->number(), data->type(), NULL); + } + + // SubMessage //////////////////////////////////////////////////////////////// + + class SubMessageHandlerData : public FieldOffset { + public: + SubMessageHandlerData(const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r, + const goog::Message* prototype) + : FieldOffset(f, r), prototype_(prototype) {} + + const goog::Message* prototype() const { return prototype_; } + + private: + const goog::Message* const prototype_; + }; + + static void SetSubMessageHandlers( + const goog::FieldDescriptor* proto2_f, const goog::Message& m, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + const goog::Message* 
field_prototype = GetFieldPrototype(m, proto2_f); + scoped_ptr data( + new SubMessageHandlerData(proto2_f, r, field_prototype)); + if (f->IsSequence()) { + SetStartRepeatedSubmessageField(proto2_f, r, f, h); + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartRepeatedSubMessage, data.release()))); + } else { + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartSubMessage, data.release()))); + } + } + + static goog::Message* StartSubMessage(goog::Message* m, + const SubMessageHandlerData* data) { + data->SetHasbit(m); + goog::Message** subm = data->GetFieldPointer(m); + if (*subm == NULL || *subm == data->prototype()) { + *subm = data->prototype()->New(); + } + return *subm; + } + + class RepeatedMessageTypeHandler { + public: + typedef goog::Message Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + UPB_UNUSED(t); + assert(false); + } + }; + + // Closure is a RepeatedPtrField*, but we access it through + // its base class RepeatedPtrFieldBase*. 
+ static goog::Message* StartRepeatedSubMessage( + goog::internal::RepeatedPtrFieldBase* r, + const SubMessageHandlerData* data) { + goog::Message* submsg = r->AddFromCleared(); + if (!submsg) { + submsg = data->prototype()->New(); + r->AddAllocated(submsg); + } + return submsg; + } + + // SubMessageExtension /////////////////////////////////////////////////////// + + class SubMessageExtensionHandlerData : public ExtensionFieldData { + public: + SubMessageExtensionHandlerData( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const goog::Message* prototype) + : ExtensionFieldData(proto2_f, r), + prototype_(prototype) { + } + + const goog::Message* prototype() const { return prototype_; } + + private: + const goog::Message* const prototype_; + }; + + static void SetSubMessageExtensionHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::Message& m, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + const goog::Message* field_prototype = GetFieldPrototype(m, proto2_f); + scoped_ptr data( + new SubMessageExtensionHandlerData(proto2_f, r, field_prototype)); + if (f->IsSequence()) { + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartRepeatedSubMessageExtension, data.release()))); + } else { + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartSubMessageExtension, data.release()))); + } + } + + static goog::Message* StartRepeatedSubMessageExtension( + goog::Message* m, const SubMessageExtensionHandlerData* data) { + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + // Because we found this message via a descriptor, we know it has a + // descriptor and is therefore a Message and not a MessageLite. + // Alternatively we could just use goog::MessageLite everywhere to avoid + // this, but since they are in fact goog::Messages, it seems most clear + // to refer to them as such. 
+ return CheckDownCast(set->AddMessage( + data->number(), data->type(), *data->prototype(), NULL)); + } + + static goog::Message* StartSubMessageExtension( + goog::Message* m, const SubMessageExtensionHandlerData* data) { + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + // See comment above re: this down cast. + return CheckDownCast(set->MutableMessage( + data->number(), data->type(), *data->prototype(), NULL)); + } + + // TODO(haberman): handle Unknown Fields. + +#ifdef UPB_GOOGLE3 + // Handlers for types/features only included in internal proto2 release: + // Cord, StringPiece, LazyField, and MessageSet. + // TODO(haberman): LazyField, MessageSet. + + // Cord ////////////////////////////////////////////////////////////////////// + + static void SetCordHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(!proto2_f->is_extension()); + CHKRET(h->SetStringHandler(f, UpbMakeHandler(&OnCordBuf))); + if (f->IsSequence()) { + SetStartRepeatedField(proto2_f, r, f, h); + CHKRET(h->SetStartStringHandler(f, UpbMakeHandler(StartRepeatedCord))); + } else { + CHKRET(h->SetStartStringHandler( + f, UpbBind(StartCord, new FieldOffset(proto2_f, r)))); + } + } + + static Cord* StartCord(goog::Message* m, const FieldOffset* offset, + size_t size_hint) { + UPB_UNUSED(size_hint); + offset->SetHasbit(m); + Cord* field = offset->GetFieldPointer(m); + field->Clear(); + return field; + } + + static void OnCordBuf(Cord* c, const char* buf, size_t n, + const upb::BufferHandle* handle) { + const Cord* source_cord = handle->GetAttachedObject(); + if (source_cord) { + // This TODO is copied from CordReader::CopyToCord(): + // "We could speed this up by using CordReader internals." 
+ Cord piece(*source_cord); + piece.RemovePrefix(handle->object_offset() + (buf - handle->buffer())); + assert(piece.size() >= n); + piece.RemoveSuffix(piece.size() - n); + + c->Append(piece); + } else { + c->Append(StringPiece(buf, n)); + } + } + + static Cord* StartRepeatedCord(proto2::RepeatedField* r, + size_t size_hint) { + UPB_UNUSED(size_hint); + return r->Add(); + } + + // StringPiece /////////////////////////////////////////////////////////////// + + static void SetStringPieceHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(!proto2_f->is_extension()); + CHKRET(h->SetStringHandler(f, UpbMakeHandler(OnStringPieceBuf))); + if (f->IsSequence()) { + SetStartRepeatedPtrField(proto2_f, r, + f, h); + CHKRET(h->SetStartStringHandler( + f, UpbMakeHandler(StartRepeatedStringPiece))); + } else { + CHKRET(h->SetStartStringHandler( + f, UpbBind(StartStringPiece, new FieldOffset(proto2_f, r)))); + } + } + + static void OnStringPieceBuf(proto2::internal::StringPieceField* field, + const char* buf, size_t len) { + // TODO(haberman): alias if possible and enabled on the input stream. + // TODO(haberman): add a method to StringPieceField that lets us avoid + // this copy/malloc/free. 
+ size_t new_len = field->size() + len; + char* data = new char[new_len]; + memcpy(data, field->data(), field->size()); + memcpy(data + field->size(), buf, len); + field->CopyFrom(StringPiece(data, new_len)); + delete[] data; + } + + static proto2::internal::StringPieceField* StartStringPiece( + goog::Message* m, const FieldOffset* offset, size_t size_hint) { + UPB_UNUSED(size_hint); + offset->SetHasbit(m); + proto2::internal::StringPieceField* field = + offset->GetFieldPointer(m); + field->Clear(); + return field; + } + + static proto2::internal::StringPieceField* StartRepeatedStringPiece( + proto2::RepeatedPtrField* r, + size_t size_hint) { + UPB_UNUSED(size_hint); + proto2::internal::StringPieceField* field = r->Add(); + field->Clear(); + return field; + } + +#endif // UPB_GOOGLE3 +}; + +namespace upb { +namespace google { + +bool TrySetWriteHandlers(const goog::FieldDescriptor* proto2_f, + const goog::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h) { + return me::GMR_Handlers::TrySet(proto2_f, prototype, upb_f, h); +} + +const goog::Message* GetFieldPrototype(const goog::Message& m, + const goog::FieldDescriptor* f) { + return me::GMR_Handlers::GetFieldPrototype(m, f); +} + +} // namespace google +} // namespace upb diff --git a/upb/bindings/googlepb/proto2.h b/upb/bindings/googlepb/proto2.h new file mode 100644 index 0000000..516b7fd --- /dev/null +++ b/upb/bindings/googlepb/proto2.h @@ -0,0 +1,61 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// Support for registering field handlers that can write into a proto2 +// message that uses GeneratedMessageReflection (which includes all messages +// generated by the proto2 compiler as well as DynamicMessage). +// +// This is a low-level interface; the high-level interface in google.h is +// more user-friendly. 
+ +#ifndef UPB_GOOGLE_PROTO2_H_ +#define UPB_GOOGLE_PROTO2_H_ + +namespace proto2 { +class FieldDescriptor; +class Message; +} + +namespace google { +namespace protobuf { +class FieldDescriptor; +class Message; +} +} + +namespace upb { +class FieldDef; +class Handlers; +} + +namespace upb { +namespace google { + +// Sets field handlers in the given Handlers object for writing to a single +// field (as described by "proto2_f" and "upb_f") into a message constructed +// by the same factory as "prototype." Returns true if this was successful +// (this will fail if "prototype" is not a proto2 message, or if we can't +// handle it for some reason). +bool TrySetWriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); +bool TrySetWriteHandlers(const ::google::protobuf::FieldDescriptor* proto2_f, + const ::google::protobuf::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); + +// Returns a prototype for the given message-typed field in "m", obtained from +// the MessageFactory associated with m's reflection. Returns NULL if "m" does +// not use GeneratedMessageReflection. +const proto2::Message* GetFieldPrototype(const proto2::Message& m, + const proto2::FieldDescriptor* f); +const ::google::protobuf::Message* GetFieldPrototype( + const ::google::protobuf::Message& m, + const ::google::protobuf::FieldDescriptor* f); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_PROTO2_H_ -- cgit v1.2.3