From 7d3e2bd2c4cfd1296d1d6f996d7548de26540d41 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 15 Feb 2013 16:27:18 -0800 Subject: Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). --- upb/google/README | 16 ++ upb/google/bridge.cc | 260 +++++++++++++++++++++ upb/google/bridge.h | 76 +++++++ upb/google/cord.h | 48 ++++ upb/google/proto1.cc | 502 ++++++++++++++++++++++++++++++++++++++++ upb/google/proto1.h | 53 +++++ upb/google/proto2.cc | 632 +++++++++++++++++++++++++++++++++++++++++++++++++++ upb/google/proto2.h | 62 +++++ 8 files changed, 1649 insertions(+) create mode 100644 upb/google/README create mode 100644 upb/google/bridge.cc create mode 100644 upb/google/bridge.h create mode 100644 upb/google/cord.h create mode 100644 upb/google/proto1.cc create mode 100644 upb/google/proto1.h create mode 100644 upb/google/proto2.cc create mode 100644 upb/google/proto2.h (limited to 'upb/google') diff --git a/upb/google/README b/upb/google/README new file mode 100644 index 0000000..a237583 --- /dev/null +++ b/upb/google/README @@ -0,0 +1,16 @@ +This directory contains code to interoperate with Google's official +Protocol Buffers release. Since it doesn't really have a name +besides "protobuf," calling this directory "google" seems like the +least confusing option. + +We support writing into protobuf's generated classes (and hopefully +reading too, before long). We support both the open source protobuf +release and the Google-internal version of the same code. The two +live in different namespaces, and the internal version supports some +features that are not supported in the open-source release. 
Also, the +internal version includes the legacy "proto1" classes which we must +support; thankfully this is mostly relegated to its own separate file. + +Our functionality requires the full google::protobuf::Message +interface; we rely on reflection so we know what fields to read/write +and where to put them, so we can't support MessageLite. diff --git a/upb/google/bridge.cc b/upb/google/bridge.cc new file mode 100644 index 0000000..4d64ab8 --- /dev/null +++ b/upb/google/bridge.cc @@ -0,0 +1,260 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// IMPORTANT NOTE! This file is compiled TWICE, once with UPB_GOOGLE3 defined +// and once without! This allows us to provide functionality against proto2 +// and protobuf opensource both in a single binary without the two conflicting. +// However we must be careful not to violate the ODR. + +#include "upb/google/bridge.h" + +#include +#include +#include "upb/def.h" +#include "upb/google/proto1.h" +#include "upb/google/proto2.h" +#include "upb/handlers.h" + +namespace upb { +namespace proto2_bridge_google3 { class Defs; } +namespace proto2_bridge_opensource { class Defs; } +} // namespace upb + +#ifdef UPB_GOOGLE3 +#include "net/proto2/public/descriptor.h" +#include "net/proto2/public/message.h" +#include "net/proto2/proto/descriptor.pb.h" +namespace goog = ::proto2; +namespace me = ::upb::proto2_bridge_google3; +#else +#include "google/protobuf/descriptor.h" +#include "google/protobuf/message.h" +#include "google/protobuf/descriptor.pb.h" +namespace goog = ::google::protobuf; +namespace me = ::upb::proto2_bridge_opensource; +#endif + +class me::Defs { + public: + void OnMessage(Handlers* h) { + const upb::MessageDef* md = h->message_def(); + const goog::Message& m = *message_map_[md]; + const goog::Descriptor* d = m.GetDescriptor(); + for (upb::MessageDef::ConstIterator i(md); !i.Done(); i.Next()) { + const 
upb::FieldDef* upb_f = i.field(); + const goog::FieldDescriptor* proto2_f = + d->FindFieldByNumber(upb_f->number()); + if (!upb::google::TrySetWriteHandlers(proto2_f, m, upb_f, h) +#ifdef UPB_GOOGLE3 + && !upb::google::TrySetProto1WriteHandlers(proto2_f, m, upb_f, h) +#endif + ) { + // Unsupported reflection class. + // + // Should we fall back to using the public Reflection interface in this + // case? It's unclear whether it's supported behavior for users to + // create their own Reflection classes. + assert(false); + } + } + } + + static void StaticOnMessage(void *closure, upb::Handlers* handlers) { + me::Defs* defs = static_cast(closure); + defs->OnMessage(handlers); + } + + void AddSymbol(const std::string& name, upb::Def* def) { + assert(symbol_map_.find(name) == symbol_map_.end()); + symbol_map_[name] = def; + } + + void AddMessage(const goog::Message* m, upb::MessageDef* md) { + assert(message_map_.find(md) == message_map_.end()); + message_map_[md] = m; + AddSymbol(m->GetDescriptor()->full_name(), md->Upcast()); + } + + upb::Def* FindSymbol(const std::string& name) { + SymbolMap::iterator iter = symbol_map_.find(name); + return iter != symbol_map_.end() ? iter->second : NULL; + } + + void Flatten(std::vector* defs) { + SymbolMap::iterator iter; + for (iter = symbol_map_.begin(); iter != symbol_map_.end(); ++iter) { + defs->push_back(iter->second); + } + } + + private: + // Maps a new upb::MessageDef* to a corresponding proto2 Message* whose + // derived class is of the correct type according to the message the user + // gave us. + typedef std::map MessageMap; + MessageMap message_map_; + + // Maps a type name to a upb Def we have constructed to represent it. + typedef std::map SymbolMap; + SymbolMap symbol_map_; +}; + +namespace upb { +namespace google { + +// For submessage fields, stores a pointer to an instance of the submessage in +// *subm (but it is *not* guaranteed to be a prototype). 
+FieldDef* AddFieldDef(const goog::Message& m, const goog::FieldDescriptor* f, + upb::MessageDef* md, const goog::Message** subm) { + // To parse weak submessages effectively, we need to represent them in the + // upb::Def schema even though they are not reflected in the proto2 + // descriptors (weak fields are represented as FieldDescriptor::TYPE_BYTES). + const goog::Message* weak_prototype = NULL; +#ifdef UPB_GOOGLE3 + weak_prototype = upb::google::GetProto1WeakPrototype(m, f); +#endif + + upb::FieldDef* upb_f = upb::FieldDef::New(&upb_f); + upb_f->set_number(f->number()); + upb_f->set_name(f->name()); + upb_f->set_label(static_cast(f->label())); + upb_f->set_type(weak_prototype ? + UPB_TYPE_MESSAGE : static_cast(f->type())); + + if (weak_prototype) { + upb_f->set_subdef_name(weak_prototype->GetDescriptor()->full_name()); + } else if (upb_f->IsSubMessage()) { + upb_f->set_subdef_name(f->message_type()->full_name()); + } else if (upb_f->type() == UPB_TYPE(ENUM)) { + // We set the enum default numerically. + upb_f->set_default_value( + MakeValue(static_cast(f->default_value_enum()->number()))); + upb_f->set_subdef_name(f->enum_type()->full_name()); + } else { + // Set field default for primitive types. Need to switch on the upb type + // rather than the proto2 type, because upb_f->type() may have been changed + // from BYTES to MESSAGE for a weak field. 
+ switch (upb_types[upb_f->type()].inmemory_type) { + case UPB_CTYPE_INT32: + upb_f->set_default_value(MakeValue(f->default_value_int32())); + break; + case UPB_CTYPE_INT64: + upb_f->set_default_value( + MakeValue(static_cast(f->default_value_int64()))); + break; + case UPB_CTYPE_UINT32: + upb_f->set_default_value(MakeValue(f->default_value_uint32())); + break; + case UPB_CTYPE_UINT64: + upb_f->set_default_value( + MakeValue(static_cast(f->default_value_uint64()))); + break; + case UPB_CTYPE_DOUBLE: + upb_f->set_default_value(MakeValue(f->default_value_double())); + break; + case UPB_CTYPE_FLOAT: + upb_f->set_default_value(MakeValue(f->default_value_float())); + break; + case UPB_CTYPE_BOOL: + upb_f->set_default_value(MakeValue(f->default_value_bool())); + break; + case UPB_CTYPE_BYTEREGION: + upb_f->set_default_string(f->default_value_string()); + break; + } + } + bool ok = md->AddField(upb_f, &upb_f); + UPB_ASSERT_VAR(ok, ok); + + if (weak_prototype) { + *subm = weak_prototype; + } else if (f->cpp_type() == goog::FieldDescriptor::CPPTYPE_MESSAGE) { + *subm = upb::google::GetFieldPrototype(m, f); +#ifdef UPB_GOOGLE3 + if (!*subm) + *subm = upb::google::GetProto1FieldPrototype(m, f); +#endif + assert(*subm); + } + + return upb_f; +} + +upb::EnumDef* NewEnumDef(const goog::EnumDescriptor* desc, void *owner) { + upb::EnumDef* e = upb::EnumDef::New(owner); + e->set_full_name(desc->full_name()); + for (int i = 0; i < desc->value_count(); i++) { + const goog::EnumValueDescriptor* val = desc->value(i); + bool success = e->AddValue(val->name(), val->number(), NULL); + UPB_ASSERT_VAR(success, success); + } + return e; +} + +static upb::MessageDef* NewMessageDef(const goog::Message& m, void *owner, + me::Defs* defs) { + upb::MessageDef* md = upb::MessageDef::New(owner); + md->set_full_name(m.GetDescriptor()->full_name()); + + // Must do this before processing submessages to prevent infinite recursion. 
+ defs->AddMessage(&m, md); + + const goog::Descriptor* d = m.GetDescriptor(); + for (int i = 0; i < d->field_count(); i++) { + const goog::FieldDescriptor* proto2_f = d->field(i); + +#ifdef UPB_GOOGLE3 + // Skip lazy fields for now since we can't properly handle them. + if (proto2_f->options().lazy()) continue; +#endif + // Extensions not supported yet. + if (proto2_f->is_extension()) continue; + + const goog::Message* subm_prototype; + upb::FieldDef* f = AddFieldDef(m, proto2_f, md, &subm_prototype); + + if (!f->HasSubDef()) continue; + + upb::Def* subdef = defs->FindSymbol(f->subdef_name()); + if (!subdef) { + if (f->type() == UPB_TYPE(ENUM)) { + subdef = NewEnumDef(proto2_f->enum_type(), owner)->Upcast(); + defs->AddSymbol(subdef->full_name(), subdef); + } else { + assert(f->IsSubMessage()); + assert(subm_prototype); + subdef = NewMessageDef(*subm_prototype, owner, defs)->Upcast(); + } + } + f->set_subdef(subdef); + } + + return md; +} + +const upb::Handlers* NewWriteHandlers(const goog::Message& m, void *owner) { + me::Defs defs; + const upb::MessageDef* md = NewMessageDef(m, owner, &defs); + + std::vector defs_vec; + defs.Flatten(&defs_vec); + Status status; + bool success = Def::Freeze(defs_vec, &status); + UPB_ASSERT_VAR(success, success); + + const upb::Handlers* ret = + upb::Handlers::NewFrozen(md, owner, me::Defs::StaticOnMessage, &defs); + + // Unref all defs, since they're now ref'd by the handlers. + for (int i = 0; i < static_cast(defs_vec.size()); i++) { + defs_vec[i]->Unref(owner); + } + + return ret; +} + +} // namespace google +} // namespace upb diff --git a/upb/google/bridge.h b/upb/google/bridge.h new file mode 100644 index 0000000..8a2256f --- /dev/null +++ b/upb/google/bridge.h @@ -0,0 +1,76 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. 
+// Author: Josh Haberman +// +// This file contains functionality for constructing upb Defs and Handlers +// corresponding to proto2 messages. Using this functionality, you can use upb +// to dynamically generate parsing code that can behave exactly like proto2's +// generated parsing code. Alternatively, you can configure things to +// read/write only a subset of the fields for higher performance when only some +// fields are needed. +// +// Example usage (FIX XXX): +// +// // Build a def that will have all fields and parse just like proto2 would. +// const upb::MessageDef* md = upb::proto2_bridge::NewMessageDef(&MyProto()); +// +// // JIT the parser; should only be done once ahead-of-time. +// upb::Handlers* handlers = upb::NewHandlersForMessage(md); +// upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers); +// handlers->Unref(); +// +// // The actual parsing. +// MyProto proto; +// upb::Decoder decoder; +// upb::StringSource source(buf, len); +// decoder.ResetPlan(plan, 0); +// decoder.ResetInput(source.AllBytes(), &proto); +// CHECK(decoder.Decode() == UPB_OK) << decoder.status(); +// +// To parse only one field and skip all others: +// +// const upb::MessageDef* md = +// upb::proto2_bridge::NewEmptyMessageDef(MyProto().GetPrototype()); +// upb::proto2_bridge::AddFieldDef( +// MyProto::descriptor()->FindFieldByName("my_field"), md); +// upb::Freeze(md); +// +// // Now continue with "JIT the parser" from above. +// +// Note that there is currently no support for +// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate +// DescriptorPool and MessageFactory for extensions. Since this is a property +// of the input in proto2, it's difficult to build a plan ahead-of-time that +// can properly support this. If it's an important use case, the caller should +// probably build a upb plan explicitly. 
+ +#ifndef UPB_GOOGLE_BRIDGE_H_ +#define UPB_GOOGLE_BRIDGE_H_ + +namespace google { +namespace protobuf { class Message; } +} // namespace google + +namespace proto2 { class Message; } + +namespace upb { + +class Handlers; + +namespace google { + +// Returns a upb::Handlers object that can be used to populate a proto2::Message +// object of the same type as "m." +// +// TODO(haberman): Add handler caching functionality so that we don't use +// O(n^2) memory in the worst case when incrementally building handlers. +const upb::Handlers* NewWriteHandlers(const proto2::Message& m, void *owner); +const upb::Handlers* NewWriteHandlers(const ::google::protobuf::Message& m, + void *owner); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_BRIDGE_H_ diff --git a/upb/google/cord.h b/upb/google/cord.h new file mode 100644 index 0000000..c579c0c --- /dev/null +++ b/upb/google/cord.h @@ -0,0 +1,48 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// Functionality for interoperating with Cord. Only needed inside Google. + +#ifndef UPB_GOOGLE_CORD_H +#define UPB_GOOGLE_CORD_H + +#include "strings/cord.h" +#include "upb/bytestream.h" + +namespace upb { + +namespace proto2_bridge_google3 { class FieldAccessor; } +namespace proto2_bridge_opensource { class FieldAccessor; } + +namespace google { + +class P2R_Handlers; + +class CordSupport { + private: + UPB_DISALLOW_POD_OPS(CordSupport); + + inline static void AssignToCord(const upb::ByteRegion* r, Cord* cord) { + // TODO(haberman): ref source data if source is a cord. 
+ cord->Clear(); + uint64_t ofs = r->start_ofs(); + while (ofs < r->end_ofs()) { + size_t len; + const char *buf = r->GetPtr(ofs, &len); + cord->Append(StringPiece(buf, len)); + ofs += len; + } + } + + friend class ::upb::proto2_bridge_google3::FieldAccessor; + friend class ::upb::proto2_bridge_opensource::FieldAccessor; + friend class P2R_Handlers; +}; + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_CORD_H diff --git a/upb/google/proto1.cc b/upb/google/proto1.cc new file mode 100644 index 0000000..bb9ff75 --- /dev/null +++ b/upb/google/proto1.cc @@ -0,0 +1,502 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// This set of handlers can write into a proto2::Message whose reflection class +// is _pi::Proto2Reflection (ie. proto1 messages; while slightly confusing, the +// name "Proto2Reflection" indicates that it is a reflection class implementing +// the proto2 reflection interface, but is used for proto1 generated messages). +// +// Like FieldAccessor this depends on breaking encapsulation, and will need to +// be changed if and when the details of _pi::Proto2Reflection change. +// +// Note that we have received an exception from c-style-arbiters regarding +// dynamic_cast<> in this file: +// https://groups.google.com/a/google.com/d/msg/c-style/7Zp_XCX0e7s/I6dpzno4l-MJ + +#include "upb/google/proto1.h" + +// TODO(haberman): friend upb so that this isn't required. +#define protected public +#include "net/proto2/public/repeated_field.h" +#undef protected // Undo the "protected" override so it does not leak. + +// TODO(haberman): friend upb so that this isn't required. 
+#define private public +#include "net/proto/proto2_reflection.h" +#undef private + +#include "net/proto/internal_layout.h" +#include "upb/bytestream.h" +#include "upb/def.h" +#include "upb/google/cord.h" +#include "upb/handlers.h" + +template static T* GetPointer(void *message, size_t offset) { + return reinterpret_cast(static_cast(message) + offset); +} + +namespace upb { +namespace google { + +class P2R_Handlers { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const upb::FieldDef* upb_f, upb::Handlers* h) { + const proto2::Reflection* base_r = m.GetReflection(); + // See file comment re: dynamic_cast. + const _pi::Proto2Reflection* r = + dynamic_cast(base_r); + if (!r) return false; + // Extensions not supported yet. + if (proto2_f->is_extension()) return false; + + switch (r->GetFieldLayout(proto2_f)->crep) { +#define PRIMITIVE(name, type_name) \ + case _pi::CREP_REQUIRED_ ## name: \ + case _pi::CREP_OPTIONAL_ ## name: \ + case _pi::CREP_REPEATED_ ## name: \ + SetPrimitiveHandlers(proto2_f, r, upb_f, h); return true; + PRIMITIVE(DOUBLE, double); + PRIMITIVE(FLOAT, float); + PRIMITIVE(INT64, int64_t); + PRIMITIVE(UINT64, uint64_t); + PRIMITIVE(INT32, int32_t); + PRIMITIVE(FIXED64, uint64_t); + PRIMITIVE(FIXED32, uint32_t); + PRIMITIVE(BOOL, bool); +#undef PRIMITIVE + case _pi::CREP_REQUIRED_STRING: + case _pi::CREP_OPTIONAL_STRING: + case _pi::CREP_REPEATED_STRING: + SetStringHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_OUTOFLINE_STRING: + SetOutOfLineStringHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_REQUIRED_CORD: + case _pi::CREP_OPTIONAL_CORD: + case _pi::CREP_REPEATED_CORD: + SetCordHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_REQUIRED_GROUP: + case 
_pi::CREP_REQUIRED_FOREIGN: + case _pi::CREP_REQUIRED_FOREIGN_PROTO2: + SetRequiredMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_GROUP: + case _pi::CREP_REPEATED_GROUP: + case _pi::CREP_OPTIONAL_FOREIGN: + case _pi::CREP_REPEATED_FOREIGN: + case _pi::CREP_OPTIONAL_FOREIGN_PROTO2: + case _pi::CREP_REPEATED_FOREIGN_PROTO2: + SetMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_FOREIGN_WEAK: + case _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2: + SetWeakMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + default: assert(false); return false; + } + } + + // If the field "f" in the message "m" is a weak field, returns the prototype + // of the submessage (which may be a specific type or may be OpaqueMessage). + // Otherwise returns NULL. + static const proto2::Message* GetWeakPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f) { + // See file comment re: dynamic_cast. + const _pi::Proto2Reflection* r = + dynamic_cast(m.GetReflection()); + if (!r) return NULL; + + const _pi::Field* field = r->GetFieldLayout(f); + if (field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) { + return static_cast( + field->weak_layout()->default_instance); + } else if (field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2) { + return field->proto2_weak_default_instance(); + } else { + return NULL; + } + } + + // If "m" is a message that uses Proto2Reflection, returns the prototype of + // the submessage (which may be OpaqueMessage for a weak field that is not + // linked in). Otherwise returns NULL. + static const proto2::Message* GetFieldPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f) { + // See file comment re: dynamic_cast. + const proto2::Message* ret = GetWeakPrototype(m, f); + if (ret) { + return ret; + } else if (dynamic_cast(m.GetReflection())) { + // Since proto1 has no dynamic message, it must be from the generated + // factory. 
+ assert(f->cpp_type() == proto2::FieldDescriptor::CPPTYPE_MESSAGE); + ret = proto2::MessageFactory::generated_factory()->GetPrototype( + f->message_type()); + assert(ret); + return ret; + } else { + return NULL; + } + } + + private: + class FieldOffset { + public: + FieldOffset( + const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) + : offset_(GetOffset(f, r)), + is_repeated_(f->is_repeated()) { + if (!is_repeated_) { + int64_t hasbit = GetHasbit(f, r); + hasbyte_ = hasbit / 8; + mask_ = 1 << (hasbit % 8); + } + } + + template T* GetFieldPointer(void* message) const { + return GetPointer(message, offset_); + } + + void SetHasbit(void* message) const { + assert(!is_repeated_); + uint8_t* byte = GetPointer(message, hasbyte_); + *byte |= mask_; + } + + private: + const size_t offset_; + bool is_repeated_; + + // Only for non-repeated fields. + int32_t hasbyte_; + int8_t mask_; + }; + + static upb_selector_t GetSelector(const upb::FieldDef* f, + upb::Handlers::Type type) { + upb::Handlers::Selector selector; + bool ok = upb::Handlers::GetSelector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; + } + + + static int16_t GetHasbit(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + assert(!f->is_repeated()); + return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index; + } + + static uint16_t GetOffset(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + return r->GetFieldLayout(f)->offset; + } + + // StartSequence ///////////////////////////////////////////////////////////// + + static void SetStartSequenceHandler( + const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(f->IsSequence()); + h->SetStartSequenceHandler( + f, &PushOffset, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + + static void* PushOffset(void *m, void *fval) { + const FieldOffset* offset = static_cast(fval); + return 
offset->GetFieldPointer(m); + } + + // Primitive Value (numeric, enum, bool) ///////////////////////////////////// + + template static void SetPrimitiveHandlers( + const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetValueHandler(f, &Append, NULL, NULL); + } else { + upb::SetStoreValueHandler( + f, GetOffset(proto2_f, r), GetHasbit(proto2_f, r), h); + } + } + + template + static bool Append(void *_r, void *fval, T val) { + UPB_UNUSED(fval); + // Proto1's ProtoArray class derives from proto2::RepeatedField. + proto2::RepeatedField* r = static_cast*>(_r); + r->Add(val); + return true; + } + + // String //////////////////////////////////////////////////////////////////// + + static void SetStringHandlers( + const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, &OnStringBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedString, NULL, NULL); + } else { + h->SetStartStringHandler( + f, &StartString, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + } + + static void* StartString(void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + const FieldOffset* info = static_cast(fval); + info->SetHasbit(m); + string* str = info->GetFieldPointer(m); + str->clear(); + // reserve() here appears to hurt performance rather than help. + return str; + } + + static size_t OnStringBuf(void *_s, void *fval, const char *buf, size_t n) { + string* s = static_cast(_s); + s->append(buf, n); + return n; + } + + static void* StartRepeatedString(void *_r, void *fval, size_t size_hint) { + UPB_UNUSED(fval); + proto2::RepeatedPtrField* r = + static_cast*>(_r); + string* str = r->Add(); + // reserve() here appears to hurt performance rather than help. 
+ return str; + } + + // Out-of-line string //////////////////////////////////////////////////////// + + static void SetOutOfLineStringHandlers( + const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + // This type is only used for non-repeated string fields. + assert(!f->IsSequence()); + h->SetStartStringHandler( + f, &StartOutOfLineString, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + h->SetStringHandler(f, &OnStringBuf, NULL, NULL); + } + + static void* StartOutOfLineString(void *m, void *fval, size_t size_hint) { + const FieldOffset* info = static_cast(fval); + info->SetHasbit(m); + string **str = info->GetFieldPointer(m); + if (*str == &::ProtocolMessage::___empty_internal_proto_string_) + *str = new string(); + (*str)->clear(); + // reserve() here appears to hurt performance rather than help. + return *str; + } + + // Cord ////////////////////////////////////////////////////////////////////// + + static void SetCordHandlers( + const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, &OnCordBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedCord, NULL, NULL); + } else { + h->SetStartStringHandler( + f, &StartCord, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + } + + static void* StartCord(void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + const FieldOffset* offset = static_cast(fval); + offset->SetHasbit(m); + Cord* field = offset->GetFieldPointer(m); + field->Clear(); + return field; + } + + static size_t OnCordBuf(void *_c, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); + Cord* c = static_cast(_c); + c->Append(StringPiece(buf, n)); + return n; // Bytes consumed, matching OnStringBuf (was "true", i.e. 1). + } + + static void* StartRepeatedCord(void *_r, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); 
+ UPB_UNUSED(fval); + proto2::RepeatedField* r = + static_cast*>(_r); + return r->Add(); + } + + // SubMessage //////////////////////////////////////////////////////////////// + + class SubMessageHandlerData : public FieldOffset { + public: + SubMessageHandlerData( + const proto2::Message& prototype, + const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) + : FieldOffset(f, r) { + prototype_ = GetWeakPrototype(prototype, f); + if (!prototype_) + prototype_ = GetFieldPrototype(prototype, f); + } + + const proto2::Message* prototype() const { return prototype_; } + + private: + const proto2::Message* prototype_; + }; + + static void SetStartSubMessageHandler( + const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + upb::Handlers::StartFieldHandler* handler, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStartSubMessageHandler( + f, handler, + new SubMessageHandlerData(m, proto2_f, r), + &upb::DeletePointer); + } + + static void SetRequiredMessageHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h); + } else { + h->SetStartSubMessageHandler( + f, &StartRequiredSubMessage, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + } + + static void* StartRequiredSubMessage(void *m, void *fval) { + const FieldOffset* offset = static_cast(fval); + offset->SetHasbit(m); + return offset->GetFieldPointer(m); + } + + static void SetMessageHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h); + } else { + 
SetStartSubMessageHandler(proto2_f, m, r, &StartSubMessage, f, h); + } + } + + static void SetWeakMessageHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h); + } else { + SetStartSubMessageHandler(proto2_f, m, r, &StartWeakSubMessage, f, h); + } + } + + static void* StartSubMessage(void *m, void *fval) { + const SubMessageHandlerData* info = + static_cast(fval); + info->SetHasbit(m); + proto2::Message **subm = info->GetFieldPointer(m); + if (*subm == info->prototype()) *subm = (*subm)->New(); + return *subm; + } + + static void* StartWeakSubMessage(void *m, void *fval) { + const SubMessageHandlerData* info = + static_cast(fval); + info->SetHasbit(m); + proto2::Message **subm = info->GetFieldPointer(m); + if (*subm == NULL) { + *subm = info->prototype()->New(); + } + return *subm; + } + + class RepeatedMessageTypeHandler { + public: + typedef void Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + (void)t; + assert(false); + } + }; + + // Closure is a RepeatedPtrField*, but we access it through + // its base class RepeatedPtrFieldBase*. 
+ static void* StartRepeatedSubMessage(void* _r, void *fval) { + const SubMessageHandlerData* info = + static_cast(fval); + proto2::internal::RepeatedPtrFieldBase *r = + static_cast(_r); + void *submsg = r->AddFromCleared(); + if (!submsg) { + submsg = info->prototype()->New(); + r->AddAllocated(submsg); + } + return submsg; + } +}; + +bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const upb::FieldDef* upb_f, upb::Handlers* h) { + return P2R_Handlers::TrySet(proto2_f, m, upb_f, h); +} + +const proto2::Message* GetProto1WeakPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f) { + return P2R_Handlers::GetWeakPrototype(m, f); +} + +const proto2::Message* GetProto1FieldPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f) { + return P2R_Handlers::GetFieldPrototype(m, f); +} + +} // namespace google +} // namespace upb diff --git a/upb/google/proto1.h b/upb/google/proto1.h new file mode 100644 index 0000000..f35fb13 --- /dev/null +++ b/upb/google/proto1.h @@ -0,0 +1,53 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// Support for registering field handlers that can write into a legacy proto1 +// message. This functionality is only needed inside Google. +// +// This is a low-level interface; the high-level interface in google.h is +// more user-friendly. + +#ifndef UPB_GOOGLE_PROTO1_H_ +#define UPB_GOOGLE_PROTO1_H_ + +namespace proto2 { +class FieldDescriptor; +class Message; +} + +namespace upb { +class FieldDef; +class Handlers; +} + +namespace upb { +namespace google { + +// Sets field handlers in the given Handlers object for writing to a single +// field (as described by "proto2_f" and "upb_f") into a message constructed +// by the same factory as "prototype." 
Returns true if this was successful +// (this will fail if "prototype" is not a proto1 message, or if we can't +// handle it for some reason). +bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); + +// Returns a prototype for the given field in "m", if it is weak. The returned +// message could be the linked-in message type or OpaqueMessage, if the weak +// message is *not* linked in. Otherwise returns NULL. +const proto2::Message* GetProto1WeakPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f); + +// Returns a prototype for the given non-weak field in "m". +const proto2::Message* GetProto1FieldPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_PROTO1_H_ diff --git a/upb/google/proto2.cc b/upb/google/proto2.cc new file mode 100644 index 0000000..264530c --- /dev/null +++ b/upb/google/proto2.cc @@ -0,0 +1,632 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// Note that we have received an exception from c-style-arbiters regarding +// dynamic_cast<> in this file: +// https://groups.google.com/a/google.com/d/msg/c-style/7Zp_XCX0e7s/I6dpzno4l-MJ +// +// IMPORTANT NOTE! This file is compiled TWICE, once with UPB_GOOGLE3 defined +// and once without! This allows us to provide functionality against proto2 +// and protobuf opensource both in a single binary without the two conflicting. +// However we must be careful not to violate the ODR. 
+ +#include "upb/google/proto2.h" + +#include "upb/google/proto1.h" +#include "upb/bytestream.h" +#include "upb/def.h" +#include "upb/handlers.h" + +namespace upb { +namespace proto2_bridge_google3 { class FieldAccessor; } +namespace proto2_bridge_opensource { class FieldAccessor; } +} // namespace upb + +// BEGIN DOUBLE COMPILATION TRICKERY. ////////////////////////////////////////// + +#ifdef UPB_GOOGLE3 + +// TODO(haberman): friend upb so that this isn't required. +#define protected public +#include "net/proto2/public/repeated_field.h" +#undef protected + +#define private public +#include "net/proto2/public/generated_message_reflection.h" +#undef private + +#include "net/proto2/proto/descriptor.pb.h" +#include "net/proto2/public/descriptor.h" +#include "net/proto2/public/lazy_field.h" +#include "net/proto2/public/message.h" +#include "net/proto2/public/string_piece_field_support.h" +#include "upb/google/cord.h" + +namespace goog = ::proto2; +namespace me = ::upb::proto2_bridge_google3; + +#else + +// TODO(haberman): friend upb so that this isn't required. +#define protected public +#include "google/protobuf/repeated_field.h" +#undef protected + +#define private public +#include "google/protobuf/generated_message_reflection.h" +#undef private + +#include "google/protobuf/descriptor.h" +#include "google/protobuf/descriptor.pb.h" +#include "google/protobuf/message.h" + +namespace goog = ::google::protobuf; +namespace me = ::upb::proto2_bridge_opensource; + +#endif // ifdef UPB_GOOGLE3 + +// END DOUBLE COMPILATION TRICKERY. //////////////////////////////////////////// + +// Have to define this manually since older versions of proto2 didn't define +// an enum value for STRING. +#define UPB_CTYPE_STRING 0 + +template static T* GetPointer(void *message, size_t offset) { + return reinterpret_cast(static_cast(message) + offset); +} + +// This class contains handlers that can write into a proto2 class whose +// reflection class is GeneratedMessageReflection. 
(Despite the name, even +// DynamicMessage uses GeneratedMessageReflection, so this covers all proto2 +// messages generated by the compiler.) To do this it must break the +// encapsulation of GeneratedMessageReflection and therefore depends on +// internal interfaces that are not guaranteed to be stable. This class will +// need to be updated if any non-backward-compatible changes are made to +// GeneratedMessageReflection. +// +// TODO(haberman): change class name? In retrospect, "FieldAccessor" isn't the +// best (something more specific like GeneratedMessageReflectionHandlers or +// GMR_Handlers would be better) but we're depending on a "friend" declaration +// in proto2 that already specifies "FieldAccessor." No versions of proto2 have +// been released that include the "friend FieldAccessor" declaration, so there's +// still time to change this. On the other hand, perhaps it's simpler to just +// rely on "#define private public" since it may be a long time before new +// versions of proto2 open source are pervasive enough that we can remove this +// anyway. +class me::FieldAccessor { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const goog::FieldDescriptor* proto2_f, + const goog::Message& m, + const upb::FieldDef* upb_f, upb::Handlers* h) { + const goog::Reflection* base_r = m.GetReflection(); + // See file comment re: dynamic_cast. + const goog::internal::GeneratedMessageReflection* r = + dynamic_cast(base_r); + if (!r) return false; + // Extensions not supported yet. 
+ if (proto2_f->is_extension()) return false; + + switch (proto2_f->cpp_type()) { +#define PRIMITIVE_TYPE(cpptype, cident) \ + case goog::FieldDescriptor::cpptype: \ + SetPrimitiveHandlers(proto2_f, r, upb_f, h); return true; + PRIMITIVE_TYPE(CPPTYPE_INT32, int32_t); + PRIMITIVE_TYPE(CPPTYPE_INT64, int64_t); + PRIMITIVE_TYPE(CPPTYPE_UINT32, uint32_t); + PRIMITIVE_TYPE(CPPTYPE_UINT64, uint64_t); + PRIMITIVE_TYPE(CPPTYPE_DOUBLE, double); + PRIMITIVE_TYPE(CPPTYPE_FLOAT, float); + PRIMITIVE_TYPE(CPPTYPE_BOOL, bool); +#undef PRIMITIVE_TYPE + case goog::FieldDescriptor::CPPTYPE_ENUM: + SetEnumHandlers(proto2_f, r, upb_f, h); + return true; + case goog::FieldDescriptor::CPPTYPE_STRING: { + // Old versions of the open-source protobuf release erroneously default + // to Cord even though that has never been supported in the open-source + // release. + int32_t ctype = proto2_f->options().has_ctype() ? + proto2_f->options().ctype() : UPB_CTYPE_STRING; + switch (ctype) { +#ifdef UPB_GOOGLE3 + case goog::FieldOptions::STRING: + SetStringHandlers(proto2_f, m, r, upb_f, h); + return true; + case goog::FieldOptions::CORD: + SetCordHandlers(proto2_f, r, upb_f, h); + return true; + case goog::FieldOptions::STRING_PIECE: + SetStringPieceHandlers(proto2_f, r, upb_f, h); + return true; +#else + case UPB_CTYPE_STRING: + SetStringHandlers(proto2_f, m, r, upb_f, h); + return true; +#endif + default: + return false; + } + } + case goog::FieldDescriptor::CPPTYPE_MESSAGE: +#ifdef UPB_GOOGLE3 + if (proto2_f->options().lazy()) { + return false; // Not yet implemented. + } else { + SetSubMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + } +#else + SetSubMessageHandlers(proto2_f, m, r, upb_f, h); + return true; +#endif + default: + return false; + } + } + + static const goog::Message* GetFieldPrototype( + const goog::Message& m, + const goog::FieldDescriptor* f) { + // We assume that all submessages (and extensions) will be constructed + // using the same MessageFactory as this message. 
This doesn't cover the + // case of CodedInputStream::SetExtensionRegistry(). + // See file comment re: dynamic_cast. + const goog::internal::GeneratedMessageReflection* r = + dynamic_cast( + m.GetReflection()); + if (!r) return NULL; + return r->message_factory_->GetPrototype(f->message_type()); + } + + private: + static upb_selector_t GetSelector(const upb::FieldDef* f, + upb::Handlers::Type type) { + upb::Handlers::Selector selector; + bool ok = upb::Handlers::GetSelector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; + } + + static int64_t GetHasbit( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) { + // proto2 does not store hasbits for repeated fields. + assert(!f->is_repeated()); + return (r->has_bits_offset_ * 8) + f->index(); + } + + static uint16_t GetOffset( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) { + return r->offsets_[f->index()]; + } + + class FieldOffset { + public: + FieldOffset( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) + : offset_(GetOffset(f, r)), + is_repeated_(f->is_repeated()) { + if (!is_repeated_) { + int64_t hasbit = GetHasbit(f, r); + hasbyte_ = hasbit / 8; + mask_ = 1 << (hasbit % 8); + } + } + + template T* GetFieldPointer(void *message) const { + return GetPointer(message, offset_); + } + + void SetHasbit(void* m) const { + assert(!is_repeated_); + uint8_t* byte = GetPointer(m, hasbyte_); + *byte |= mask_; + } + + private: + const size_t offset_; + bool is_repeated_; + + // Only for non-repeated fields. 
+ int32_t hasbyte_; + int8_t mask_; + }; + + // StartSequence ///////////////////////////////////////////////////////////// + + static void SetStartSequenceHandler( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(f->IsSequence()); + h->SetStartSequenceHandler( + f, &PushOffset, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + + static void* PushOffset(void *m, void *fval) { + const FieldOffset* offset = static_cast(fval); + return offset->GetFieldPointer(m); + } + + // Primitive Value (numeric, bool) /////////////////////////////////////////// + + template static void SetPrimitiveHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetValueHandler(f, &AppendPrimitive, NULL, NULL); + } else { + upb::SetStoreValueHandler( + f, GetOffset(proto2_f, r), GetHasbit(proto2_f, r), h); + } + } + + template + static bool AppendPrimitive(void *_r, void *fval, T val) { + UPB_UNUSED(fval); + goog::RepeatedField* r = static_cast*>(_r); + r->Add(val); + return true; + } + + // Enum ////////////////////////////////////////////////////////////////////// + + class EnumHandlerData : public FieldOffset { + public: + EnumHandlerData( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f) + : FieldOffset(proto2_f, r), + field_number_(f->number()), + unknown_fields_offset_(r->unknown_fields_offset_), + enum_(upb_downcast_enumdef(f->subdef())) { + } + + bool IsValidValue(int32_t val) const { + return enum_->FindValueByNumber(val) != NULL; + } + + int32_t field_number() const { return field_number_; } + + goog::UnknownFieldSet* mutable_unknown_fields(goog::Message* m) const { + return GetPointer(m, unknown_fields_offset_); + } + + private: 
+ int32_t field_number_; + size_t unknown_fields_offset_; + const upb::EnumDef* enum_; + }; + + static void SetEnumHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + EnumHandlerData* data = new EnumHandlerData(proto2_f, r, f); + if (f->IsSequence()) { + h->SetInt32Handler( + f, &AppendEnum, data, &upb::DeletePointer); + } else { + h->SetInt32Handler( + f, &SetEnum, data, &upb::DeletePointer); + } + } + + static bool SetEnum(void *_m, void *fval, int32_t val) { + goog::Message* m = static_cast(_m); + const EnumHandlerData* data = static_cast(fval); + if (data->IsValidValue(val)) { + int32_t* message_val = data->GetFieldPointer(m); + *message_val = val; + data->SetHasbit(m); + } else { + data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + } + return true; + } + + static bool AppendEnum(void *_m, void *fval, int32_t val) { + // Closure is the enclosing message. We can't use the RepeatedField<> as + // the closure because we need to go back to the message for unrecognized + // enum values, which go into the unknown field set. + goog::Message* m = static_cast(_m); + const EnumHandlerData* data = static_cast(fval); + if (data->IsValidValue(val)) { + goog::RepeatedField* r = + data->GetFieldPointer >(m); + r->Add(val); + } else { + data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + } + return true; + } + + // String //////////////////////////////////////////////////////////////////// + + // For scalar (non-repeated) string fields. + template + class StringHandlerData : public FieldOffset { + public: + StringHandlerData(const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const goog::Message& prototype) + : FieldOffset(proto2_f, r) { + // "prototype" isn't guaranteed to be empty, so we create a copy to get + // the default string instance. 
+ goog::Message* empty = prototype.New(); + prototype_ = &r->GetStringReference(*empty, proto2_f, NULL); + delete empty; + } + + const T* prototype() const { return prototype_; } + + T** GetStringPointer(void *message) const { + return GetFieldPointer(message); + } + + private: + const T* prototype_; + }; + + template static void SetStringHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::Message& m, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + h->SetStringHandler(f, &OnStringBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedString, NULL, NULL); + } else { + StringHandlerData* data = new StringHandlerData(proto2_f, r, m); + h->SetStartStringHandler( + f, &StartString, data, &upb::DeletePointer >); + } + } + + // This needs to be templated because google3 string is not std::string. + template static void* StartString( + void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + const StringHandlerData* data = + static_cast*>(fval); + T** str = data->GetStringPointer(m); + data->SetHasbit(m); + // If it points to the default instance, we must create a new instance. + if (*str == data->prototype()) *str = new T(); + (*str)->clear(); + // reserve() here appears to hurt performance rather than help. + return *str; + } + + template static size_t OnStringBuf( + void *_str, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); + T* str = static_cast(_str); + str->append(buf, n); + return n; + } + + + template + static void* StartRepeatedString(void *_r, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + goog::RepeatedPtrField* r = static_cast*>(_r); + T* str = r->Add(); + str->clear(); + // reserve() here appears to hurt performance rather than help. 
+ return str; + } + + // SubMessage //////////////////////////////////////////////////////////////// + + class SubMessageHandlerData : public FieldOffset { + public: + SubMessageHandlerData( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r, + const goog::Message* prototype) + : FieldOffset(f, r), + prototype_(prototype) { + } + + const goog::Message* prototype() const { return prototype_; } + + private: + const goog::Message* const prototype_; + }; + + static void SetSubMessageHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::Message& m, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + SubMessageHandlerData* data = + new SubMessageHandlerData(proto2_f, r, GetFieldPrototype(m, proto2_f)); + upb::Handlers::Free* free = &upb::DeletePointer; + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartSubMessageHandler(f, &StartRepeatedSubMessage, data, free); + } else { + h->SetStartSubMessageHandler(f, &StartSubMessage, data, free); + } + } + + static void* StartSubMessage(void *m, void *fval) { + const SubMessageHandlerData* data = + static_cast(fval); + data->SetHasbit(m); + goog::Message **subm = data->GetFieldPointer(m); + if (*subm == NULL || *subm == data->prototype()) { + *subm = data->prototype()->New(); + } + return *subm; + } + + class RepeatedMessageTypeHandler { + public: + typedef void Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + (void)t; + assert(false); + } + }; + + // Closure is a RepeatedPtrField*, but we access it through + // its base class RepeatedPtrFieldBase*. 
+ static void* StartRepeatedSubMessage(void* _r, void *fval) { + const SubMessageHandlerData* data = + static_cast(fval); + goog::internal::RepeatedPtrFieldBase *r = + static_cast(_r); + void *submsg = r->AddFromCleared(); + if (!submsg) { + submsg = data->prototype()->New(); + r->AddAllocated(submsg); + } + return submsg; + } + + // TODO(haberman): handle Extensions, Unknown Fields. + +#ifdef UPB_GOOGLE3 + // Handlers for types/features only included in internal proto2 release: + // Cord, StringPiece, LazyField, and MessageSet. + // TODO(haberman): LazyField, MessageSet. + + // Cord ////////////////////////////////////////////////////////////////////// + + static void SetCordHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, &OnCordBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedCord, NULL, NULL); + } else { + h->SetStartStringHandler( + f, &StartCord, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + } + + static void* StartCord(void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + const FieldOffset* offset = static_cast(fval); + offset->SetHasbit(m); + Cord* field = offset->GetFieldPointer(m); + field->Clear(); + return field; + } + + static size_t OnCordBuf(void *_c, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); + Cord* c = static_cast(_c); + c->Append(StringPiece(buf, n)); + return n; + } + + static void* StartRepeatedCord(void *_r, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + proto2::RepeatedField* r = + static_cast*>(_r); + return r->Add(); + } + + // StringPiece /////////////////////////////////////////////////////////////// + + static void SetStringPieceHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::internal::GeneratedMessageReflection* r, + const 
upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, &OnStringPieceBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedStringPiece, NULL, NULL); + } else { + h->SetStartStringHandler( + f, &StartStringPiece, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + } + + static size_t OnStringPieceBuf(void *_f, void *fval, + const char *buf, size_t len) { + UPB_UNUSED(fval); + // TODO(haberman): alias if possible and enabled on the input stream. + // TODO(haberman): add a method to StringPieceField that lets us avoid + // this copy/malloc/free. + proto2::internal::StringPieceField* field = + static_cast(_f); + size_t new_len = field->size() + len; + char *data = new char[new_len]; + memcpy(data, field->data(), field->size()); + memcpy(data + field->size(), buf, len); + field->CopyFrom(StringPiece(data, new_len)); + delete[] data; + return len; + } + + static void* StartStringPiece(void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + const FieldOffset* offset = static_cast(fval); + offset->SetHasbit(m); + proto2::internal::StringPieceField* field = + offset->GetFieldPointer(m); + field->Clear(); + return field; + } + + static void* StartRepeatedStringPiece(void* _r, void *fval, + size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + typedef proto2::RepeatedPtrField + RepeatedStringPiece; + RepeatedStringPiece* r = static_cast(_r); + proto2::internal::StringPieceField* field = r->Add(); + field->Clear(); + return field; + } + +#endif // UPB_GOOGLE3 +}; + +namespace upb { +namespace google { + +bool TrySetWriteHandlers(const goog::FieldDescriptor* proto2_f, + const goog::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h) { + return me::FieldAccessor::TrySet(proto2_f, prototype, upb_f, h); +} + +const goog::Message* GetFieldPrototype( + const goog::Message& m, + const goog::FieldDescriptor* f) { + return 
me::FieldAccessor::GetFieldPrototype(m, f); +} + +} // namespace google +} // namespace upb diff --git a/upb/google/proto2.h b/upb/google/proto2.h new file mode 100644 index 0000000..f2662ea --- /dev/null +++ b/upb/google/proto2.h @@ -0,0 +1,62 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// Support for registering field handlers that can write into a proto2 +// message that uses GeneratedMessageReflection (which includes all messages +// generated by the proto2 compiler as well as DynamicMessage). +// +// This is a low-level interface; the high-level interface in google.h is +// more user-friendly. + +#ifndef UPB_GOOGLE_PROTO2_H_ +#define UPB_GOOGLE_PROTO2_H_ + +namespace proto2 { +class FieldDescriptor; +class Message; +} + +namespace google { +namespace protobuf { +class FieldDescriptor; +class Message; +} +} + +namespace upb { +class FieldDef; +class Handlers; +} + +namespace upb { +namespace google { + +// Sets field handlers in the given Handlers object for writing to a single +// field (as described by "proto2_f" and "upb_f") into a message constructed +// by the same factory as "prototype." Returns true if this was successful +// (this will fail if "prototype" is not a proto2 message that uses +// GeneratedMessageReflection, or if we can't handle it for some reason). +bool TrySetWriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); +bool TrySetWriteHandlers(const ::google::protobuf::FieldDescriptor* proto2_f, + const ::google::protobuf::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); + +// Returns the prototype for the message type of field "f", obtained from the +// MessageFactory held by the reflection object of "m". Returns NULL if "m" +// does not use GeneratedMessageReflection. 
+const proto2::Message* GetFieldPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f); +const ::google::protobuf::Message* GetFieldPrototype( + const ::google::protobuf::Message& m, + const ::google::protobuf::FieldDescriptor* f); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_PROTO2_H_ -- cgit v1.2.3