From 86bad61b76a260ffc442acffbe58feee67df45e5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Mar 2012 11:24:16 -0700 Subject: Sync from internal Google development. Many improvements, too many to mention. One significant perf regression warrants investigation: omitfp.parsetoproto2_googlemessage1.upb_jit: 343 -> 252 (-26.53) plain.parsetoproto2_googlemessage1.upb_jit: 334 -> 251 (-24.85) 25% regression for this benchmark is bad, but since I don't think there's any fundamental design issue that caused it I'm going to go ahead with the commit anyway. Can investigate and fix later. Other benchmarks were neutral or showed slight improvement. --- bindings/cpp/upb/bytestream.hpp | 33 ++ bindings/cpp/upb/def.hpp | 381 +++++++++++++--- bindings/cpp/upb/handlers.cc | 39 ++ bindings/cpp/upb/handlers.hpp | 47 +- bindings/cpp/upb/msg.hpp | 62 +++ bindings/cpp/upb/pb/glue.hpp | 12 + bindings/cpp/upb/proto2_bridge.cc | 892 +++++++++++++++++++++++++++++++++++++ bindings/cpp/upb/proto2_bridge.hpp | 170 +++++++ bindings/cpp/upb/upb.hpp | 44 +- 9 files changed, 1602 insertions(+), 78 deletions(-) create mode 100644 bindings/cpp/upb/handlers.cc create mode 100644 bindings/cpp/upb/msg.hpp create mode 100644 bindings/cpp/upb/proto2_bridge.cc create mode 100644 bindings/cpp/upb/proto2_bridge.hpp (limited to 'bindings/cpp') diff --git a/bindings/cpp/upb/bytestream.hpp b/bindings/cpp/upb/bytestream.hpp index 968d542..81134b9 100644 --- a/bindings/cpp/upb/bytestream.hpp +++ b/bindings/cpp/upb/bytestream.hpp @@ -68,6 +68,7 @@ #include "upb/bytestream.h" #include "upb/upb.hpp" +#include namespace upb { @@ -204,6 +205,18 @@ class ByteRegion : public upb_byteregion { return upb_byteregion_strdup(this); } + template void AssignToString(T* str) { + uint64_t ofs = start_ofs(); + str->clear(); + str->reserve(Length()); + while (ofs < end_ofs()) { + size_t len; + const char *ptr = GetPtr(ofs, &len); + str->append(ptr, len); + ofs += len; + } + } + // TODO: add if/when there is a 
demonstrated need. // // // Pins this byteregion's bytes in memory, allowing it to outlive its @@ -220,12 +233,24 @@ class ByteRegion : public upb_byteregion { class StringSource : public upb_stringsrc { public: StringSource() : upb_stringsrc() { upb_stringsrc_init(this); } + template explicit StringSource(const T& str) { + upb_stringsrc_init(this); + Reset(str); + } + StringSource(const char *data, size_t len) { + upb_stringsrc_init(this); + Reset(data, len); + } ~StringSource() { upb_stringsrc_uninit(this); } void Reset(const char* data, size_t len) { upb_stringsrc_reset(this, data, len); } + template void Reset(const T& str) { + Reset(str.c_str(), str.size()); + } + ByteRegion* AllBytes() { return static_cast(upb_stringsrc_allbytes(this)); } @@ -233,6 +258,14 @@ class StringSource : public upb_stringsrc { upb_bytesrc* ByteSource() { return upb_stringsrc_bytesrc(this); } }; +template <> inline ByteRegion* GetValue(Value v) { + return static_cast(upb_value_getbyteregion(v)); +} + +template <> inline Value MakeValue(ByteRegion* v) { + return upb_value_byteregion(v); +} + } // namespace upb #endif diff --git a/bindings/cpp/upb/def.hpp b/bindings/cpp/upb/def.hpp index 030ba40..6998648 100644 --- a/bindings/cpp/upb/def.hpp +++ b/bindings/cpp/upb/def.hpp @@ -1,7 +1,7 @@ // // upb - a minimalist implementation of protocol buffers. // -// Copyright (c) 2011 Google Inc. See LICENSE for details. +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. // Author: Josh Haberman // // The set of upb::*Def classes and upb::SymbolTable allow for defining and @@ -15,21 +15,20 @@ // not be used for any purpose except to set its properties (it can't be // used to parse anything, create any messages in memory, etc). // -// 2. FINALIZED: after being added to a symtab (which links the defs together) -// the defs become finalized (thread-safe and immutable). 
Programs may only -// access defs through a CONST POINTER during this stage -- upb_symtab will -// help you out with this requirement by only vending const pointers, but -// you need to make sure not to use any non-const pointers you still have -// sitting around. In practice this means that you may not call any setters -// on the defs (or functions that themselves call the setters). If you want -// to modify an existing immutable def, copy it with upb_*_dup(), modify the -// copy, and add the modified def to the symtab (replacing the existing -// def). +// 2. FINALIZED: the Def::Finalize() operation finalizes a set of defs, +// which makes them thread-safe and immutable. Finalized defs may only be +// accessed through a CONST POINTER. If you want to modify an existing +// immutable def, copy it with Dup() and modify and finalize the copy. // -// You can test for which stage of life a def is in by calling -// upb::Def::IsMutable(). This is particularly useful for dynamic language -// bindings, which must properly guarantee that the dynamic language cannot -// break the rules laid out above. +// The refcounting of defs works properly no matter what state the def is in. +// Once the def is finalized it is guaranteed that any def reachable from a +// live def is also live (so a ref on the base of a message tree keeps the +// whole tree alive). +// +// You can test for which stage of life a def is in by calling IsMutable(). +// This is particularly useful for dynamic language bindings, which must +// properly guarantee that the dynamic language cannot break the rules laid out +// above. 
// // It would be possible to make the defs thread-safe during stage 1 by using // mutexes internally and changing any methods returning pointers to return @@ -48,63 +47,213 @@ namespace upb { +class Def; class MessageDef; +typedef upb_fieldtype_t FieldType; +typedef upb_label_t Label; + class FieldDef : public upb_fielddef { public: - static FieldDef* Cast(upb_fielddef *f) { return (FieldDef*)f; } - static const FieldDef* Cast(const upb_fielddef *f) { return (FieldDef*)f; } + static FieldDef* Cast(upb_fielddef *f) { return static_cast(f); } + static const FieldDef* Cast(const upb_fielddef *f) { + return static_cast(f); + } + + static FieldDef* New(void *owner) { return Cast(upb_fielddef_new(owner)); } + FieldDef* Dup(void *owner) const { + return Cast(upb_fielddef_dup(this, owner)); + } + void Ref(void *owner) { upb_fielddef_ref(this, owner); } + void Unref(void *owner) { upb_fielddef_unref(this, owner); } - static FieldDef* New() { return Cast(upb_fielddef_new()); } - FieldDef* Dup() { return Cast(upb_fielddef_dup(this)); } + bool IsMutable() const { return upb_fielddef_ismutable(this); } + bool IsFinalized() const { return upb_fielddef_isfinalized(this); } + bool IsString() const { return upb_isstring(this); } + bool IsSequence() const { return upb_isseq(this); } + bool IsSubmessage() const { return upb_issubmsg(this); } - // Read accessors -- may be called at any time. - uint8_t type() const { return upb_fielddef_type(this); } - uint8_t label() const { return upb_fielddef_label(this); } + // Simple accessors. 
///////////////////////////////////////////////////////// + + FieldType type() const { return upb_fielddef_type(this); } + Label label() const { return upb_fielddef_label(this); } int32_t number() const { return upb_fielddef_number(this); } std::string name() const { return std::string(upb_fielddef_name(this)); } Value default_() const { return upb_fielddef_default(this); } Value bound_value() const { return upb_fielddef_fval(this); } + uint16_t offset() const { return upb_fielddef_offset(this); } + int16_t hasbit() const { return upb_fielddef_hasbit(this); } + + bool set_type(FieldType type) { return upb_fielddef_settype(this, type); } + bool set_label(Label label) { return upb_fielddef_setlabel(this, label); } + void set_offset(uint16_t offset) { upb_fielddef_setoffset(this, offset); } + void set_hasbit(int16_t hasbit) { upb_fielddef_sethasbit(this, hasbit); } + void set_fval(Value fval) { upb_fielddef_setfval(this, fval); } + void set_accessor(struct _upb_accessor_vtbl* vtbl) { + upb_fielddef_setaccessor(this, vtbl); + } + MessageDef* message(); + const MessageDef* message() const; - MessageDef* message() { return (MessageDef*)upb_fielddef_msgdef(this); } - const MessageDef* message() const { return (MessageDef*)upb_fielddef_msgdef(this); } - - // Will be added once upb::Def is defined: - // Def* subdef() { return upb_fielddef_subdef(this); } - // const Def* subdef() { return upb_fielddef_subdef(this); } - - // Returns true if this FieldDef is finalized - bool IsFinalized() const { return upb_fielddef_finalized(this); } struct _upb_accessor_vtbl *accessor() const { return upb_fielddef_accessor(this); } - std::string type_name() const { - return std::string(upb_fielddef_typename(this)); + + // "Number" and "name" must be set before the fielddef is added to a msgdef. + // For the moment we do not allow these to be set once the fielddef is added + // to a msgdef -- this could be relaxed in the future. 
+ bool set_number(int32_t number) { + return upb_fielddef_setnumber(this, number); + } + bool set_name(const char *name) { return upb_fielddef_setname(this, name); } + bool set_name(const std::string& name) { return set_name(name.c_str()); } + + // Default value. //////////////////////////////////////////////////////////// + + // Returns the default value for this fielddef, which may either be something + // the client set explicitly or the "default default" (0 for numbers, empty + // for strings). The field's type indicates the type of the returned value, + // except for enum fields that are still mutable. + // + // For enums the default can be set either numerically or symbolically -- the + // upb_fielddef_default_is_symbolic() function below will indicate which it + // is. For string defaults, the value will be a upb_byteregion which is + // invalidated by any other non-const call on this object. Once the fielddef + // is finalized, symbolic enum defaults are resolved, so finalized enum + // fielddefs always have a default of type int32. + Value defaultval() { return upb_fielddef_default(this); } + + // Sets default value for the field. For numeric types, use + // upb_fielddef_setdefault(), and "value" must match the type of the field. + // For string/bytes types, use upb_fielddef_setdefaultstr(). Enum types may + // use either, since the default may be set either numerically or + // symbolically. + // + // NOTE: May only be called for fields whose type has already been set. + // Also, will be reset to default if the field's type is set again. 
+ void set_default(Value value) { upb_fielddef_setdefault(this, value); } + void set_default(const char *str) { upb_fielddef_setdefaultcstr(this, str); } + void set_default(const char *str, size_t len) { + upb_fielddef_setdefaultstr(this, str, len); + } + void set_default(const std::string& str) { + upb_fielddef_setdefaultstr(this, str.c_str(), str.size()); + } + + // The results of this function are only meaningful for mutable enum fields, + // which can have a default specified either as an integer or as a string. + // If this returns true, the default returned from upb_fielddef_default() is + // a string, otherwise it is an integer. + bool DefaultIsSymbolic() { return upb_fielddef_default_is_symbolic(this); } + + // Subdef. /////////////////////////////////////////////////////////////////// + + // Submessage and enum fields must reference a "subdef", which is the + // MessageDef or EnumDef that defines their type. Note that when the + // FieldDef is mutable it may not have a subdef *yet*, but this still returns + // true to indicate that the field's type requires a subdef. + bool HasSubDef() { return upb_hassubdef(this); } + + // Before a FieldDef is finalized, its subdef may be set either directly + // (with a Def*) or symbolically. Symbolic refs must be resolved by the + // client before the containing msgdef can be finalized. + // + // Both methods require that HasSubDef() (so the type must be set prior to + // calling these methods). Returns false if this is not the case, or if the + // given subdef is not of the correct type. The subtype is reset if the + // field's type is changed. + bool set_subdef(Def* def); + bool set_subtype_name(const char *name) { + return upb_fielddef_setsubtypename(this, name); + } + bool set_subtype_name(const std::string& str) { + return set_subtype_name(str.c_str()); } - // Write accessors -- may not be called once the FieldDef is finalized. + // Returns the enum or submessage def or symbolic name for this field, if + // any. 
May only be called for fields where HasSubDef() is true. Returns + // NULL if the subdef has not been set or if you ask for a subtype name when + // the subtype is currently set symbolically (or vice-versa). + // + // Caller does *not* own a ref on the returned def or string. + // subtype_name() is non-const because only mutable defs can have the + // subtype name set symbolically (symbolic references must be resolved before + // the MessageDef can be finalized). + const Def* subdef() const; + const char *subtype_name() { return upb_fielddef_subtypename(this); } private: - FieldDef(); - ~FieldDef(); + UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldDef); +}; + +class Def : public upb_def { + public: + // Converting from C types to C++ wrapper types. + static Def* Cast(upb_def *def) { return static_cast(def); } + static const Def* Cast(const upb_def *def) { + return static_cast(def); + } + + void Ref(void *owner) const { upb_def_ref(this, owner); } + void Unref(void *owner) const { upb_def_unref(this, owner); } + + void set_full_name(const char *name) { upb_def_setfullname(this, name); } + void set_full_name(const std::string& name) { + upb_def_setfullname(this, name.c_str()); + } + + const char *full_name() const { return upb_def_fullname(this); } + + // Finalizes the given list of defs (as well as the fielddefs for the given + // msgdefs). All defs reachable from any def in this list must either be + // already finalized or elsewhere in the list. Any symbolic references to + // enums or submessages must already have been resolved. Returns true on + // success, otherwise false is returned and status contains details. In the + // error case the input defs are unmodified. See the comment at the top of + // this file for the semantics of finalized defs. + // + // n is currently limited to 64k defs, if more are required break them into + // batches of 64k (or we could raise this limit, at the cost of a bigger + // upb_def structure or complexity in upb_def_finalize()). 
+ static bool Finalize(Def*const* defs, int n, Status* status) { + return upb_finalize(reinterpret_cast(defs), n, status); + } + static bool Finalize(const std::vector& defs, Status* status) { + return Finalize(&defs[0], defs.size(), status); + } }; class MessageDef : public upb_msgdef { public: // Converting from C types to C++ wrapper types. - static MessageDef* Cast(upb_msgdef *md) { return (MessageDef*)md; } + static MessageDef* Cast(upb_msgdef *md) { + return static_cast(md); + } static const MessageDef* Cast(const upb_msgdef *md) { - return (MessageDef*)md; + return static_cast(md); + } + static MessageDef* DynamicCast(Def* def) { + return Cast(upb_dyncast_msgdef(def)); + } + static const MessageDef* DynamicCast(const Def* def) { + return Cast(upb_dyncast_msgdef_const(def)); } - static MessageDef* New() { return Cast(upb_msgdef_new()); } - MessageDef* Dup() { return Cast(upb_msgdef_dup(this)); } + Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); } + const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); } + + static MessageDef* New(void *owner) { return Cast(upb_msgdef_new(owner)); } + MessageDef* Dup(void *owner) const { + return Cast(upb_msgdef_dup(this, owner)); + } - void Ref() const { upb_msgdef_ref(this); } - void Unref() const { upb_msgdef_unref(this); } + void Ref(void *owner) const { upb_msgdef_ref(this, owner); } + void Unref(void *owner) const { upb_msgdef_unref(this, owner); } // Read accessors -- may be called at any time. + const char *full_name() const { return AsDef()->full_name(); } + // The total size of in-memory messages created with this MessageDef. uint16_t instance_size() const { return upb_msgdef_size(this); } @@ -116,25 +265,32 @@ class MessageDef : public upb_msgdef { // Write accessors. May only be called before the msgdef is in a symtab. 
+ void set_full_name(const char *name) { AsDef()->set_full_name(name); } + void set_full_name(const std::string& name) { AsDef()->set_full_name(name); } + void set_instance_size(uint16_t size) { upb_msgdef_setsize(this, size); } void set_hasbit_bytes(uint16_t size) { upb_msgdef_setsize(this, size); } bool SetExtensionRange(uint32_t start, uint32_t end) { return upb_msgdef_setextrange(this, start, end); } - // Adds a set of fields (upb_fielddef objects) to a msgdef. Caller retains - // its ref on the fielddef. May only be done before the msgdef is in a - // symtab (requires upb_def_ismutable(m) for the msgdef). The fielddef's - // name and number must be set, and the message may not already contain any - // field with this name or number, and this fielddef may not be part of - // another message, otherwise false is returned and no action is performed. - bool AddFields(FieldDef*const * f, int n) { - return upb_msgdef_addfields(this, (upb_fielddef**)f, n); + // Adds a set of fields (FieldDef objects) to a MessageDef. Caller passes a + // ref on the FieldDef to the MessageDef in both success and failure cases. + // May only be done before the MessageDef is in a SymbolTable (requires + // m->IsMutable() for the MessageDef). The FieldDef's name and number must + // be set, and the message may not already contain any field with this name + // or number, and this FieldDef may not be part of another message, otherwise + // false is returned and the MessageDef is unchanged. 
+ bool AddField(FieldDef* f, void *owner) { return AddFields(&f, 1, owner); } + bool AddFields(FieldDef*const * f, int n, void *owner) { + return upb_msgdef_addfields(this, (upb_fielddef*const*)f, n, owner); } - bool AddFields(const std::vector& fields) { - return AddFields(&fields[0], fields.size()); + bool AddFields(const std::vector& fields, void *owner) { + return AddFields(&fields[0], fields.size(), owner); } + int field_count() const { return upb_msgdef_numfields(this); } + // Lookup fields by name or number, returning NULL if no such field exists. FieldDef* FindFieldByName(const char *name) { return FieldDef::Cast(upb_msgdef_ntof(this, name)); @@ -156,19 +312,89 @@ class MessageDef : public upb_msgdef { return FindFieldByNumber(num); } - // TODO: iteration over fields. + class Iterator : public upb_msg_iter { + public: + explicit Iterator(MessageDef* md) { upb_msg_begin(this, md); } + Iterator() {} + + FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); } + bool Done() { return upb_msg_done(this); } + void Next() { return upb_msg_next(this); } + }; + + class ConstIterator : public upb_msg_iter { + public: + explicit ConstIterator(const MessageDef* md) { upb_msg_begin(this, md); } + ConstIterator() {} + + const FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); } + bool Done() { return upb_msg_done(this); } + void Next() { return upb_msg_next(this); } + }; private: - MessageDef(); - ~MessageDef(); + UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageDef); +}; + +class EnumDef : public upb_enumdef { + public: + // Converting from C types to C++ wrapper types. 
+ static EnumDef* Cast(upb_enumdef *e) { return static_cast(e); } + static const EnumDef* Cast(const upb_enumdef *e) { + return static_cast(e); + } + + static EnumDef* New(void *owner) { return Cast(upb_enumdef_new(owner)); } + + void Ref(void *owner) { upb_enumdef_ref(this, owner); } + void Unref(void *owner) { upb_enumdef_unref(this, owner); } + EnumDef* Dup(void *owner) const { return Cast(upb_enumdef_dup(this, owner)); } + + Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); } + const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); } + + int32_t default_value() const { return upb_enumdef_default(this); } + + // May only be set if IsMutable(). + void set_full_name(const char *name) { AsDef()->set_full_name(name); } + void set_full_name(const std::string& name) { AsDef()->set_full_name(name); } + void set_default_value(int32_t val) { + return upb_enumdef_setdefault(this, val); + } + + // Adds a value to the enumdef. Requires that no existing val has this + // name or number (returns false and does not add if there is). May only + // be called if IsMutable(). + bool AddValue(char *name, int32_t num) { + return upb_enumdef_addval(this, name, num); + } + bool AddValue(const std::string& name, int32_t num) { + return upb_enumdef_addval(this, name.c_str(), num); + } + + // Lookups from name to integer and vice-versa. + bool LookupName(const char *name, int32_t* num) const { + return upb_enumdef_ntoi(this, name, num); + } + + // Lookup from integer to name, returns a NULL-terminated string which + // the caller does not own, or NULL if not found. + const char *LookupNumber(int32_t num) const { + return upb_enumdef_iton(this, num); + } + + private: + UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(EnumDef); }; class SymbolTable : public upb_symtab { public: // Converting from C types to C++ wrapper types. 
- static SymbolTable* Cast(upb_symtab *s) { return (SymbolTable*)s; } + static SymbolTable* Cast(upb_symtab *s) { + return static_cast(s); + } static const SymbolTable* Cast(const upb_symtab *s) { - return (SymbolTable*)s; + return static_cast(s); } static SymbolTable* New() { return Cast(upb_symtab_new()); } @@ -176,17 +402,50 @@ class SymbolTable : public upb_symtab { void Ref() const { upb_symtab_unref(this); } void Unref() const { upb_symtab_unref(this); } + // Adds the given defs to the symtab, resolving all symbols. Only one def + // per name may be in the list, but defs can replace existing defs in the + // symtab. The entire operation either succeeds or fails. If the operation + // fails, the symtab is unchanged, false is returned, and status indicates + // the error. The caller passes a ref on the defs in all cases. + bool Add(Def *const *defs, int n, void *owner, Status* status) { + return upb_symtab_add(this, (upb_def*const*)defs, n, owner, status); + } + bool Add(const std::vector& defs, void *owner, Status* status) { + return Add(&defs[0], defs.size(), owner, status); + } + // If the given name refers to a message in this symbol table, returns a new // ref to that MessageDef object, otherwise returns NULL. 
- const MessageDef* LookupMessage(const char *name) const { - return MessageDef::Cast(upb_symtab_lookupmsg(this, name)); + const MessageDef* LookupMessage(const char *name, void *owner) const { + return MessageDef::Cast(upb_symtab_lookupmsg(this, name, owner)); } private: - SymbolTable(); - ~SymbolTable(); + UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(SymbolTable); }; +template <> inline const FieldDef* GetValue(Value v) { + return static_cast(upb_value_getfielddef(v)); +} + +template <> inline Value MakeValue(FieldDef* v) { + return upb_value_fielddef(v); +} + +inline MessageDef* FieldDef::message() { + return MessageDef::Cast(upb_fielddef_msgdef(this)); +} +inline const MessageDef* FieldDef::message() const { + return MessageDef::Cast(upb_fielddef_msgdef(this)); +} + +inline const Def* FieldDef::subdef() const { + return Def::Cast(upb_fielddef_subdef(this)); +} +inline bool FieldDef::set_subdef(Def* def) { + return upb_fielddef_setsubdef(this, def); +} + } // namespace upb #endif diff --git a/bindings/cpp/upb/handlers.cc b/bindings/cpp/upb/handlers.cc new file mode 100644 index 0000000..c96a74e --- /dev/null +++ b/bindings/cpp/upb/handlers.cc @@ -0,0 +1,39 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011 Google Inc. See LICENSE for details. 
+// Author: Josh Haberman + +#include "handlers.hpp" + +#include "def.hpp" + +namespace upb { + +namespace { + +void MessageCallbackWrapper( + void* closure, upb_mhandlers* mh, const upb_msgdef* m) { + Handlers::MessageRegistrationVisitor* visitor = + static_cast(closure); + visitor->OnMessage(static_cast(mh), + static_cast(m)); +} + +void FieldCallbackWrapper( + void* closure, upb_fhandlers* fh, const upb_fielddef* f) { + Handlers::MessageRegistrationVisitor* visitor = + static_cast(closure); + visitor->OnField(static_cast(fh), + static_cast(f)); +} +} // namespace + +MessageHandlers* Handlers::RegisterMessageDef( + const MessageDef& m, Handlers::MessageRegistrationVisitor* visitor) { + upb_mhandlers* mh = upb_handlers_regmsgdef( + this, &m, &MessageCallbackWrapper, &FieldCallbackWrapper, &visitor); + return static_cast(mh); +} + +} // namespace upb diff --git a/bindings/cpp/upb/handlers.hpp b/bindings/cpp/upb/handlers.hpp index d356a33..a366c3d 100644 --- a/bindings/cpp/upb/handlers.hpp +++ b/bindings/cpp/upb/handlers.hpp @@ -15,11 +15,16 @@ #include "upb/handlers.h" +#include "upb/upb.hpp" + namespace upb { typedef upb_fieldtype_t FieldType; typedef upb_flow_t Flow; +typedef upb_sflow_t SubFlow; class MessageHandlers; +class MessageDef; +class FieldDef; class FieldHandlers : public upb_fhandlers { public: @@ -68,12 +73,11 @@ class FieldHandlers : public upb_fhandlers { MessageHandlers* GetSubMessageHandlers() const; // If set to >=0, the given hasbit will be set after the value callback is // called (offset relative to the current closure). - int32_t GetValueHasbit() const { return upb_fhandlers_getvaluehasbit(this); } - void SetValueHasbit(int32_t bit) { upb_fhandlers_setvaluehasbit(this, bit); } + int32_t GetHasbit() const { return upb_fhandlers_gethasbit(this); } + void SetHasbit(int32_t bit) { upb_fhandlers_sethasbit(this, bit); } private: - FieldHandlers(); // Only created by upb::Handlers. - ~FieldHandlers(); // Only destroyed by refcounting. 
+ UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldHandlers); }; class MessageHandlers : public upb_mhandlers { @@ -81,6 +85,13 @@ class MessageHandlers : public upb_mhandlers { typedef upb_startmsg_handler StartMessageHandler; typedef upb_endmsg_handler EndMessageHandler; + static MessageHandlers* Cast(upb_mhandlers* mh) { + return static_cast(mh); + } + static const MessageHandlers* Cast(const upb_mhandlers* mh) { + return static_cast(mh); + } + // The MessageHandlers will live at least as long as the upb::Handlers to // which it belongs, but can be Ref'd/Unref'd to make it live longer (which // will prolong the life of the underlying upb::Handlers also). @@ -89,7 +100,7 @@ class MessageHandlers : public upb_mhandlers { // Functions to set this message's handlers. // These return "this" so they can be conveniently chained, eg. - // handlers->NewMessage() + // handlers->NewMessageHandlers() // ->SetStartMessageHandler(&StartMessage) // ->SetEndMessageHandler(&EndMessage); MessageHandlers* SetStartMessageHandler(StartMessageHandler* h) { @@ -111,13 +122,13 @@ class MessageHandlers : public upb_mhandlers { FieldHandlers* NewFieldHandlersForSubmessage(uint32_t n, const char *name, FieldType type, bool repeated, MessageHandlers* subm) { + (void)name; return static_cast( upb_mhandlers_newfhandlers_subm(this, n, type, repeated, subm)); } private: - MessageHandlers(); // Only created by upb::Handlers. - ~MessageHandlers(); // Only destroyed by refcounting. + UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageHandlers); }; class Handlers : public upb_handlers { @@ -134,17 +145,29 @@ class Handlers : public upb_handlers { return static_cast(upb_handlers_newmhandlers(this)); } + // Convenience function for registering handlers for all messages and fields + // in a MessageDef and all its children. For every registered message, + // OnMessage will be called on the visitor with newly-created MessageHandlers + // and MessageDef. 
Likewise with OnField will be called with newly-created + // FieldHandlers and FieldDef for each field. + class MessageRegistrationVisitor { + public: + virtual ~MessageRegistrationVisitor() {} + virtual void OnMessage(MessageHandlers* mh, const MessageDef* m) = 0; + virtual void OnField(FieldHandlers* fh, const FieldDef* f) = 0; + }; + MessageHandlers* RegisterMessageDef(const MessageDef& m, + MessageRegistrationVisitor* visitor); + private: - Handlers(); // Only created by Handlers::New(). - ~Handlers(); // Only destroyed by refcounting. + UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(Handlers); }; - -MessageHandlers* FieldHandlers::GetMessageHandlers() const { +inline MessageHandlers* FieldHandlers::GetMessageHandlers() const { return static_cast(upb_fhandlers_getmsg(this)); } -MessageHandlers* FieldHandlers::GetSubMessageHandlers() const { +inline MessageHandlers* FieldHandlers::GetSubMessageHandlers() const { return static_cast(upb_fhandlers_getsubmsg(this)); } diff --git a/bindings/cpp/upb/msg.hpp b/bindings/cpp/upb/msg.hpp new file mode 100644 index 0000000..c7cf1f2 --- /dev/null +++ b/bindings/cpp/upb/msg.hpp @@ -0,0 +1,62 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// Routines for reading and writing message data to an in-memory structure, +// similar to a C struct. +// +// upb does not define one single message object that everyone must use. +// Rather it defines an abstract interface for reading and writing members +// of a message object, and all of the parsers and serializers use this +// abstract interface. This allows upb's parsers and serializers to be used +// regardless of what memory management scheme or synchronization model the +// application is using. +// +// A standard set of accessors is provided for doing simple reads and writes at +// a known offset into the message. 
These accessors should be used when +// possible, because they are specially optimized -- for example, the JIT can +// recognize them and emit specialized code instead of having to call the +// function at all. The application can substitute its own accessors when the +// standard accessors are not suitable. + +#ifndef UPB_MSG_HPP +#define UPB_MSG_HPP + +#include "upb/msg.h" +#include "upb/handlers.hpp" + +namespace upb { + +typedef upb_accessor_vtbl AccessorVTable; + +// Registers handlers for writing into a message of the given type using +// whatever accessors it has defined. +inline MessageHandlers* RegisterWriteHandlers(upb::Handlers* handlers, + const upb::MessageDef* md) { + return MessageHandlers::Cast( + upb_accessors_reghandlers(handlers, md)); +} + +template static FieldHandlers::ValueHandler* GetValueHandler(); + +// A handy templated function that will retrieve a value handler for a given +// C++ type. +#define GET_VALUE_HANDLER(type, ctype) \ + template <> \ + FieldHandlers::ValueHandler* GetValueHandler() { \ + return &upb_stdmsg_set ## type; \ + } + +GET_VALUE_HANDLER(double, double); +GET_VALUE_HANDLER(float, float); +GET_VALUE_HANDLER(uint64, uint64_t); +GET_VALUE_HANDLER(uint32, uint32_t); +GET_VALUE_HANDLER(int64, int64_t); +GET_VALUE_HANDLER(int32, int32_t); +GET_VALUE_HANDLER(bool, bool); +#undef GET_VALUE_HANDLER + +} // namespace + +#endif diff --git a/bindings/cpp/upb/pb/glue.hpp b/bindings/cpp/upb/pb/glue.hpp index be072a7..d43baeb 100644 --- a/bindings/cpp/upb/pb/glue.hpp +++ b/bindings/cpp/upb/pb/glue.hpp @@ -13,11 +13,23 @@ namespace upb { +// All routines that load descriptors expect the descriptor to be a +// FileDescriptorSet. 
bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname, Status* status) { return upb_load_descriptor_file_into_symtab(s, fname, status); } +bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str, + size_t len, Status* status) { + return upb_load_descriptor_into_symtab(s, str, len, status); +} + +template +bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) { + return upb_load_descriptor_into_symtab(s, desc.c_str(), desc.size(), status); +} + } // namespace upb #endif diff --git a/bindings/cpp/upb/proto2_bridge.cc b/bindings/cpp/upb/proto2_bridge.cc new file mode 100644 index 0000000..6119295 --- /dev/null +++ b/bindings/cpp/upb/proto2_bridge.cc @@ -0,0 +1,892 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman + +#include +#include +#include "upb/bytestream.hpp" +#include "upb/def.hpp" +#include "upb/handlers.hpp" +#include "upb/msg.hpp" +#include "upb/proto2_bridge.hpp" + +namespace { + +static void* GetFieldPointer(void *message, const upb::FieldDef* f) { + return static_cast(message) + f->offset(); +} + +} // namespace + +#ifdef UPB_GOOGLE3 + +// TODO(haberman): friend upb so that this isn't required. 
+#define protected public +#include "net/proto2/public/repeated_field.h" +#undef protected  // Must undo the #define above so later headers keep real access control. + +#define private public +#include "net/proto/proto2_reflection.h" +#undef private + +#include "net/proto2/proto/descriptor.pb.h" +#include "net/proto2/public/descriptor.h" +#include "net/proto2/public/generated_message_reflection.h" +#include "net/proto2/public/lazy_field.h" +#include "net/proto2/public/message.h" +#include "net/proto2/public/string_piece_field_support.h" +#include "net/proto/internal_layout.h" +#include "strings/cord.h" +using ::proto2::Descriptor; +using ::proto2::EnumDescriptor; +using ::proto2::EnumValueDescriptor; +using ::proto2::FieldDescriptor; +using ::proto2::FieldOptions; +using ::proto2::FileDescriptor; +using ::proto2::internal::GeneratedMessageReflection; +using ::proto2::internal::RepeatedPtrFieldBase; +using ::proto2::internal::StringPieceField; +using ::proto2::Message; +using ::proto2::MessageFactory; +using ::proto2::Reflection; +using ::proto2::RepeatedField; +using ::proto2::RepeatedPtrField; + +namespace upb { + +static const Message* GetPrototypeForField(const Message& m, + const FieldDescriptor* f); + +namespace proto2_bridge_google3 { class FieldAccessor; } + +using ::upb::proto2_bridge_google3::FieldAccessor; + +namespace proto2_bridge_google3 { + +static void AssignToCord(const ByteRegion* r, Cord* cord) { + // TODO(haberman): ref source data if source is a cord. + cord->Clear(); + uint64_t ofs = r->start_ofs(); + while (ofs < r->end_ofs()) { + size_t len; + const char *buf = r->GetPtr(ofs, &len); + cord->Append(StringPiece(buf, len)); + ofs += len; + } +} + +#else + +// TODO(haberman): friend upb so that this isn't required.
+#define protected public +#include "google/protobuf/repeated_field.h" +#undef protected + +#define private public +#include "google/protobuf/generated_message_reflection.h" +#undef private + +#include "google/protobuf/descriptor.h" +#include "google/protobuf/descriptor.pb.h" +#include "google/protobuf/message.h" +using ::google::protobuf::Descriptor; +using ::google::protobuf::EnumDescriptor; +using ::google::protobuf::EnumValueDescriptor; +using ::google::protobuf::FieldDescriptor; +using ::google::protobuf::FieldOptions; +using ::google::protobuf::FileDescriptor; +using ::google::protobuf::internal::GeneratedMessageReflection; +using ::google::protobuf::internal::RepeatedPtrFieldBase; +using ::google::protobuf::Message; +using ::google::protobuf::MessageFactory; +using ::google::protobuf::Reflection; +using ::google::protobuf::RepeatedField; +using ::google::protobuf::RepeatedPtrField; + +namespace upb { +static const Message* GetPrototypeForField(const Message& m, + const FieldDescriptor* f); + +namespace proto2_bridge_opensource { class FieldAccessor; } + +using ::upb::proto2_bridge_opensource::FieldAccessor; + +namespace proto2_bridge_opensource { + +#endif // ifdef UPB_GOOGLE3 + +// Have to define this manually since older versions of proto2 didn't define +// an enum value for STRING. +#define UPB_CTYPE_STRING 0 + +// The code in this class depends on the internal representation of the proto2 +// generated classes, which is an internal implementation detail of proto2 and +// is not a public interface. As a result, this class's implementation may +// need to be changed if/when proto2 changes its internal representation. It +// is intended that this class is the only code that depends on these internal, +// non-public interfaces. +// +// This class only works with messages that use GeneratedMessageReflection. +// Other reflection classes will need other accessor implementations. 
+class FieldAccessor { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const FieldDescriptor* proto2_f, + const upb::MessageDef* md, + upb::FieldDef* upb_f) { + const Message* prototype = static_cast(md->prototype); + const Reflection* base_r = prototype->GetReflection(); + const GeneratedMessageReflection* r = + dynamic_cast(base_r); + // Old versions of the open-source protobuf release erroneously default to + // Cord even though that has never been supported in the open-source + // release. + int32_t ctype = proto2_f->options().has_ctype() ? + proto2_f->options().ctype() : UPB_CTYPE_STRING; + if (!r) return false; + // Extensions not supported yet. + if (proto2_f->is_extension()) return false; + + upb_f->set_accessor(GetForFieldDescriptor(proto2_f, ctype)); + upb_f->set_hasbit(GetHasbit(proto2_f, r)); + upb_f->set_offset(GetOffset(proto2_f, r)); + if (upb_f->IsSubmessage()) { + upb_f->set_subtype_name(proto2_f->message_type()->full_name()); + upb_f->prototype = GetPrototypeForField(*prototype, proto2_f); + } + + if (upb_f->IsString() && !upb_f->IsSequence() && + ctype == UPB_CTYPE_STRING) { + upb_f->prototype = &r->GetStringReference(*prototype, proto2_f, NULL); + } + return true; + } + + static MessageFactory* GetMessageFactory(const Message& m) { + const GeneratedMessageReflection* r = + dynamic_cast(m.GetReflection()); + return r ? r->message_factory_ : NULL; + } + + private: + static int64_t GetHasbit(const FieldDescriptor* f, + const GeneratedMessageReflection* r) { + if (f->is_repeated()) { + // proto2 does not store hasbits for repeated fields. 
+ return -1; + } else { + return (r->has_bits_offset_ * 8) + f->index(); + } + } + + static uint16_t GetOffset(const FieldDescriptor* f, + const GeneratedMessageReflection* r) { + return r->offsets_[f->index()]; + } + + static AccessorVTable *GetForFieldDescriptor(const FieldDescriptor* f, + int32_t ctype) { + switch (f->cpp_type()) { + case FieldDescriptor::CPPTYPE_ENUM: + // Should handlers validate enum membership to match proto2? + case FieldDescriptor::CPPTYPE_INT32: return Get(); + case FieldDescriptor::CPPTYPE_INT64: return Get(); + case FieldDescriptor::CPPTYPE_UINT32: return Get(); + case FieldDescriptor::CPPTYPE_UINT64: return Get(); + case FieldDescriptor::CPPTYPE_DOUBLE: return Get(); + case FieldDescriptor::CPPTYPE_FLOAT: return Get(); + case FieldDescriptor::CPPTYPE_BOOL: return Get(); + case FieldDescriptor::CPPTYPE_STRING: + switch (ctype) { +#ifdef UPB_GOOGLE3 + case FieldOptions::STRING: + return GetForString(); + case FieldOptions::CORD: + return GetForCord(); + case FieldOptions::STRING_PIECE: + return GetForStringPiece(); +#else + case UPB_CTYPE_STRING: + return GetForString(); +#endif + default: return NULL; + } + case FieldDescriptor::CPPTYPE_MESSAGE: +#ifdef UPB_GOOGLE3 + if (f->options().lazy()) { + return NULL; // Not yet implemented. 
+ } else { + return GetForMessage(); + } +#else + return GetForMessage(); +#endif + default: return NULL; + } + } + + // PushOffset handler (used for StartSequence and others) /////////////////// + + static SubFlow PushOffset(void *m, Value fval) { + const FieldDef *f = GetValue(fval); + return UPB_CONTINUE_WITH(GetFieldPointer(m, f)); + } + + // Primitive Value (numeric, enum, bool) ///////////////////////////////////// + + template static AccessorVTable *Get() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + GetValueHandler(), + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &Append, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + template + static Flow Append(void *_r, Value fval, Value val) { + (void)fval; + RepeatedField* r = static_cast*>(_r); + r->Add(GetValue(val)); + return UPB_CONTINUE; + } + + // String //////////////////////////////////////////////////////////////////// + + template static AccessorVTable *GetForString() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + &SetString, + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &AppendString, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + // This needs to be templated because google3 string is not std::string. + template static Flow SetString(void *m, Value fval, Value val) { + const FieldDef* f = GetValue(fval); + T **str = static_cast(GetFieldPointer(m, f)); + // If it points to the default instance, we must create a new instance. 
+ if (*str == f->prototype) *str = new T(); + GetValue(val)->AssignToString(*str); + return UPB_CONTINUE; + } + + template + static Flow AppendString(void *_r, Value fval, Value val) { + (void)fval; + RepeatedPtrField* r = static_cast*>(_r); + GetValue(val)->AssignToString(r->Add()); + return UPB_CONTINUE; + } + + // SubMessage //////////////////////////////////////////////////////////////// + + static AccessorVTable *GetForMessage() { + static upb_accessor_vtbl vtbl = { + &StartSubMessage, + NULL, // Value handler + &PushOffset, // StartSequence handler + &StartRepeatedSubMessage, + NULL, // Repeated value handler + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static SubFlow StartSubMessage(void *m, Value fval) { + const FieldDef* f = GetValue(fval); + void **subm = static_cast(GetFieldPointer(m, f)); + if (*subm == NULL || *subm == f->prototype) { + const Message* prototype = static_cast(f->prototype); + *subm = prototype->New(); + } + return UPB_CONTINUE_WITH(*subm); + } + + class RepeatedMessageTypeHandler { + public: + typedef void Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + (void)t; + assert(false); + } + }; + + // Closure is a RepeatedPtrField*, but we access it through + // its base class RepeatedPtrFieldBase*. + static SubFlow StartRepeatedSubMessage(void* _r, Value fval) { + const FieldDef* f = GetValue(fval); + RepeatedPtrFieldBase *r = static_cast(_r); + void *submsg = r->AddFromCleared(); + if (!submsg) { + const Message* prototype = static_cast(f->prototype); + submsg = prototype->New(); + r->AddAllocated(submsg); + } + return UPB_CONTINUE_WITH(submsg); + } + + // TODO(haberman): handle Extensions, Unknown Fields. + +#ifdef UPB_GOOGLE3 + // Handlers for types/features only included in internal proto2 release: + // Cord, StringPiece, LazyField, and MessageSet. + // TODO(haberman): LazyField, MessageSet. 
+ + // Cord ////////////////////////////////////////////////////////////////////// + + static AccessorVTable *GetForCord() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + &SetCord, + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &AppendCord, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static Flow SetCord(void *m, Value fval, Value val) { + const FieldDef* f = GetValue(fval); + Cord* field = static_cast(GetFieldPointer(m, f)); + AssignToCord(GetValue(val), field); + return UPB_CONTINUE; + } + + static Flow AppendCord(void *_r, Value fval, Value val) { + RepeatedField* r = static_cast*>(_r); + AssignToCord(GetValue(val), r->Add()); + return UPB_CONTINUE; + } + + // StringPiece /////////////////////////////////////////////////////////////// + + static AccessorVTable *GetForStringPiece() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + &SetStringPiece, + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &AppendStringPiece, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static void AssignToStringPieceField(const ByteRegion* r, + proto2::internal::StringPieceField* f) { + // TODO(haberman): alias if possible and enabled on the input stream. + // TODO(haberman): add a method to StringPieceField that lets us avoid + // this copy/malloc/free. 
+ char *data = new char[r->Length()]; + r->Copy(r->start_ofs(), r->Length(), data); + f->CopyFrom(StringPiece(data, r->Length())); + delete[] data; + } + + static Flow SetStringPiece(void *m, Value fval, Value val) { + const FieldDef* f = GetValue(fval); + StringPieceField* field = + static_cast(GetFieldPointer(m, f)); + AssignToStringPieceField(GetValue(val), field); + return UPB_CONTINUE; + } + + static Flow AppendStringPiece(void* _r, Value fval, Value val) { + RepeatedPtrField* r = + static_cast*>(_r); + AssignToStringPieceField(GetValue(val), r->Add()); + return UPB_CONTINUE; + } + +#endif // UPB_GOOGLE3 +}; + +#ifdef UPB_GOOGLE3 + +// Proto1 accessor -- only needed inside Google. +class Proto1FieldAccessor { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const FieldDescriptor* proto2_f, + const upb::MessageDef* md, + upb::FieldDef* upb_f) { + const Message* m = static_cast(md->prototype); + const proto2::Reflection* base_r = m->GetReflection(); + const _pi::Proto2Reflection* r = + dynamic_cast(base_r); + if (!r) return false; + // Extensions not supported yet. + if (proto2_f->is_extension()) return false; + + const _pi::Field* f = r->GetFieldLayout(proto2_f); + + if (f->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) { + // Override the BYTES type that proto2 descriptors have for weak fields. 
+ upb_f->set_type(UPB_TYPE(MESSAGE)); + } + + if (upb_f->IsSubmessage()) { + const Message* prototype = upb::GetPrototypeForField(*m, proto2_f); + upb_f->set_subtype_name(prototype->GetDescriptor()->full_name()); + upb_f->prototype = prototype; + } + + upb_f->set_accessor(GetForCrep(f->crep)); + upb_f->set_hasbit(GetHasbit(proto2_f, r)); + upb_f->set_offset(GetOffset(proto2_f, r)); + return true; + } + + private: + static int16_t GetHasbit(const FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + if (f->is_repeated()) { + // proto1 does not store hasbits for repeated fields. + return -1; + } else { + return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index; + } + } + + static uint16_t GetOffset(const FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + return r->GetFieldLayout(f)->offset; + } + + static AccessorVTable *GetForCrep(int crep) { +#define PRIMITIVE(name, type_name) \ + case _pi::CREP_REQUIRED_ ## name: \ + case _pi::CREP_OPTIONAL_ ## name: \ + case _pi::CREP_REPEATED_ ## name: return Get(); + + switch (crep) { + PRIMITIVE(DOUBLE, double); + PRIMITIVE(FLOAT, float); + PRIMITIVE(INT64, int64_t); + PRIMITIVE(UINT64, uint64_t); + PRIMITIVE(INT32, int32_t); + PRIMITIVE(FIXED64, uint64_t); + PRIMITIVE(FIXED32, uint32_t); + PRIMITIVE(BOOL, bool); + case _pi::CREP_REQUIRED_STRING: + case _pi::CREP_OPTIONAL_STRING: + case _pi::CREP_REPEATED_STRING: return GetForString(); + case _pi::CREP_OPTIONAL_OUTOFLINE_STRING: return GetForOutOfLineString(); + case _pi::CREP_REQUIRED_CORD: + case _pi::CREP_OPTIONAL_CORD: + case _pi::CREP_REPEATED_CORD: return GetForCord(); + case _pi::CREP_REQUIRED_GROUP: + case _pi::CREP_REQUIRED_FOREIGN: + case _pi::CREP_REQUIRED_FOREIGN_PROTO2: return GetForRequiredMessage(); + case _pi::CREP_OPTIONAL_GROUP: + case _pi::CREP_REPEATED_GROUP: + case _pi::CREP_OPTIONAL_FOREIGN: + case _pi::CREP_REPEATED_FOREIGN: + case _pi::CREP_OPTIONAL_FOREIGN_PROTO2: + case _pi::CREP_REPEATED_FOREIGN_PROTO2: return 
GetForMessage(); + case _pi::CREP_OPTIONAL_FOREIGN_WEAK: return GetForWeakMessage(); + default: assert(false); return NULL; + } +#undef PRIMITIVE + } + + // PushOffset handler (used for StartSequence and others) /////////////////// + + // We can find a RepeatedField* or a RepeatedPtrField* at f->offset(). + static SubFlow PushOffset(void *m, Value fval) { + const FieldDef *f = GetValue(fval); + return UPB_CONTINUE_WITH(GetFieldPointer(m, f)); + } + + // Primitive Value (numeric, enum, bool) ///////////////////////////////////// + + template static AccessorVTable *Get() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + GetValueHandler(), + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &Append, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + template + static Flow Append(void *_r, Value fval, Value val) { + (void)fval; + // Proto1's ProtoArray class derives from RepeatedField. + RepeatedField* r = static_cast*>(_r); + r->Add(GetValue(val)); + return UPB_CONTINUE; + } + + // String //////////////////////////////////////////////////////////////////// + + static AccessorVTable *GetForString() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + &SetString, + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &AppendString, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static Flow SetString(void *m, Value fval, Value val) { + const FieldDef* f = GetValue(fval); + string *str = static_cast(GetFieldPointer(m, f)); + GetValue(val)->AssignToString(str); + return UPB_CONTINUE; + } + + static Flow AppendString(void *_r, Value fval, Value val) { + (void)fval; + RepeatedPtrField* r = static_cast*>(_r); + GetValue(val)->AssignToString(r->Add()); + return UPB_CONTINUE; + } + + // Out-of-line string //////////////////////////////////////////////////////// + + static AccessorVTable *GetForOutOfLineString() { + static upb_accessor_vtbl 
vtbl = { + NULL, &SetOutOfLineString, + // This type is only used for non-repeated string fields. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static Flow SetOutOfLineString(void *m, Value fval, Value val) { + const FieldDef* f = GetValue(fval); + string **str = static_cast(GetFieldPointer(m, f)); + if (*str == &::ProtocolMessage::___empty_internal_proto_string_) + *str = new string(); + GetValue(val)->AssignToString(*str); + return UPB_CONTINUE; + } + + // Cord ////////////////////////////////////////////////////////////////////// + + static AccessorVTable *GetForCord() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + &SetCord, + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &AppendCord, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static Flow SetCord(void *m, Value fval, Value val) { + const FieldDef* f = GetValue(fval); + Cord* field = static_cast(GetFieldPointer(m, f)); + AssignToCord(GetValue(val), field); + return UPB_CONTINUE; + } + + static Flow AppendCord(void *_r, Value fval, Value val) { + RepeatedField* r = static_cast*>(_r); + AssignToCord(GetValue(val), r->Add()); + return UPB_CONTINUE; + } + + // SubMessage //////////////////////////////////////////////////////////////// + + static AccessorVTable *GetForRequiredMessage() { + static upb_accessor_vtbl vtbl = { + &PushOffset, // StartSubMessage handler + NULL, // Value handler + &PushOffset, // StartSequence handler + &StartRepeatedSubMessage, + NULL, // Repeated value handler + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static AccessorVTable *GetForWeakMessage() { + static upb_accessor_vtbl vtbl = { + &StartWeakSubMessage, // StartSubMessage handler + NULL, // Value handler + &PushOffset, // StartSequence handler + &StartRepeatedSubMessage, + NULL, // Repeated value handler + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static AccessorVTable 
*GetForMessage() { + static upb_accessor_vtbl vtbl = { + &StartSubMessage, + NULL, // Value handler + &PushOffset, // StartSequence handler + &StartRepeatedSubMessage, + NULL, // Repeated value handler + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static SubFlow StartSubMessage(void *m, Value fval) { + const FieldDef* f = GetValue(fval); + Message **subm = static_cast(GetFieldPointer(m, f)); + if (*subm == f->prototype) *subm = (*subm)->New(); + return UPB_CONTINUE_WITH(*subm); + } + + static SubFlow StartWeakSubMessage(void *m, Value fval) { + const FieldDef* f = GetValue(fval); + Message **subm = static_cast(GetFieldPointer(m, f)); + if (*subm == NULL) { + const Message* prototype = static_cast(f->prototype); + *subm = prototype->New(); + } + return UPB_CONTINUE_WITH(*subm); + } + + class RepeatedMessageTypeHandler { + public: + typedef void Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + (void)t; + assert(false); + } + }; + + // Closure is a RepeatedPtrField*, but we access it through + // its base class RepeatedPtrFieldBase*. + static SubFlow StartRepeatedSubMessage(void* _r, Value fval) { + const FieldDef* f = GetValue(fval); + RepeatedPtrFieldBase *r = static_cast(_r); + void *submsg = r->AddFromCleared(); + if (!submsg) { + const Message* prototype = static_cast(f->prototype); + submsg = prototype->New(); + r->AddAllocated(submsg); + } + return UPB_CONTINUE_WITH(submsg); + } +}; + +#endif + +} // namespace proto2_bridge_{google3,opensource} + +static const Message* GetPrototypeForMessage(const Message& m) { + const Message* ret = NULL; + MessageFactory* factory = FieldAccessor::GetMessageFactory(m); + if (factory) { + // proto2 generated message or DynamicMessage. 
+ ret = factory->GetPrototype(m.GetDescriptor()); + assert(ret); + } else { + // Proto1 message; since proto1 has no dynamic message, it must be + // from the generated factory. + ret = MessageFactory::generated_factory()->GetPrototype(m.GetDescriptor()); + assert(ret); // If NULL, then wasn't a proto1 message, can't handle it. + } + assert(ret->GetReflection() == m.GetReflection()); + return ret; +} + +static const Message* GetPrototypeForField(const Message& m, + const FieldDescriptor* f) { +#ifdef UPB_GOOGLE3 + if (f->type() == FieldDescriptor::TYPE_BYTES) { + // Proto1 weak field: the proto2 descriptor says their type is BYTES. + const _pi::Proto2Reflection* r = + dynamic_cast(m.GetReflection()); + assert(r); + const _pi::Field* field = r->GetFieldLayout(f); + assert(field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK); + return GetPrototypeForMessage( + *static_cast(field->weak_layout()->default_instance)); + } else if (dynamic_cast(m.GetReflection())) { + // Proto1 message; since proto1 has no dynamic message, it must be from + // the generated factory. + const Message* ret = + MessageFactory::generated_factory()->GetPrototype(f->message_type()); + assert(ret); + return ret; + } +#endif + assert(f->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE); + // We assume that all submessages (and extensions) will be constructed using + // the same MessageFactory as this message. This doesn't cover the case of + // CodedInputStream::SetExtensionRegistry(). + MessageFactory* factory = FieldAccessor::GetMessageFactory(m); + assert(factory); // If neither proto1 nor proto2 we can't handle it. 
+ const Message* ret = factory->GetPrototype(f->message_type()); + assert(ret); + return ret; +} + +namespace proto2_bridge { + +upb::FieldDef* AddFieldDef(const FieldDescriptor* f, upb::MessageDef* md) { + upb::FieldDef* upb_f = upb::FieldDef::New(&upb_f); + upb_f->set_number(f->number()); + upb_f->set_name(f->name()); + upb_f->set_label(static_cast(f->label())); + upb_f->set_type(static_cast(f->type())); + + if (!FieldAccessor::TrySet(f, md, upb_f) +#ifdef UPB_GOOGLE3 + && !proto2_bridge_google3::Proto1FieldAccessor::TrySet(f, md, upb_f) +#endif + ) { + // Unsupported reflection class. + assert(false); + } + + if (upb_f->type() == UPB_TYPE(ENUM)) { + // We set the enum default symbolically. + upb_f->set_default(f->default_value_enum()->name()); + upb_f->set_subtype_name(f->enum_type()->full_name()); + } else { + // Set field default for primitive types. Need to switch on the upb type + // rather than the proto2 type, because upb_f->type() may have been changed + // from BYTES to MESSAGE for a weak field. + switch (upb_types[upb_f->type()].inmemory_type) { + case UPB_CTYPE_INT32: + upb_f->set_default(MakeValue(f->default_value_int32())); + break; + case UPB_CTYPE_INT64: + upb_f->set_default( + MakeValue(static_cast(f->default_value_int64()))); + break; + case UPB_CTYPE_UINT32: + upb_f->set_default(MakeValue(f->default_value_uint32())); + break; + case UPB_CTYPE_UINT64: + upb_f->set_default( + MakeValue(static_cast(f->default_value_uint64()))); + break; + case UPB_CTYPE_DOUBLE: + upb_f->set_default(MakeValue(f->default_value_double())); + break; + case UPB_CTYPE_FLOAT: + upb_f->set_default(MakeValue(f->default_value_float())); + break; + case UPB_CTYPE_BOOL: + upb_f->set_default(MakeValue(f->default_value_bool())); + break; + case UPB_CTYPE_BYTEREGION: + upb_f->set_default(f->default_value_string()); + break; + } + } + return md->AddField(upb_f, &upb_f) ? 
upb_f : NULL; +} + +upb::MessageDef *NewEmptyMessageDef(const Message& m, void *owner) { + upb::MessageDef *md = upb::MessageDef::New(owner); + md->set_full_name(m.GetDescriptor()->full_name()); + md->prototype = GetPrototypeForMessage(m); + return md; +} + +upb::EnumDef* NewEnumDef(const EnumDescriptor* desc, void *owner) { + upb::EnumDef* e = upb::EnumDef::New(owner); + e->set_full_name(desc->full_name()); + for (int i = 0; i < desc->value_count(); i++) { + const EnumValueDescriptor* val = desc->value(i); + bool success = e->AddValue(val->name(), val->number()); + assert(success); + (void)success; + } + return e; +} + +void AddAllFields(upb::MessageDef* md) { + const Descriptor* d = + static_cast(md->prototype)->GetDescriptor(); + for (int i = 0; i < d->field_count(); i++) { +#ifdef UPB_GOOGLE3 + // Skip lazy fields for now since we can't properly handle them. + if (d->field(i)->options().lazy()) continue; +#endif + // Extensions not supported yet. + if (d->field(i)->is_extension()) continue; + AddFieldDef(d->field(i), md); + } +} + +upb::MessageDef *NewFullMessageDef(const Message& m, void *owner) { + upb::MessageDef* md = NewEmptyMessageDef(m, owner); + AddAllFields(md); + // TODO(haberman): add unknown field handler and extensions. + return md; +} + +typedef std::map SymbolMap; + +static upb::MessageDef* NewFinalMessageDefHelper(const Message& m, void *owner, + SymbolMap* symbols) { + upb::MessageDef* md = NewFullMessageDef(m, owner); + // Must do this before processing submessages to prevent infinite recursion. 
+ (*symbols)[std::string(md->full_name())] = md->AsDef(); + + for (upb::MessageDef::Iterator i(md); !i.Done(); i.Next()) { + upb::FieldDef* f = i.field(); + if (!f->HasSubDef()) continue; + SymbolMap::iterator iter = symbols->find(f->subtype_name()); + upb::Def* subdef; + if (iter != symbols->end()) { + subdef = iter->second; + } else { + const FieldDescriptor* proto2_f = + m.GetDescriptor()->FindFieldByNumber(f->number()); + if (f->type() == UPB_TYPE(ENUM)) { + subdef = NewEnumDef(proto2_f->enum_type(), owner)->AsDef(); + (*symbols)[std::string(subdef->full_name())] = subdef; + } else { + assert(f->IsSubmessage()); + const Message* prototype = GetPrototypeForField(m, proto2_f); + subdef = NewFinalMessageDefHelper(*prototype, owner, symbols)->AsDef(); + } + } + f->set_subdef(subdef); + } + return md; +} + +const upb::MessageDef* NewFinalMessageDef(const Message& m, void *owner) { + SymbolMap symbols; + upb::MessageDef* ret = NewFinalMessageDefHelper(m, owner, &symbols); + + // Finalize defs. + std::vector defs; + SymbolMap::iterator iter; + for (iter = symbols.begin(); iter != symbols.end(); ++iter) { + defs.push_back(iter->second); + } + Status status; + bool success = Def::Finalize(defs, &status); + assert(success); + (void)success; + + // Unref all defs except the top-level one that we are returning. + for (int i = 0; i < static_cast(defs.size()); i++) { + if (defs[i] != ret->AsDef()) defs[i]->Unref(owner); + } + + return ret; +} + +} // namespace proto2_bridge +} // namespace upb diff --git a/bindings/cpp/upb/proto2_bridge.hpp b/bindings/cpp/upb/proto2_bridge.hpp new file mode 100644 index 0000000..ace08ce --- /dev/null +++ b/bindings/cpp/upb/proto2_bridge.hpp @@ -0,0 +1,170 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. 
+// Author: Josh Haberman +// +// A bridge between upb and proto2, allows populating proto2 generated +// classes using upb's parser, translating between descriptors and defs, etc. +// +// This is designed to be able to be compiled against either the open-source +// version of protocol buffers or the Google-internal proto2. The two are +// the same in most ways, but live in different namespaces (proto2 vs +// google::protobuf) and have a few other more minor differences. +// +// The bridge gives you a lot of control over which fields will be written to +// the message (fields that are not written will just be skipped), and whether +// unknown fields are written to the UnknownFieldSet. This can save a lot of +// work if the client only cares about some subset of the fields. +// +// Example usage: +// +// // Build a def that will have all fields and parse just like proto2 would. +// const upb::MessageDef* md = upb::proto2_bridge::NewMessageDef(&MyProto()); +// +// // JIT the parser; should only be done once ahead-of-time. +// upb::Handlers* handlers = upb::NewHandlersForMessage(md); +// upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers); +// handlers->Unref(); +// +// // The actual parsing. +// MyProto proto; +// upb::Decoder decoder; +// upb::StringSource source(buf, len); +// decoder.ResetPlan(plan, 0); +// decoder.ResetInput(source.AllBytes(), &proto); +// CHECK(decoder.Decode() == UPB_OK) << decoder.status(); +// +// To parse only one field and skip all others: +// +// const upb::MessageDef* md = +// upb::proto2_bridge::NewEmptyMessageDef(MyProto().GetPrototype()); +// upb::proto2_bridge::AddFieldDef( +// MyProto::descriptor()->FindFieldByName("my_field"), md); +// upb::Finalize(md); +// +// // Now continue with "JIT the parser" from above. +// +// Note that there is currently no support for +// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate +// DescriptorPool and MessageFactory for extensions. 
Since this is a property +// of the input in proto2, it's difficult to build a plan ahead-of-time that +// can properly support this. If it's an important use case, the caller should +// probably build a upb plan explicitly. + +#ifndef UPB_PROTO2_BRIDGE +#define UPB_PROTO2_BRIDGE + +#include + +namespace google { +namespace protobuf { +class Descriptor; +class EnumDescriptor; +class FieldDescriptor; +class FileDescriptor; +class Message; +} // namespace protobuf +} // namespace google + +namespace proto2 { +class Descriptor; +class EnumDescriptor; +class FieldDescriptor; +class FileDescriptor; +class Message; +} // namespace proto2 + + +namespace upb { + +class Def; +class FieldDef; +class MessageDef; + +namespace proto2_bridge { + +// Unfinalized defs //////////////////////////////////////////////////////////// + +// Creation of UNFINALIZED defs. All of these functions return defs that are +// still mutable and have not been finalized. They must be finalized before +// using them to parse anything. This is useful if you want more control over +// the process of constructing defs, eg. to add the specific set of fields you +// care about. + +// Creates a new upb::MessageDef that corresponds to the type in the given +// prototype message. The MessageDef will not have any fields added to it. +upb::MessageDef *NewEmptyMessageDef(const proto2::Message& m, void *owner); +upb::MessageDef *NewEmptyMessageDef(const google::protobuf::Message& desc, + void *owner); + +// Adds a new upb::FieldDef to the given MessageDef corresponding to the given +// FieldDescriptor. The FieldDef will be given an accessor and offset so that +// it can be used to read and write data into the proto2::Message classes. +// The given MessageDef must have been constructed with NewEmptyMessageDef() +// and f->containing_type() must correspond to the message that was used.
+// +// Any submessage, group, or enum fields will be given symbolic references to +// the subtype, which must be resolved before the MessageDef can be finalized. +// +// On success, returns the FieldDef that was added (caller does not own a ref). +// If an existing field had the same name or number, returns NULL. +upb::FieldDef* AddFieldDef(const proto2::FieldDescriptor* f, + upb::MessageDef* md); +upb::FieldDef* AddFieldDef(const google::protobuf::FieldDescriptor* f, + upb::MessageDef* md); + +// Given a MessageDef that was constructed with NewEmptyMessageDef(), adds +// FieldDefs for all fields defined in the original message, but not for any +// extensions or unknown fields. The given MessageDef must not have any fields +// that have the same name or number as any of the fields we are adding (the +// easiest way to guarantee this is to start with an empty MessageDef). +// +// NOTE(review): declared void, so no success/failure value is returned. +void AddAllFields(upb::MessageDef* md); + +// TODO(haberman): Add: +// // Adds a handler that will store unknown fields in the UnknownFieldSet. +// void AddUnknownFieldHandler(upb::MessageDef* md); + +// Returns a new upb::MessageDef that contains handlers for all fields, unknown +// fields, and any extensions in the descriptor's pool. The resulting +// def/handlers should be equivalent to the generated code constructed by the +// protobuf compiler (or the code in DynamicMessage) for the given type. +// The subdefs for message/enum fields (if any) will be referenced symbolically, +// and will need to be resolved before being finalized. +// +// TODO(haberman): Add missing support (LazyField, MessageSet, and extensions). +// +// TODO(haberman): possibly add a similar function that lets you supply a +// separate DescriptorPool and MessageFactory for extensions, to support +// proto2's io::CodedInputStream::SetExtensionRegistry().
+upb::MessageDef* NewFullMessageDef(const proto2::Message& m, void *owner); +upb::MessageDef* NewFullMessageDef(const google::protobuf::Message& m, + void *owner); + +// Returns a new upb::EnumDef that corresponds to the given EnumDescriptor. +// Caller owns a ref on the returned EnumDef. +upb::EnumDef* NewEnumDef(const proto2::EnumDescriptor* desc, void *owner); +upb::EnumDef* NewEnumDef(const google::protobuf::EnumDescriptor* desc, + void *owner); + +// Finalized defs ////////////////////////////////////////////////////////////// + +// These functions return FINALIZED defs, meaning that they are immutable and +// ready for use. Since they are immutable you cannot make any further changes +// to e.g. the set of fields, but these functions are more convenient if you +// simply want to parse a message exactly how the built-in proto2 parser would. + +// Creates and returns a finalized MessageDef for the given message and its +// entire type tree that will include all fields and unknown handlers (i.e. it +// will parse just like proto2 would).
+const upb::MessageDef* NewFinalMessageDef(const proto2::Message& m, + void *owner); +const upb::MessageDef* NewFinalMessageDef(const google::protobuf::Message& m, + void *owner); + +} // namespace proto2_bridge +} // namespace upb + +#endif diff --git a/bindings/cpp/upb/upb.hpp b/bindings/cpp/upb/upb.hpp index 226859c..48c2708 100644 --- a/bindings/cpp/upb/upb.hpp +++ b/bindings/cpp/upb/upb.hpp @@ -10,6 +10,16 @@ #include "upb/upb.h" #include <iostream> +#if defined(__GXX_EXPERIMENTAL_CXX0X__) && !defined(UPB_NO_CXX11) +#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \ + class_name() = delete; \ + ~class_name() = delete; +#else +#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \ + class_name(); \ + ~class_name(); +#endif + namespace upb { typedef upb_success_t Success; @@ -31,11 +41,35 @@ class Status : public upb_status { void Clear() { upb_status_clear(this); } }; -class Value : public upb_value { - public: - Value(const upb_value& val) { *this = val; } - Value() {} -}; +typedef upb_value Value; + +template <typename T> T GetValue(Value v); // Primary; specialized per type. +template <typename T> Value MakeValue(T v); // Primary; specialized per type. + +#define UPB_VALUE_ACCESSORS(type, ctype) \ + template <> inline ctype GetValue<ctype>(Value v) { \ + return upb_value_get ## type(v); \ + } \ + template <> inline Value MakeValue<ctype>(ctype v) { \ + return upb_value_ ## type(v); \ + } + +UPB_VALUE_ACCESSORS(double, double); +UPB_VALUE_ACCESSORS(float, float); +UPB_VALUE_ACCESSORS(int32, int32_t); +UPB_VALUE_ACCESSORS(int64, int64_t); +UPB_VALUE_ACCESSORS(uint32, uint32_t); +UPB_VALUE_ACCESSORS(uint64, uint64_t); +UPB_VALUE_ACCESSORS(bool, bool); + +#undef UPB_VALUE_ACCESSORS + +template <typename T> inline T* GetPtrValue(Value v) { + return static_cast<T*>(upb_value_getptr(v)); +} +template <typename T> inline Value MakePtrValue(T* v) { + return upb_value_ptr(static_cast<void*>(v)); +} INLINE std::ostream& operator<<(std::ostream& out, const Status& status) { out << status.GetString(); -- cgit v1.2.3