diff options
Diffstat (limited to 'upb/bindings')
27 files changed, 4654 insertions, 0 deletions
diff --git a/upb/bindings/README b/upb/bindings/README new file mode 100644 index 0000000..e4bf0b8 --- /dev/null +++ b/upb/bindings/README @@ -0,0 +1,25 @@ +This directory contains code that interfaces upb with external C/C++ +libraries. For example: + + * upb/bindings/{stdc,stdc++} + interfaces between upb and the standard libraries of C and C++ (like C's + FILE/stdio, C++'s string/iostream, etc.) + + * upb/bindings/googlepb + interfaces between upb and the "protobuf" library distributed by Google. + + * upb/bindings/lua: + a Lua extension that exposes upb to Lua programs via the Lua C API. + + * upb/bindings/linux: + code and build system for building upb as a Linux kernel module. + +The two key characteristics that decide whether code belongs in upb/bindings/ +are: + + * Does the code's public API refer to types from another library? + If so it belongs in upb/bindings/. But this doesn't include code that just + happens to use another library internally, as an implementation detail. + + * Would this code be useful to someone who is not using this external library + in some other way? If so, the code probably doesn't belong in upb/bindings/. diff --git a/upb/bindings/googlepb/README b/upb/bindings/googlepb/README new file mode 100644 index 0000000..e3140f4 --- /dev/null +++ b/upb/bindings/googlepb/README @@ -0,0 +1,20 @@ +This directory contains code to interoperate with Google's official +Protocol Buffers release. Since it doesn't really have a name +besides "protobuf," calling this directory "googlepb" seems like the +least confusing option, since it lives in the google::protobuf +namespace. + +We support writing into protobuf's generated classes (and hopefully +reading too, before long). We support both the open source protobuf +release and the Google-internal version (which is mostly the same +code, just in a different namespace). A single compile of upb can +support both (there are no conflicts thanks to function overloading). 
+ +The internal version supports some features that are not supported in +the open-source release. Also, the internal version includes the +legacy "proto1" classes which we must support; thankfully this is +mostly relegated to its own separate file. + +Our functionality requires the full google::protobuf::Message +interface; we rely on reflection so we know what fields to read/write +and where to put them, so we can't support MessageLite. diff --git a/upb/bindings/googlepb/bridge.cc b/upb/bindings/googlepb/bridge.cc new file mode 100644 index 0000000..a125249 --- /dev/null +++ b/upb/bindings/googlepb/bridge.cc @@ -0,0 +1,279 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman <jhaberman@gmail.com> +// +// IMPORTANT NOTE! Inside Google, this file is compiled TWICE, once with +// UPB_GOOGLE3 defined and once without! This allows us to provide +// functionality against proto2 and protobuf opensource both in a single binary +// without the two conflicting. However, we must be careful not to violate the +// ODR. 
+ +#include "upb/bindings/googlepb/bridge.h" + +#include <stdio.h> +#include <map> +#include <string> +#include "upb/def.h" +#include "upb/bindings/googlepb/proto1.h" +#include "upb/bindings/googlepb/proto2.h" +#include "upb/handlers.h" + +#define ASSERT_STATUS(status) do { \ + if (!upb_ok(status)) { \ + fprintf(stderr, "upb status failure: %s\n", upb_status_errmsg(status)); \ + assert(upb_ok(status)); \ + } \ + } while (0) + +#ifdef UPB_GOOGLE3 +#include "net/proto2/public/descriptor.h" +#include "net/proto2/public/message.h" +#include "net/proto2/proto/descriptor.pb.h" +namespace goog = ::proto2; +#else +#include "google/protobuf/descriptor.h" +#include "google/protobuf/message.h" +#include "google/protobuf/descriptor.pb.h" +namespace goog = ::google::protobuf; +#endif + +namespace { + +const goog::Message* GetPrototype(const goog::Message& m, + const goog::FieldDescriptor* f) { + const goog::Message* ret = NULL; +#ifdef UPB_GOOGLE3 + ret = upb::google::GetProto1WeakPrototype(m, f); + if (ret) return ret; +#endif + + if (f->cpp_type() == goog::FieldDescriptor::CPPTYPE_MESSAGE) { + ret = upb::google::GetFieldPrototype(m, f); +#ifdef UPB_GOOGLE3 + if (!ret) ret = upb::google::GetProto1FieldPrototype(m, f); +#endif + assert(ret); + } + return ret; +} + +} // namespace + +namespace upb { +namespace googlepb { + + +/* DefBuilder ****************************************************************/ + +const EnumDef* DefBuilder::GetEnumDef(const goog::EnumDescriptor* ed) { + const EnumDef* cached = FindInCache<EnumDef>(ed); + if (cached) return cached; + + EnumDef* e = AddToCache(ed, EnumDef::New()); + + Status status; + e->set_full_name(ed->full_name(), &status); + for (int i = 0; i < ed->value_count(); i++) { + const goog::EnumValueDescriptor* val = ed->value(i); + bool success = e->AddValue(val->name(), val->number(), &status); + UPB_ASSERT_VAR(success, success); + } + + e->Freeze(&status); + + ASSERT_STATUS(&status); + return e; +} + +const MessageDef* 
DefBuilder::GetMaybeUnfrozenMessageDef( + const goog::Descriptor* d, const goog::Message* m) { + const MessageDef* cached = FindInCache<MessageDef>(d); + if (cached) return cached; + + MessageDef* md = AddToCache(d, MessageDef::New()); + to_freeze_.push_back(upb::upcast(md)); + + Status status; + md->set_full_name(d->full_name(), &status); + ASSERT_STATUS(&status); + + // Find all regular fields and extensions for this message. + std::vector<const goog::FieldDescriptor*> fields; + d->file()->pool()->FindAllExtensions(d, &fields); + for (int i = 0; i < d->field_count(); i++) { + fields.push_back(d->field(i)); + } + + for (int i = 0; i < fields.size(); i++) { + const goog::FieldDescriptor* proto2_f = fields[i]; + assert(proto2_f); +#ifdef UPB_GOOGLE3 + // Skip lazy fields for now since we can't properly handle them. + if (proto2_f->options().lazy()) continue; +#endif + md->AddField(NewFieldDef(proto2_f, m), &status); + } + ASSERT_STATUS(&status); + return md; +} + +reffed_ptr<FieldDef> DefBuilder::NewFieldDef(const goog::FieldDescriptor* f, + const goog::Message* m) { + const goog::Message* subm = NULL; + const goog::Message* weak_prototype = NULL; + + if (m) { +#ifdef UPB_GOOGLE3 + weak_prototype = upb::google::GetProto1WeakPrototype(*m, f); +#endif + subm = GetPrototype(*m, f); + } + + reffed_ptr<FieldDef> upb_f(FieldDef::New()); + Status status; + upb_f->set_number(f->number(), &status); + upb_f->set_label(FieldDef::ConvertLabel(f->label())); + + if (f->is_extension()) { + upb_f->set_name(f->full_name(), &status); + upb_f->set_is_extension(true); + } else { + upb_f->set_name(f->name(), &status); + } + + // For weak fields, weak_prototype will be non-NULL even though the proto2 + // descriptor does not indicate a submessage field. + upb_f->set_descriptor_type(weak_prototype + ? 
UPB_DESCRIPTOR_TYPE_MESSAGE + : FieldDef::ConvertDescriptorType(f->type())); + + switch (upb_f->type()) { + case UPB_TYPE_INT32: + upb_f->set_default_int32(f->default_value_int32()); + break; + case UPB_TYPE_INT64: + upb_f->set_default_int64(f->default_value_int64()); + break; + case UPB_TYPE_UINT32: + upb_f->set_default_uint32(f->default_value_uint32()); + break; + case UPB_TYPE_UINT64: + upb_f->set_default_uint64(f->default_value_uint64()); + break; + case UPB_TYPE_DOUBLE: + upb_f->set_default_double(f->default_value_double()); + break; + case UPB_TYPE_FLOAT: + upb_f->set_default_float(f->default_value_float()); + break; + case UPB_TYPE_BOOL: + upb_f->set_default_bool(f->default_value_bool()); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + upb_f->set_default_string(f->default_value_string(), &status); + break; + case UPB_TYPE_MESSAGE: { + const goog::Descriptor* subd = + subm ? subm->GetDescriptor() : f->message_type(); + upb_f->set_message_subdef(GetMaybeUnfrozenMessageDef(subd, subm), + &status); + break; + } + case UPB_TYPE_ENUM: + // We set the enum default numerically. 
+ upb_f->set_default_int32(f->default_value_enum()->number()); + upb_f->set_enum_subdef(GetEnumDef(f->enum_type()), &status); + break; + } + + ASSERT_STATUS(&status); + return upb_f; +} + +void DefBuilder::Freeze() { + upb::Status status; + upb::Def::Freeze(to_freeze_, &status); + ASSERT_STATUS(&status); + to_freeze_.clear(); +} + +const MessageDef* DefBuilder::GetMessageDef(const goog::Descriptor* d) { + const MessageDef* ret = GetMaybeUnfrozenMessageDef(d, NULL); + Freeze(); + return ret; +} + +const MessageDef* DefBuilder::GetMessageDefExpandWeak( + const goog::Message& m) { + const MessageDef* ret = GetMaybeUnfrozenMessageDef(m.GetDescriptor(), &m); + Freeze(); + return ret; +} + + +/* CodeCache *****************************************************************/ + +const Handlers* CodeCache::GetMaybeUnfrozenWriteHandlers( + const MessageDef* md, const goog::Message& m) { + const Handlers* cached = FindInCache(md); + if (cached) return cached; + + Handlers* h = AddToCache(md, upb::Handlers::New(md)); + to_freeze_.push_back(h); + const goog::Descriptor* d = m.GetDescriptor(); + + for (upb::MessageDef::const_iterator i = md->begin(); i != md->end(); ++i) { + const FieldDef* upb_f = *i; + + const goog::FieldDescriptor* proto2_f = + d->FindFieldByNumber(upb_f->number()); + if (!proto2_f) { + proto2_f = d->file()->pool()->FindExtensionByNumber(d, upb_f->number()); + } + assert(proto2_f); + + if (!upb::google::TrySetWriteHandlers(proto2_f, m, upb_f, h) +#ifdef UPB_GOOGLE3 + && !upb::google::TrySetProto1WriteHandlers(proto2_f, m, upb_f, h) +#endif + ) { + // Unsupported reflection class. + // + // Should we fall back to using the public Reflection interface in this + // case? It's unclear whether it's supported behavior for users to + // create their own Reflection classes. 
+ assert(false); + } + + if (upb_f->type() == UPB_TYPE_MESSAGE) { + const goog::Message* prototype = GetPrototype(m, proto2_f); + assert(prototype); + const upb::Handlers* sub_handlers = + GetMaybeUnfrozenWriteHandlers(upb_f->message_subdef(), *prototype); + h->SetSubHandlers(upb_f, sub_handlers); + } + } + + return h; +} + +const Handlers* CodeCache::GetWriteHandlers(const goog::Message& m) { + const MessageDef* md = def_builder_.GetMessageDefExpandWeak(m); + const Handlers* ret = GetMaybeUnfrozenWriteHandlers(md, m); + upb::Status status; + upb::Handlers::Freeze(to_freeze_, &status); + ASSERT_STATUS(&status); + to_freeze_.clear(); + return ret; +} + +upb::reffed_ptr<const upb::Handlers> NewWriteHandlers(const goog::Message& m) { + CodeCache cache; + return upb::reffed_ptr<const upb::Handlers>(cache.GetWriteHandlers(m)); +} + +} // namespace googlepb +} // namespace upb diff --git a/upb/bindings/googlepb/bridge.h b/upb/bindings/googlepb/bridge.h new file mode 100644 index 0000000..9eed51b --- /dev/null +++ b/upb/bindings/googlepb/bridge.h @@ -0,0 +1,205 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman <jhaberman@gmail.com> +// +// This file contains functionality for constructing upb Defs and Handlers +// corresponding to proto2 messages. Using this functionality, you can use upb +// to dynamically generate parsing code that can behave exactly like proto2's +// generated parsing code. Alternatively, you can configure things to +// read/write only a subset of the fields for higher performance when only some +// fields are needed. +// +// Example usage: +// +// // JIT the parser; should only be done once ahead-of-time. 
+// upb::reffed_ptr<const upb::Handlers> write_myproto( +// upb::googlepb::NewWriteHandlers(MyProto())); +// upb::reffed_ptr<const upb::Handlers> parse_myproto( +// upb::Decoder::NewDecoderHandlers(write_myproto.get(), true)); +// +// // The actual parsing. +// MyProto proto; +// upb::SeededPipeline<8192> pipeline(upb_realloc, NULL); +// upb::Sink* write_sink = pipeline.NewSink(write_myproto.get()); +// upb::Sink* parse_sink = pipeline.NewSink(parse_myproto.get()); +// upb::pb::Decoder* decoder = parse_sink->GetObject<upb::pb::Decoder>(); +// upb::pb::ResetDecoderSink(decoder, write_sink); +// write_sink->Reset(&proto); +// +// Note that there is currently no support for +// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate +// DescriptorPool and MessageFactory for extensions. Since this is a property +// of the input in proto2, it's difficult to build a plan ahead-of-time that +// can properly support this. If it's an important use case, the caller should +// probably build a upb plan explicitly. + +#ifndef UPB_GOOGLE_BRIDGE_H_ +#define UPB_GOOGLE_BRIDGE_H_ + +#include <map> +#include <vector> +#include "upb/handlers.h" +#include "upb/upb.h" + +namespace google { +namespace protobuf { +class FieldDescriptor; +class Descriptor; +class EnumDescriptor; +class Message; +} // namespace protobuf +} // namespace google + +namespace proto2 { +class FieldDescriptor; +class Descriptor; +class EnumDescriptor; +class Message; +} + +namespace upb { + +namespace googlepb { + +// Returns a upb::Handlers object that can be used to populate a proto2::Message +// object of the same type as "m." For more control over handler caching and +// reuse, instantiate a CodeCache object below. +upb::reffed_ptr<const upb::Handlers> NewWriteHandlers(const proto2::Message& m); +upb::reffed_ptr<const upb::Handlers> NewWriteHandlers( + const ::google::protobuf::Message& m); + +// Builds upb::Defs from proto2::Descriptors, and caches all built Defs for +// reuse. 
CodeCache (below) uses this internally; there is no need to use this +// class directly unless you only want Defs without corresponding Handlers. +// +// This class is NOT thread-safe. +class DefBuilder { + public: + // Functions to get or create a Def from a corresponding proto2 Descriptor. + // The returned def will be frozen. + // + // The caller must take a ref on the returned value if it needs it long-term. + // The DefBuilder will retain a ref so it can keep the Def cached, but + // garbage-collection functionality may be added to DefBuilder later that + // could unref the returned pointer. + const EnumDef* GetEnumDef(const proto2::EnumDescriptor* d); + const EnumDef* GetEnumDef(const ::google::protobuf::EnumDescriptor* d); + const MessageDef* GetMessageDef(const proto2::Descriptor* d); + const MessageDef* GetMessageDef(const ::google::protobuf::Descriptor* d); + + // Gets or creates a frozen MessageDef, properly expanding weak fields. + // + // Weak fields are only represented as BYTES fields in the Descriptor (unless + // you construct your descriptors in a somewhat complicated way; see + // https://goto.google.com/weak-field-descriptor), but we can get their true + // definitions relatively easily from the proto Message class. + const MessageDef* GetMessageDefExpandWeak(const proto2::Message& m); + const MessageDef* GetMessageDefExpandWeak( + const ::google::protobuf::Message& m); + + // Static methods for converting a def without building a DefBuilder. + static reffed_ptr<const MessageDef> NewMessageDef( + const proto2::Descriptor* d) { + DefBuilder builder; + return reffed_ptr<const MessageDef>(builder.GetMessageDef(d)); + } + + private: + // Like GetMessageDef*(), except the returned def might not be frozen. + // We need this function because circular graphs of MessageDefs need to all + // be frozen together, so we have to create the graphs of defs in an unfrozen + // state first. + // + // If m is non-NULL, expands weak message fields. 
+ const MessageDef* GetMaybeUnfrozenMessageDef(const proto2::Descriptor* d, + const proto2::Message* m); + const MessageDef* GetMaybeUnfrozenMessageDef( + const ::google::protobuf::Descriptor* d, + const ::google::protobuf::Message* m); + + // Returns a new, unfrozen FieldDef corresponding to this FieldDescriptor. + // The return value is always newly created (never cached) and the returned + // pointer is the only owner of it. + // + // If "m" is non-NULL, expands the weak field if it is one, and uses "m" to + // look up a prototype of the submessage if this is a weak or + // non-weak MESSAGE or GROUP field. + reffed_ptr<FieldDef> NewFieldDef(const proto2::FieldDescriptor* f, + const proto2::Message* m); + reffed_ptr<FieldDef> NewFieldDef(const ::google::protobuf::FieldDescriptor* f, + const ::google::protobuf::Message* m); + + // Freeze all defs that haven't been frozen yet. + void Freeze(); + + template <class T> + T* AddToCache(const void *proto2_descriptor, reffed_ptr<T> def) { + assert(def_cache_.find(proto2_descriptor) == def_cache_.end()); + def_cache_[proto2_descriptor] = def; + return def.get(); // Continued lifetime is guaranteed by cache. + } + + template <class T> + const T* FindInCache(const void *proto2_descriptor) { + DefCache::iterator iter = def_cache_.find(proto2_descriptor); + return iter == def_cache_.end() ? NULL : + upb::down_cast<const T*>(iter->second.get()); + } + + private: + // Maps a proto2 descriptor to the corresponding upb Def we have constructed. + // The proto2 descriptor is void* because the proto2 descriptor types do not + // share a common base. + typedef std::map<const void*, reffed_ptr<upb::Def> > DefCache; + DefCache def_cache_; + + // Defs that have not been frozen yet. + std::vector<Def*> to_freeze_; +}; + +// Builds and caches upb::Handlers for populating proto2 generated classes. +// +// This class is NOT thread-safe. 
+class CodeCache { + public: + // Gets or creates handlers for populating messages of the given message type. + // + // The caller must take a ref on the returned value if it needs it long-term. + // The CodeCache will retain a ref so it can keep the Handlers cached, but + // garbage-collection functionality may be added to CodeCache later that could + // unref the returned pointer. + const Handlers* GetWriteHandlers(const proto2::Message& m); + const Handlers* GetWriteHandlers(const ::google::protobuf::Message& m); + + private: + const Handlers* GetMaybeUnfrozenWriteHandlers(const MessageDef* md, + const proto2::Message& m); + const Handlers* GetMaybeUnfrozenWriteHandlers( + const MessageDef* md, const ::google::protobuf::Message& m); + + Handlers* AddToCache(const MessageDef* md, reffed_ptr<Handlers> handlers) { + assert(handlers_cache_.find(md) == handlers_cache_.end()); + handlers_cache_[md] = handlers; + return handlers.get(); // Continued lifetime is guaranteed by the cache. + } + + const Handlers* FindInCache(const MessageDef* md) { + HandlersCache::iterator iter = handlers_cache_.find(md); + return iter == handlers_cache_.end() ? NULL : iter->second.get(); + } + + DefBuilder def_builder_; + + typedef std::map<const MessageDef*, upb::reffed_ptr<const Handlers> > + HandlersCache; + HandlersCache handlers_cache_; + + std::vector<Handlers*> to_freeze_; +}; + +} // namespace googlepb +} // namespace upb + +#endif // UPB_GOOGLE_BRIDGE_H_ diff --git a/upb/bindings/googlepb/proto1.cc b/upb/bindings/googlepb/proto1.cc new file mode 100644 index 0000000..c317cdf --- /dev/null +++ b/upb/bindings/googlepb/proto1.cc @@ -0,0 +1,483 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman <jhaberman@gmail.com> +// +// This set of handlers can write into a proto2::Message whose reflection class +// is _pi::Proto2Reflection (ie. 
proto1 messages; while slightly confusing, the +// name "Proto2Reflection" indicates that it is a reflection class implementing +// the proto2 reflection interface, but is used for proto1 generated messages). +// +// Like FieldAccessor, this depends on breaking encapsulation, and will need to +// be changed if and when the details of _pi::Proto2Reflection change. +// +// Note that we have received an exception from c-style-arbiters regarding +// dynamic_cast<> in this file: +// https://groups.google.com/a/google.com/d/msg/c-style/7Zp_XCX0e7s/I6dpzno4l-MJ + +#include "upb/bindings/googlepb/proto1.h" + +#include <memory> + +#include "net/proto2/public/repeated_field.h" +#include "net/proto/internal_layout.h" +#include "net/proto/proto2_reflection.h" +#include "upb/def.h" +#include "upb/handlers.h" +#include "upb/shim/shim.h" +#include "upb/sink.h" + +// Unconditionally evaluate, but also assert in debug mode. +#define CHKRET(x) do { bool ok = (x); UPB_UNUSED(ok); assert(ok); } while (0) + +template <class T> static T* GetPointer(void* message, size_t offset) { + return reinterpret_cast<T*>(static_cast<char*>(message) + offset); +} + +namespace upb { +namespace google { + +class P2R_Handlers { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, const upb::FieldDef* upb_f, + upb::Handlers* h) { + const proto2::Reflection* base_r = m.GetReflection(); + // See file comment re: dynamic_cast. + const _pi::Proto2Reflection* r = + dynamic_cast<const _pi::Proto2Reflection*>(base_r); + if (!r) return false; + // Extensions don't exist in proto1. 
+ assert(!proto2_f->is_extension()); + +#define PRIMITIVE(name, type_name) \ + case _pi::CREP_REQUIRED_##name: \ + case _pi::CREP_OPTIONAL_##name: \ + case _pi::CREP_REPEATED_##name: \ + SetPrimitiveHandlers<type_name>(proto2_f, r, upb_f, h); \ + return true; + + switch (r->GetFieldLayout(proto2_f)->crep) { + PRIMITIVE(DOUBLE, double); + PRIMITIVE(FLOAT, float); + PRIMITIVE(INT64, int64_t); + PRIMITIVE(UINT64, uint64_t); + PRIMITIVE(INT32, int32_t); + PRIMITIVE(FIXED64, uint64_t); + PRIMITIVE(FIXED32, uint32_t); + PRIMITIVE(BOOL, bool); + case _pi::CREP_REQUIRED_STRING: + case _pi::CREP_OPTIONAL_STRING: + case _pi::CREP_REPEATED_STRING: + SetStringHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_OUTOFLINE_STRING: + SetOutOfLineStringHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_REQUIRED_CORD: + case _pi::CREP_OPTIONAL_CORD: + case _pi::CREP_REPEATED_CORD: + SetCordHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_REQUIRED_GROUP: + case _pi::CREP_REQUIRED_FOREIGN: + case _pi::CREP_REQUIRED_FOREIGN_PROTO2: + SetRequiredMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_GROUP: + case _pi::CREP_REPEATED_GROUP: + case _pi::CREP_OPTIONAL_FOREIGN: + case _pi::CREP_REPEATED_FOREIGN: + case _pi::CREP_OPTIONAL_FOREIGN_PROTO2: + case _pi::CREP_REPEATED_FOREIGN_PROTO2: + SetMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_FOREIGN_WEAK: + case _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2: + SetWeakMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + default: + assert(false); + return false; + } + } + +#undef PRIMITIVE + + // If the field "f" in the message "m" is a weak field, returns the prototype + // of the submessage (which may be a specific type or may be OpaqueMessage). + // Otherwise returns NULL. 
+ static const proto2::Message* GetWeakPrototype( + const proto2::Message& m, const proto2::FieldDescriptor* f) { + // See file comment re: dynamic_cast. + const _pi::Proto2Reflection* r = + dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection()); + if (!r) return NULL; + + const _pi::Field* field = r->GetFieldLayout(f); + if (field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) { + return static_cast<const proto2::Message*>( + field->weak_layout()->default_instance); + } else if (field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2) { + return field->proto2_weak_default_instance(); + } else { + return NULL; + } + } + + // If "m" is a message that uses Proto2Reflection, returns the prototype of + // the submessage (which may be OpaqueMessage for a weak field that is not + // linked in). Otherwise returns NULL. + static const proto2::Message* GetFieldPrototype( + const proto2::Message& m, const proto2::FieldDescriptor* f) { + // See file comment re: dynamic_cast. + const proto2::Message* ret = GetWeakPrototype(m, f); + if (ret) { + return ret; + } else if (dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection())) { + // Since proto1 has no dynamic message, it must be from the generated + // factory. 
+ assert(f->cpp_type() == proto2::FieldDescriptor::CPPTYPE_MESSAGE); + ret = proto2::MessageFactory::generated_factory()->GetPrototype( + f->message_type()); + assert(ret); + return ret; + } else { + return NULL; + } + } + + private: + class FieldOffset { + public: + FieldOffset(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) + : offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) { + if (!is_repeated_) { + int64_t hasbit = GetHasbit(f, r); + hasbyte_ = hasbit / 8; + mask_ = 1 << (hasbit % 8); + } + } + + template <class T> T* GetFieldPointer(proto2::Message* message) const { + return GetPointer<T>(message, offset_); + } + + void SetHasbit(void* message) const { + assert(!is_repeated_); + uint8_t* byte = GetPointer<uint8_t>(message, hasbyte_); + *byte |= mask_; + } + + private: + const size_t offset_; + bool is_repeated_; + + // Only for non-repeated fields. + int32_t hasbyte_; + int8_t mask_; + }; + + static upb_selector_t GetSelector(const upb::FieldDef* f, + upb::Handlers::Type type) { + upb::Handlers::Selector selector; + bool ok = upb::Handlers::GetSelector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; + } + + static int16_t GetHasbit(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + assert(!f->is_repeated()); + return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index; + } + + static uint16_t GetOffset(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + return r->GetFieldLayout(f)->offset; + } + + // StartSequence ///////////////////////////////////////////////////////////// + + template <class T> + static void SetStartRepeatedField( + const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + CHKRET(h->SetStartSequenceHandler( + f, UpbBindT(PushOffset<proto2::RepeatedField<T> >, + new FieldOffset(proto2_f, r)))); + } + + template <class T> + static void SetStartRepeatedPtrField( + const 
proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + CHKRET(h->SetStartSequenceHandler( + f, UpbBindT(PushOffset<proto2::RepeatedPtrField<T> >, + new FieldOffset(proto2_f, r)))); + } + + static void SetStartRepeatedSubmessageField( + const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + CHKRET(h->SetStartSequenceHandler( + f, UpbBind(PushOffset<proto2::internal::RepeatedPtrFieldBase>, + new FieldOffset(proto2_f, r)))); + } + + template <class T> + static T* PushOffset(proto2::Message* m, const FieldOffset* offset) { + return offset->GetFieldPointer<T>(m); + } + + // Primitive Value (numeric, enum, bool) ///////////////////////////////////// + + template <typename T> + static void SetPrimitiveHandlers(const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartRepeatedField<T>(proto2_f, r, f, h); + CHKRET(h->SetValueHandler<T>(f, UpbMakeHandlerT(Append<T>))); + } else { + CHKRET( + upb::Shim::Set(h, f, GetOffset(proto2_f, r), GetHasbit(proto2_f, r))); + } + } + + template <typename T> + static void Append(proto2::RepeatedField<T>* r, T val) { + // Proto1's ProtoArray class derives from proto2::RepeatedField. 
+ r->Add(val); + } + + // String //////////////////////////////////////////////////////////////////// + + static void SetStringHandlers(const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, UpbMakeHandler(OnStringBuf)); + if (f->IsSequence()) { + SetStartRepeatedPtrField<string>(proto2_f, r, f, h); + CHKRET(h->SetStartStringHandler(f, UpbMakeHandler(StartRepeatedString))); + } else { + CHKRET(h->SetStartStringHandler( + f, UpbBind(StartString, new FieldOffset(proto2_f, r)))); + } + } + + static string* StartString(proto2::Message* m, const FieldOffset* info, + size_t size_hint) { + info->SetHasbit(m); + string* str = info->GetFieldPointer<string>(m); + str->clear(); + // reserve() here appears to hurt performance rather than help. + return str; + } + + static void OnStringBuf(string* s, const char* buf, size_t n) { + s->append(buf, n); + } + + static string* StartRepeatedString(proto2::RepeatedPtrField<string>* r, + size_t size_hint) { + string* str = r->Add(); + // reserve() here appears to hurt performance rather than help. + return str; + } + + // Out-of-line string //////////////////////////////////////////////////////// + + static void SetOutOfLineStringHandlers( + const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + // This type is only used for non-repeated string fields. 
+ assert(!f->IsSequence()); + CHKRET(h->SetStartStringHandler( + f, UpbBind(StartOutOfLineString, new FieldOffset(proto2_f, r)))); + CHKRET(h->SetStringHandler(f, UpbMakeHandler(OnStringBuf))); + } + + static string* StartOutOfLineString(proto2::Message* m, + const FieldOffset* info, + size_t size_hint) { + info->SetHasbit(m); + string** str = info->GetFieldPointer<string*>(m); + if (*str == &::proto2::internal::GetEmptyString()) + *str = new string(); + (*str)->clear(); + // reserve() here appears to hurt performance rather than help. + return *str; + } + + // Cord ////////////////////////////////////////////////////////////////////// + + static void SetCordHandlers(const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartRepeatedField<Cord>(proto2_f, r, f, h); + CHKRET(h->SetStartStringHandler(f, UpbMakeHandler(StartRepeatedCord))); + } else { + CHKRET(h->SetStartStringHandler( + f, UpbBind(StartCord, new FieldOffset(proto2_f, r)))); + } + CHKRET(h->SetStringHandler(f, UpbMakeHandler(OnCordBuf))); + } + + static Cord* StartCord(proto2::Message* m, const FieldOffset* offset, + size_t size_hint) { + UPB_UNUSED(size_hint); + offset->SetHasbit(m); + Cord* field = offset->GetFieldPointer<Cord>(m); + field->Clear(); + return field; + } + + static void OnCordBuf(Cord* c, const char* buf, size_t n) { + c->Append(StringPiece(buf, n)); + } + + static Cord* StartRepeatedCord(proto2::RepeatedField<Cord>* r, + size_t size_hint) { + UPB_UNUSED(size_hint); + return r->Add(); + } + + // SubMessage //////////////////////////////////////////////////////////////// + + class SubMessageHandlerData : public FieldOffset { + public: + SubMessageHandlerData(const proto2::Message& prototype, + const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) + : FieldOffset(f, r) { + prototype_ = GetWeakPrototype(prototype, f); + if (!prototype_) prototype_ = GetFieldPrototype(prototype, 
f); + } + + const proto2::Message* prototype() const { return prototype_; } + + private: + const proto2::Message* prototype_; + }; + + static void SetRequiredMessageHandlers( + const proto2::FieldDescriptor* proto2_f, const proto2::Message& m, + const _pi::Proto2Reflection* r, const upb::FieldDef* f, + upb::Handlers* h) { + if (f->IsSequence()) { + SetStartRepeatedSubmessageField(proto2_f, r, f, h); + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartRepeatedSubMessage, + new SubMessageHandlerData(m, proto2_f, r)))); + } else { + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartRequiredSubMessage, new FieldOffset(proto2_f, r)))); + } + } + + static proto2::Message* StartRequiredSubMessage(proto2::Message* m, + const FieldOffset* offset) { + offset->SetHasbit(m); + return offset->GetFieldPointer<proto2::Message>(m); + } + + static void SetMessageHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + std::unique_ptr<SubMessageHandlerData> data( + new SubMessageHandlerData(m, proto2_f, r)); + if (f->IsSequence()) { + SetStartRepeatedSubmessageField(proto2_f, r, f, h); + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartRepeatedSubMessage, data.release()))); + } else { + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartSubMessage, data.release()))); + } + } + + static void SetWeakMessageHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + std::unique_ptr<SubMessageHandlerData> data( + new SubMessageHandlerData(m, proto2_f, r)); + if (f->IsSequence()) { + SetStartRepeatedSubmessageField(proto2_f, r, f, h); + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartRepeatedSubMessage, data.release()))); + } else { + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartWeakSubMessage, data.release()))); + } + } + + static void* 
StartSubMessage(proto2::Message* m, + const SubMessageHandlerData* info) { + info->SetHasbit(m); + proto2::Message** subm = info->GetFieldPointer<proto2::Message*>(m); + if (*subm == info->prototype()) *subm = (*subm)->New(); + return *subm; + } + + static void* StartWeakSubMessage(proto2::Message* m, + const SubMessageHandlerData* info) { + info->SetHasbit(m); + proto2::Message** subm = info->GetFieldPointer<proto2::Message*>(m); + if (*subm == NULL) { + *subm = info->prototype()->New(); + } + return *subm; + } + + class RepeatedMessageTypeHandler { + public: + typedef proto2::Message Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + UPB_UNUSED(t); + assert(false); + } + }; + + // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through + // its base class RepeatedPtrFieldBase*. + static proto2::Message* StartRepeatedSubMessage( + proto2::internal::RepeatedPtrFieldBase* r, + const SubMessageHandlerData* info) { + proto2::Message* submsg = r->AddFromCleared<RepeatedMessageTypeHandler>(); + if (!submsg) { + submsg = info->prototype()->New(); + r->AddAllocated<RepeatedMessageTypeHandler>(submsg); + } + return submsg; + } +}; + +bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const upb::FieldDef* upb_f, upb::Handlers* h) { + return P2R_Handlers::TrySet(proto2_f, m, upb_f, h); +} + +const proto2::Message* GetProto1WeakPrototype( + const proto2::Message& m, const proto2::FieldDescriptor* f) { + return P2R_Handlers::GetWeakPrototype(m, f); +} + +const proto2::Message* GetProto1FieldPrototype( + const proto2::Message& m, const proto2::FieldDescriptor* f) { + return P2R_Handlers::GetFieldPrototype(m, f); +} + +} // namespace google +} // namespace upb diff --git a/upb/bindings/googlepb/proto1.h b/upb/bindings/googlepb/proto1.h new file mode 100644 index 0000000..eb550ac --- /dev/null +++ 
b/upb/bindings/googlepb/proto1.h @@ -0,0 +1,51 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman <jhaberman@gmail.com> +// +// Support for registering field handlers that can write into a legacy proto1 +// message. This functionality is only needed inside Google. +// +// This is a low-level interface; the high-level interface in google.h is +// more user-friendly. + +#ifndef UPB_GOOGLE_PROTO1_H_ +#define UPB_GOOGLE_PROTO1_H_ + +namespace proto2 { +class FieldDescriptor; +class Message; +} + +namespace upb { +class FieldDef; +class Handlers; +} + +namespace upb { +namespace google { + +// Sets field handlers in the given Handlers object for writing to a single +// field (as described by "proto2_f" and "upb_f") into a message constructed +// by the same factory as "prototype." Returns true if this was successful +// (this will fail if "prototype" is not a proto1 message, or if we can't +// handle it for some reason). +bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); + +// Returns a prototype for the given field in "m", if it is weak. The returned +// message could be the linked-in message type or OpaqueMessage, if the weak +// message is *not* linked in. Otherwise returns NULL. +const proto2::Message* GetProto1WeakPrototype(const proto2::Message& m, + const proto2::FieldDescriptor* f); + +// Returns a prototype for the given non-weak field in "m". 
+const proto2::Message* GetProto1FieldPrototype( + const proto2::Message& m, const proto2::FieldDescriptor* f); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_PROTO1_H_ diff --git a/upb/bindings/googlepb/proto2.cc b/upb/bindings/googlepb/proto2.cc new file mode 100644 index 0000000..c0b4907 --- /dev/null +++ b/upb/bindings/googlepb/proto2.cc @@ -0,0 +1,816 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman <jhaberman@gmail.com> +// +// Note that we have received an exception from c-style-arbiters regarding +// dynamic_cast<> in this file: +// https://groups.google.com/a/google.com/d/msg/c-style/7Zp_XCX0e7s/I6dpzno4l-MJ +// +// IMPORTANT NOTE! This file is compiled TWICE, once with UPB_GOOGLE3 defined +// and once without! This allows us to provide functionality against proto2 +// and protobuf opensource both in a single binary without the two conflicting. +// However we must be careful not to violate the ODR. + +#include "upb/bindings/googlepb/proto2.h" + +#include "upb/def.h" +#include "upb/bindings/googlepb/proto1.h" +#include "upb/handlers.h" +#include "upb/shim/shim.h" +#include "upb/sink.h" + +namespace { + +template<typename To, typename From> To CheckDownCast(From* f) { + assert(f == NULL || dynamic_cast<To>(f) != NULL); + return static_cast<To>(f); +} + +} + +// Unconditionally evaluate, but also assert in debug mode. +#define CHKRET(x) do { bool ok = (x); UPB_UNUSED(ok); assert(ok); } while (0) + +namespace upb { +namespace google_google3 { class GMR_Handlers; } +namespace google_opensource { class GMR_Handlers; } +} // namespace upb + +// BEGIN DOUBLE COMPILATION TRICKERY. 
////////////////////////////////////////// + +#ifdef UPB_GOOGLE3 + +#include "net/proto2/proto/descriptor.pb.h" +#include "net/proto2/public/descriptor.h" +#include "net/proto2/public/extension_set.h" +#include "net/proto2/public/generated_message_reflection.h" +#include "net/proto2/public/lazy_field.h" +#include "net/proto2/public/message.h" +#include "net/proto2/public/repeated_field.h" +#include "net/proto2/public/string_piece_field_support.h" + +namespace goog = ::proto2; +namespace me = ::upb::google_google3; + +#else + +// TODO(haberman): remove these once new versions of protobuf that "friend" +// upb are pervasive in the wild. +#define protected public +#include "google/protobuf/repeated_field.h" +#undef protected + +#define private public +#include "google/protobuf/generated_message_reflection.h" +#undef private + +#include "google/protobuf/descriptor.h" +#include "google/protobuf/descriptor.pb.h" +#include "google/protobuf/extension_set.h" +#include "google/protobuf/message.h" + +namespace goog = ::google::protobuf; +namespace me = ::upb::google_opensource; + +using goog::int32; +using goog::int64; +using goog::uint32; +using goog::uint64; +using goog::scoped_ptr; + +#endif // ifdef UPB_GOOGLE3 + +// END DOUBLE COMPILATION TRICKERY. //////////////////////////////////////////// + +// Have to define this manually since older versions of proto2 didn't define +// an enum value for STRING. +#define UPB_CTYPE_STRING 0 + +template <class T> static T* GetPointer(void* message, size_t offset) { + return reinterpret_cast<T*>(static_cast<char*>(message) + offset); +} +template <class T> +static const T* GetConstPointer(const void* message, size_t offset) { + return reinterpret_cast<const T*>(static_cast<const char*>(message) + offset); +} + +// This class contains handlers that can write into a proto2 class whose +// reflection class is GeneratedMessageReflection. 
(Despite the name, even +// DynamicMessage uses GeneratedMessageReflection, so this covers all proto2 +// messages generated by the compiler.) To do this it must break the +// encapsulation of GeneratedMessageReflection and therefore depends on +// internal interfaces that are not guaranteed to be stable. This class will +// need to be updated if any non-backward-compatible changes are made to +// GeneratedMessageReflection. +class me::GMR_Handlers { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const goog::FieldDescriptor* proto2_f, + const goog::Message& m, const upb::FieldDef* upb_f, + upb::Handlers* h) { + const goog::Reflection* base_r = m.GetReflection(); + // See file comment re: dynamic_cast. + const goog::internal::GeneratedMessageReflection* r = + dynamic_cast<const goog::internal::GeneratedMessageReflection*>(base_r); + if (!r) return false; + +#define PRIMITIVE_TYPE(cpptype, cident) \ +case goog::FieldDescriptor::cpptype: \ + SetPrimitiveHandlers<cident>(proto2_f, r, upb_f, h); \ + return true; + + switch (proto2_f->cpp_type()) { + PRIMITIVE_TYPE(CPPTYPE_INT32, int32); + PRIMITIVE_TYPE(CPPTYPE_INT64, int64); + PRIMITIVE_TYPE(CPPTYPE_UINT32, uint32); + PRIMITIVE_TYPE(CPPTYPE_UINT64, uint64); + PRIMITIVE_TYPE(CPPTYPE_DOUBLE, double); + PRIMITIVE_TYPE(CPPTYPE_FLOAT, float); + PRIMITIVE_TYPE(CPPTYPE_BOOL, bool); + case goog::FieldDescriptor::CPPTYPE_ENUM: + if (proto2_f->is_extension()) { + SetEnumExtensionHandlers(proto2_f, r, upb_f, h); + } else { + SetEnumHandlers(proto2_f, r, upb_f, h); + } + return true; + case goog::FieldDescriptor::CPPTYPE_STRING: { + if (proto2_f->is_extension()) { +#ifdef UPB_GOOGLE3 + SetStringExtensionHandlers<string>(proto2_f, r, upb_f, h); +#else + SetStringExtensionHandlers<std::string>(proto2_f, r, upb_f, h); +#endif + return true; + } + + // Old versions of the open-source 
protobuf release erroneously default + // to Cord even though that has never been supported in the open-source + // release. + int32_t ctype = proto2_f->options().has_ctype() ? + proto2_f->options().ctype() + : UPB_CTYPE_STRING; + switch (ctype) { +#ifdef UPB_GOOGLE3 + case goog::FieldOptions::STRING: + SetStringHandlers<string>(proto2_f, r, upb_f, h); + return true; + case goog::FieldOptions::CORD: + SetCordHandlers(proto2_f, r, upb_f, h); + return true; + case goog::FieldOptions::STRING_PIECE: + SetStringPieceHandlers(proto2_f, r, upb_f, h); + return true; +#else + case UPB_CTYPE_STRING: + SetStringHandlers<std::string>(proto2_f, r, upb_f, h); + return true; +#endif + default: + return false; + } + } + case goog::FieldDescriptor::CPPTYPE_MESSAGE: +#ifdef UPB_GOOGLE3 + if (proto2_f->options().lazy()) { + assert(false); + return false; // Not yet implemented. + } +#endif + if (proto2_f->is_extension()) { + SetSubMessageExtensionHandlers(proto2_f, m, r, upb_f, h); + return true; + } + SetSubMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + default: + return false; + } + } + +#undef PRIMITIVE_TYPE + + static const goog::Message* GetFieldPrototype( + const goog::Message& m, const goog::FieldDescriptor* f) { + // We assume that all submessages (and extensions) will be constructed + // using the same MessageFactory as this message. This doesn't cover the + // case of CodedInputStream::SetExtensionRegistry(). + // See file comment re: dynamic_cast. 
+ const goog::internal::GeneratedMessageReflection* r = + dynamic_cast<const goog::internal::GeneratedMessageReflection*>( + m.GetReflection()); + if (!r) return NULL; + return r->message_factory_->GetPrototype(f->message_type()); + } + + private: + static upb_selector_t GetSelector(const upb::FieldDef* f, + upb::Handlers::Type type) { + upb::Handlers::Selector selector; + bool ok = upb::Handlers::GetSelector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; + } + + static int64_t GetHasbit( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) { + // proto2 does not store hasbits for repeated fields. + assert(!f->is_repeated()); + return (r->has_bits_offset_ * 8) + f->index(); + } + + static uint16_t GetOffset( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) { + return r->offsets_[f->index()]; + } + + class FieldOffset { + public: + FieldOffset(const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) + : offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) { + if (!is_repeated_) { + int64_t hasbit = GetHasbit(f, r); + hasbyte_ = hasbit / 8; + mask_ = 1 << (hasbit % 8); + } + } + + template <class T> T* GetFieldPointer(goog::Message* message) const { + return GetPointer<T>(message, offset_); + } + + void SetHasbit(void* m) const { + assert(!is_repeated_); + uint8_t* byte = GetPointer<uint8_t>(m, hasbyte_); + *byte |= mask_; + } + + private: + const size_t offset_; + bool is_repeated_; + + // Only for non-repeated fields. 
+ int32_t hasbyte_; + uint8_t mask_; // Unsigned so that the mask for bit 7 (0x80) is representable. + }; + + class ExtensionFieldData { + public: + ExtensionFieldData( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r) + : offset_(r->extensions_offset_), + number_(proto2_f->number()), + type_(proto2_f->type()) { + } + + int number() const { return number_; } + goog::internal::FieldType type() const { return type_; } + + goog::internal::ExtensionSet* GetExtensionSet(goog::Message* m) const { + return GetPointer<goog::internal::ExtensionSet>(m, offset_); + } + + private: + const size_t offset_; + int number_; + goog::internal::FieldType type_; + }; + + // StartSequence ///////////////////////////////////////////////////////////// + + template <class T> + static void SetStartRepeatedField( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + CHKRET(h->SetStartSequenceHandler( + f, UpbBindT(&PushOffset<goog::RepeatedField<T> >, + new FieldOffset(proto2_f, r)))); + } + + template <class T> + static void SetStartRepeatedPtrField( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + CHKRET(h->SetStartSequenceHandler( + f, UpbBindT(&PushOffset<goog::RepeatedPtrField<T> >, + new FieldOffset(proto2_f, r)))); + } + + static void SetStartRepeatedSubmessageField( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + CHKRET(h->SetStartSequenceHandler( + f, UpbBind(&PushOffset<goog::internal::RepeatedPtrFieldBase>, + new FieldOffset(proto2_f, r)))); + } + + template <class T> + static T* PushOffset(goog::Message* message, const FieldOffset* offset) { + return offset->GetFieldPointer<T>(message); + } + + // Primitive Value (numeric, bool) /////////////////////////////////////////// + + template <typename T> static void 
SetPrimitiveHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (proto2_f->is_extension()) { + scoped_ptr<ExtensionFieldData> data(new ExtensionFieldData(proto2_f, r)); + if (f->IsSequence()) { + CHKRET(h->SetValueHandler<T>( + f, UpbBindT(AppendPrimitiveExtension<T>, data.release()))); + } else { + CHKRET(h->SetValueHandler<T>( + f, UpbBindT(SetPrimitiveExtension<T>, data.release()))); + } + } else { + if (f->IsSequence()) { + SetStartRepeatedField<T>(proto2_f, r, f, h); + CHKRET(h->SetValueHandler<T>(f, UpbMakeHandlerT(AppendPrimitive<T>))); + } else { + CHKRET(upb::Shim::Set(h, f, GetOffset(proto2_f, r), + GetHasbit(proto2_f, r))); + } + } + } + + template <typename T> + static void AppendPrimitive(goog::RepeatedField<T>* r, T val) { r->Add(val); } + + template <typename T> + static void AppendPrimitiveExtension(goog::Message* m, + const ExtensionFieldData* data, T val) { + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + // TODO(haberman): give an accurate value for "packed" + goog::internal::RepeatedPrimitiveTypeTraits<T>::Add( + data->number(), data->type(), true, val, set); + } + + template <typename T> + static void SetPrimitiveExtension(goog::Message* m, + const ExtensionFieldData* data, T val) { + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + goog::internal::PrimitiveTypeTraits<T>::Set(data->number(), data->type(), + val, set); + } + + // Enum ////////////////////////////////////////////////////////////////////// + + class EnumHandlerData : public FieldOffset { + public: + EnumHandlerData(const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f) + : FieldOffset(proto2_f, r), + field_number_(f->number()), + unknown_fields_offset_(r->unknown_fields_offset_), + enum_(upb_downcast_enumdef(f->subdef())) {} + + bool IsValidValue(int32_t val) const { + return 
enum_->FindValueByNumber(val) != NULL; + } + + int32_t field_number() const { return field_number_; } + + goog::UnknownFieldSet* mutable_unknown_fields(goog::Message* m) const { + return GetPointer<goog::UnknownFieldSet>(m, unknown_fields_offset_); + } + + private: + int32_t field_number_; + size_t unknown_fields_offset_; + const upb::EnumDef* enum_; + }; + + static void SetEnumHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(!proto2_f->is_extension()); + scoped_ptr<EnumHandlerData> data(new EnumHandlerData(proto2_f, r, f)); + if (f->IsSequence()) { + CHKRET(h->SetInt32Handler(f, UpbBind(AppendEnum, data.release()))); + } else { + CHKRET(h->SetInt32Handler(f, UpbBind(SetEnum, data.release()))); + } + } + + static void SetEnum(goog::Message* m, const EnumHandlerData* data, + int32_t val) { + if (data->IsValidValue(val)) { + int32_t* message_val = data->GetFieldPointer<int32_t>(m); + *message_val = val; + data->SetHasbit(m); + } else { + data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + } + } + + static void AppendEnum(goog::Message* m, const EnumHandlerData* data, + int32_t val) { + // Closure is the enclosing message. We can't use the RepeatedField<> as + // the closure because we need to go back to the message for unrecognized + // enum values, which go into the unknown field set. 
+ if (data->IsValidValue(val)) { + goog::RepeatedField<int32_t>* r = + data->GetFieldPointer<goog::RepeatedField<int32_t> >(m); + r->Add(val); + } else { + data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + } + } + + // EnumExtension ///////////////////////////////////////////////////////////// + + static void SetEnumExtensionHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(proto2_f->is_extension()); + scoped_ptr<ExtensionFieldData> data(new ExtensionFieldData(proto2_f, r)); + if (f->IsSequence()) { + CHKRET( + h->SetInt32Handler(f, UpbBind(AppendEnumExtension, data.release()))); + } else { + CHKRET(h->SetInt32Handler(f, UpbBind(SetEnumExtension, data.release()))); + } + } + + static void SetEnumExtension(goog::Message* m, const ExtensionFieldData* data, + int32_t val) { + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + set->SetEnum(data->number(), data->type(), val, NULL); + } + + static void AppendEnumExtension(goog::Message* m, + const ExtensionFieldData* data, int32_t val) { + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + // TODO(haberman): give an accurate value for "packed" + set->AddEnum(data->number(), data->type(), true, val, NULL); + } + + // String //////////////////////////////////////////////////////////////////// + + // For scalar (non-repeated) string fields. 
+ template <class T> class StringHandlerData : public FieldOffset { + public: + StringHandlerData(const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r) + : FieldOffset(proto2_f, r), + prototype_(*GetConstPointer<T*>(r->default_instance_, + GetOffset(proto2_f, r))) {} + + const T* prototype() const { return prototype_; } + + T** GetStringPointer(goog::Message* message) const { + return GetFieldPointer<T*>(message); + } + + private: + const T* prototype_; + }; + + template <typename T> static void SetStringHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + assert(!proto2_f->is_extension()); + CHKRET(h->SetStringHandler(f, UpbMakeHandlerT(&OnStringBuf<T>))); + if (f->IsSequence()) { + SetStartRepeatedPtrField<T>(proto2_f, r, f, h); + CHKRET( + h->SetStartStringHandler(f, UpbMakeHandlerT(StartRepeatedString<T>))); + } else { + CHKRET(h->SetStartStringHandler( + f, UpbBindT(StartString<T>, new StringHandlerData<T>(proto2_f, r)))); + } + } + + // This needs to be templated because google3 string is not std::string. + template <typename T> + static T* StartString(goog::Message* m, const StringHandlerData<T>* data, + size_t size_hint) { + UPB_UNUSED(size_hint); + T** str = data->GetStringPointer(m); + data->SetHasbit(m); + // If it points to the default instance, we must create a new instance. + if (*str == data->prototype()) *str = new T(); + (*str)->clear(); + // reserve() here appears to hurt performance rather than help. + return *str; + } + + template <typename T> + static void OnStringBuf(T* str, const char* buf, size_t n) { + str->append(buf, n); + } + + template <typename T> + static T* StartRepeatedString(goog::RepeatedPtrField<T>* r, + size_t size_hint) { + UPB_UNUSED(size_hint); + T* str = r->Add(); + str->clear(); + // reserve() here appears to hurt performance rather than help. 
+ return str; + } + + // StringExtension /////////////////////////////////////////////////////////// + + template <typename T> + static void SetStringExtensionHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(proto2_f->is_extension()); + CHKRET(h->SetStringHandler(f, UpbMakeHandlerT(OnStringBuf<T>))); + scoped_ptr<ExtensionFieldData> data(new ExtensionFieldData(proto2_f, r)); + if (f->IsSequence()) { + CHKRET(h->SetStartStringHandler( + f, UpbBindT(StartRepeatedStringExtension<T>, data.release()))); + } else { + CHKRET(h->SetStartStringHandler( + f, UpbBindT(StartStringExtension<T>, data.release()))); + } + } + + // Templated because google3 is not std::string. + template <class T> + static T* StartStringExtension(goog::Message* m, + const ExtensionFieldData* data, + size_t size_hint) { + UPB_UNUSED(size_hint); + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + return set->MutableString(data->number(), data->type(), NULL); + } + + template <class T> + static T* StartRepeatedStringExtension(goog::Message* m, + const ExtensionFieldData* data, + size_t size_hint) { + UPB_UNUSED(size_hint); + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + return set->AddString(data->number(), data->type(), NULL); + } + + // SubMessage //////////////////////////////////////////////////////////////// + + class SubMessageHandlerData : public FieldOffset { + public: + SubMessageHandlerData(const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r, + const goog::Message* prototype) + : FieldOffset(f, r), prototype_(prototype) {} + + const goog::Message* prototype() const { return prototype_; } + + private: + const goog::Message* const prototype_; + }; + + static void SetSubMessageHandlers( + const goog::FieldDescriptor* proto2_f, const goog::Message& m, + const goog::internal::GeneratedMessageReflection* r, + const 
upb::FieldDef* f, upb::Handlers* h) { + const goog::Message* field_prototype = GetFieldPrototype(m, proto2_f); + scoped_ptr<SubMessageHandlerData> data( + new SubMessageHandlerData(proto2_f, r, field_prototype)); + if (f->IsSequence()) { + SetStartRepeatedSubmessageField(proto2_f, r, f, h); + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartRepeatedSubMessage, data.release()))); + } else { + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartSubMessage, data.release()))); + } + } + + static goog::Message* StartSubMessage(goog::Message* m, + const SubMessageHandlerData* data) { + data->SetHasbit(m); + goog::Message** subm = data->GetFieldPointer<goog::Message*>(m); + if (*subm == NULL || *subm == data->prototype()) { + *subm = data->prototype()->New(); + } + return *subm; + } + + class RepeatedMessageTypeHandler { + public: + typedef goog::Message Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + UPB_UNUSED(t); + assert(false); + } + }; + + // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through + // its base class RepeatedPtrFieldBase*. 
+ static goog::Message* StartRepeatedSubMessage( + goog::internal::RepeatedPtrFieldBase* r, + const SubMessageHandlerData* data) { + goog::Message* submsg = r->AddFromCleared<RepeatedMessageTypeHandler>(); + if (!submsg) { + submsg = data->prototype()->New(); + r->AddAllocated<RepeatedMessageTypeHandler>(submsg); + } + return submsg; + } + + // SubMessageExtension /////////////////////////////////////////////////////// + + class SubMessageExtensionHandlerData : public ExtensionFieldData { + public: + SubMessageExtensionHandlerData( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const goog::Message* prototype) + : ExtensionFieldData(proto2_f, r), + prototype_(prototype) { + } + + const goog::Message* prototype() const { return prototype_; } + + private: + const goog::Message* const prototype_; + }; + + static void SetSubMessageExtensionHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::Message& m, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + const goog::Message* field_prototype = GetFieldPrototype(m, proto2_f); + scoped_ptr<SubMessageExtensionHandlerData> data( + new SubMessageExtensionHandlerData(proto2_f, r, field_prototype)); + if (f->IsSequence()) { + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartRepeatedSubMessageExtension, data.release()))); + } else { + CHKRET(h->SetStartSubMessageHandler( + f, UpbBind(StartSubMessageExtension, data.release()))); + } + } + + static goog::Message* StartRepeatedSubMessageExtension( + goog::Message* m, const SubMessageExtensionHandlerData* data) { + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + // Because we found this message via a descriptor, we know it has a + // descriptor and is therefore a Message and not a MessageLite. 
+ // Alternatively we could just use goog::MessageLite everywhere to avoid + // this, but since they are in fact goog::Messages, it seems most clear + // to refer to them as such. + return CheckDownCast<goog::Message*>(set->AddMessage( + data->number(), data->type(), *data->prototype(), NULL)); + } + + static goog::Message* StartSubMessageExtension( + goog::Message* m, const SubMessageExtensionHandlerData* data) { + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + // See comment above re: this down cast. + return CheckDownCast<goog::Message*>(set->MutableMessage( + data->number(), data->type(), *data->prototype(), NULL)); + } + + // TODO(haberman): handle Unknown Fields. + +#ifdef UPB_GOOGLE3 + // Handlers for types/features only included in internal proto2 release: + // Cord, StringPiece, LazyField, and MessageSet. + // TODO(haberman): LazyField, MessageSet. + + // Cord ////////////////////////////////////////////////////////////////////// + + static void SetCordHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(!proto2_f->is_extension()); + CHKRET(h->SetStringHandler(f, UpbMakeHandler(&OnCordBuf))); + if (f->IsSequence()) { + SetStartRepeatedField<Cord>(proto2_f, r, f, h); + CHKRET(h->SetStartStringHandler(f, UpbMakeHandler(StartRepeatedCord))); + } else { + CHKRET(h->SetStartStringHandler( + f, UpbBind(StartCord, new FieldOffset(proto2_f, r)))); + } + } + + static Cord* StartCord(goog::Message* m, const FieldOffset* offset, + size_t size_hint) { + UPB_UNUSED(size_hint); + offset->SetHasbit(m); + Cord* field = offset->GetFieldPointer<Cord>(m); + field->Clear(); + return field; + } + + static void OnCordBuf(Cord* c, const char* buf, size_t n, + const upb::BufferHandle* handle) { + const Cord* source_cord = handle->GetAttachedObject<Cord>(); + if (source_cord) { + // This TODO is copied from CordReader::CopyToCord(): + // "We could speed 
this up by using CordReader internals." + Cord piece(*source_cord); + piece.RemovePrefix(handle->object_offset() + (buf - handle->buffer())); + assert(piece.size() >= n); + piece.RemoveSuffix(piece.size() - n); + + c->Append(piece); + } else { + c->Append(StringPiece(buf, n)); + } + } + + static Cord* StartRepeatedCord(proto2::RepeatedField<Cord>* r, + size_t size_hint) { + UPB_UNUSED(size_hint); + return r->Add(); + } + + // StringPiece /////////////////////////////////////////////////////////////// + + static void SetStringPieceHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(!proto2_f->is_extension()); + CHKRET(h->SetStringHandler(f, UpbMakeHandler(OnStringPieceBuf))); + if (f->IsSequence()) { + SetStartRepeatedPtrField<proto2::internal::StringPieceField>(proto2_f, r, + f, h); + CHKRET(h->SetStartStringHandler( + f, UpbMakeHandler(StartRepeatedStringPiece))); + } else { + CHKRET(h->SetStartStringHandler( + f, UpbBind(StartStringPiece, new FieldOffset(proto2_f, r)))); + } + } + + static void OnStringPieceBuf(proto2::internal::StringPieceField* field, + const char* buf, size_t len) { + // TODO(haberman): alias if possible and enabled on the input stream. + // TODO(haberman): add a method to StringPieceField that lets us avoid + // this copy/malloc/free. 
+ size_t new_len = field->size() + len; + char* data = new char[new_len]; + memcpy(data, field->data(), field->size()); + memcpy(data + field->size(), buf, len); + field->CopyFrom(StringPiece(data, new_len)); + delete[] data; + } + + static proto2::internal::StringPieceField* StartStringPiece( + goog::Message* m, const FieldOffset* offset, size_t size_hint) { + UPB_UNUSED(size_hint); + offset->SetHasbit(m); + proto2::internal::StringPieceField* field = + offset->GetFieldPointer<proto2::internal::StringPieceField>(m); + field->Clear(); + return field; + } + + static proto2::internal::StringPieceField* StartRepeatedStringPiece( + proto2::RepeatedPtrField<proto2::internal::StringPieceField>* r, + size_t size_hint) { + UPB_UNUSED(size_hint); + proto2::internal::StringPieceField* field = r->Add(); + field->Clear(); + return field; + } + +#endif // UPB_GOOGLE3 +}; + +namespace upb { +namespace google { + +bool TrySetWriteHandlers(const goog::FieldDescriptor* proto2_f, + const goog::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h) { + return me::GMR_Handlers::TrySet(proto2_f, prototype, upb_f, h); +} + +const goog::Message* GetFieldPrototype(const goog::Message& m, + const goog::FieldDescriptor* f) { + return me::GMR_Handlers::GetFieldPrototype(m, f); +} + +} // namespace google +} // namespace upb diff --git a/upb/bindings/googlepb/proto2.h b/upb/bindings/googlepb/proto2.h new file mode 100644 index 0000000..516b7fd --- /dev/null +++ b/upb/bindings/googlepb/proto2.h @@ -0,0 +1,61 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman <jhaberman@gmail.com> +// +// Support for registering field handlers that can write into a proto2 +// message that uses GeneratedMessageReflection (which includes all messages +// generated by the proto2 compiler as well as DynamicMessage). 
+// +// This is a low-level interface; the high-level interface in google.h is +// more user-friendly. + +#ifndef UPB_GOOGLE_PROTO2_H_ +#define UPB_GOOGLE_PROTO2_H_ + +namespace proto2 { +class FieldDescriptor; +class Message; +} + +namespace google { +namespace protobuf { +class FieldDescriptor; +class Message; +} +} + +namespace upb { +class FieldDef; +class Handlers; +} + +namespace upb { +namespace google { + +// Sets field handlers in the given Handlers object for writing to a single +// field (as described by "proto2_f" and "upb_f") into a message constructed +// by the same factory as "prototype." Returns true if this was successful +// (this will fail if "prototype" does not use GeneratedMessageReflection, or +// if we can't handle it for some reason). +bool TrySetWriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); +bool TrySetWriteHandlers(const ::google::protobuf::FieldDescriptor* proto2_f, + const ::google::protobuf::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); + +// Returns a prototype for the message type of the given field in "m", obtained +// from the message factory associated with "m". Returns NULL if "m" does not +// use GeneratedMessageReflection. 
+const proto2::Message* GetFieldPrototype(const proto2::Message& m, + const proto2::FieldDescriptor* f); +const ::google::protobuf::Message* GetFieldPrototype( + const ::google::protobuf::Message& m, + const ::google::protobuf::FieldDescriptor* f); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_PROTO2_H_ diff --git a/upb/bindings/linux/Makefile b/upb/bindings/linux/Makefile new file mode 100644 index 0000000..1736b61 --- /dev/null +++ b/upb/bindings/linux/Makefile @@ -0,0 +1,20 @@ +obj-m = upb.o + +upb-objs = \ + ../../upb/upb.o \ + ../../upb/bytestream.o \ + ../../upb/def.o \ + ../../upb/handlers.o \ + ../../upb/table.o \ + ../../upb/refcount.o \ + ../../upb/msg.o \ + +KVERSION = $(shell uname -r) + +ccflags-y := -I$(PWD) -I$(PWD)/../.. -Wno-declaration-after-statement -std=gnu99 + +all: + make -C /lib/modules/$(KVERSION)/build M=$(PWD) modules + +clean: + make -C /lib/modules/$(KVERSION)/build M=$(PWD) clean diff --git a/upb/bindings/linux/assert.h b/upb/bindings/linux/assert.h new file mode 100644 index 0000000..26d8ab6 --- /dev/null +++ b/upb/bindings/linux/assert.h @@ -0,0 +1,20 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + */ + +#include <linux/kernel.h> + +#ifndef UPB_LINUX_ASSERT_H +#define UPB_LINUX_ASSERT_H + +#ifdef NDEBUG +#define assert(x) ((void)0) /* still a complete expression statement */ +#else +#define assert(x) \ + do { if (!(x)) panic("Assertion failed: %s at %s:%d", #x, __FILE__, __LINE__); } while (0) /* exactly one statement: safe in unbraced if/else */ +#endif + +#endif diff --git a/upb/bindings/linux/errno.h b/upb/bindings/linux/errno.h new file mode 100644 index 0000000..f45d939 --- /dev/null +++ b/upb/bindings/linux/errno.h @@ -0,0 +1,8 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. 
+ * Author: Josh Haberman <jhaberman@gmail.com> + */ + +#include <linux/errno.h> diff --git a/upb/bindings/linux/stdint.h b/upb/bindings/linux/stdint.h new file mode 100644 index 0000000..2524b23 --- /dev/null +++ b/upb/bindings/linux/stdint.h @@ -0,0 +1,8 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + */ + +#include <linux/types.h> diff --git a/upb/bindings/linux/stdio.h b/upb/bindings/linux/stdio.h new file mode 100644 index 0000000..72c1b0d --- /dev/null +++ b/upb/bindings/linux/stdio.h @@ -0,0 +1,10 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * Linux-kernel implementations of some stdio.h functions. + */ + +#include <linux/kernel.h> // For sprintf and friends. diff --git a/upb/bindings/linux/stdlib.h b/upb/bindings/linux/stdlib.h new file mode 100644 index 0000000..8381b13 --- /dev/null +++ b/upb/bindings/linux/stdlib.h @@ -0,0 +1,22 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * Linux-kernel implementations of some stdlib.h functions. + */ + +#include <linux/slab.h> + +#ifndef UPB_LINUX_STDLIB_H +#define UPB_LINUX_STDLIB_H + +static inline void *malloc(size_t size) { return kmalloc(size, GFP_ATOMIC); } +static inline void free(void *p) { kfree(p); } + +static inline void *realloc(void *p, size_t size) { + return krealloc(p, size, GFP_ATOMIC); +} + +#endif diff --git a/upb/bindings/linux/string.h b/upb/bindings/linux/string.h new file mode 100644 index 0000000..30ebf8a --- /dev/null +++ b/upb/bindings/linux/string.h @@ -0,0 +1,13 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. 
See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + */ + +#ifndef UPB_LINUX_STRING_H_ +#define UPB_LINUX_STRING_H_ + +#include <linux/string.h> + +#endif /* UPB_LINUX_STRING_H_ */ diff --git a/upb/bindings/lua/table.c b/upb/bindings/lua/table.c new file mode 100644 index 0000000..51ba324 --- /dev/null +++ b/upb/bindings/lua/table.c @@ -0,0 +1,169 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * Lua extension that provides access to upb_table. This is an internal-only + * interface and exists for the sole purpose of writing a C code generator in + * Lua that can dump a upb_table as static C initializers. This lets us use + * Lua for convenient string manipulation while saving us from re-implementing + * the upb_table hash function and hash table layout / collision strategy in + * Lua. + * + * Since this is used only as part of the toolchain (and not part of the + * runtime) we do not hold this module to the same stringent requirements as + * the main Lua modules (for example that misbehaving Lua programs cannot + * crash the interpreter). + */ + +#include <float.h> +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "lauxlib.h" +#include "upb/bindings/lua/upb.h" +#include "upb/def.h" + +static void lupbtable_setnum(lua_State *L, int tab, const char *key, + lua_Number val) { + lua_pushnumber(L, val); + lua_setfield(L, tab - 1, key); // "tab" must be a relative (negative) index: the push above shifted it by one. +} + +static void lupbtable_pushval(lua_State *L, _upb_value val, upb_ctype_t ctype) { + switch (ctype) { + case UPB_CTYPE_INT32: + lua_pushnumber(L, val.int32); + break; + case UPB_CTYPE_PTR: + lupb_def_pushwrapper(L, val.ptr, NULL); + break; + case UPB_CTYPE_CSTR: + lua_pushstring(L, val.cstr); + break; + default: + luaL_error(L, "Unexpected type: %d", ctype); + } +} + +// Sets a few fields common to both hash table entries and arrays. 
+static void lupbtable_setmetafields(lua_State *L, int ctype, const void *ptr) { + // We tack this onto every entry so we know it even if the entries + // don't stay with the table. + lua_pushnumber(L, ctype); + lua_setfield(L, -2, "valtype"); + + // Set this to facilitate linking. + lua_pushlightuserdata(L, (void*)ptr); + lua_setfield(L, -2, "ptr"); +} + +static void lupbtable_pushent(lua_State *L, const upb_tabent *e, + bool inttab, int ctype) { + lua_newtable(L); // The entry table; it is left on top of the stack for the caller. + if (!upb_tabent_isempty(e)) { // Empty slots get no "key"/"value" fields. + if (inttab) { + lua_pushnumber(L, e->key.num); + } else { + lua_pushstring(L, e->key.str); + } + lua_setfield(L, -2, "key"); + lupbtable_pushval(L, e->val, ctype); + lua_setfield(L, -2, "value"); + } + lua_pushlightuserdata(L, (void*)e->next); + lua_setfield(L, -2, "next"); + lupbtable_setmetafields(L, ctype, e); +} + +// Dumps the shared part of upb_table into a Lua table. +static void lupbtable_pushtable(lua_State *L, const upb_table *t, bool inttab) { + lua_newtable(L); + lupbtable_setnum(L, -1, "count", t->count); + lupbtable_setnum(L, -1, "mask", t->mask); + lupbtable_setnum(L, -1, "ctype", t->ctype); + lupbtable_setnum(L, -1, "size_lg2", t->size_lg2); + + lua_newtable(L); + for (int i = 0; i < upb_table_size(t); i++) { + lupbtable_pushent(L, &t->entries[i], inttab, t->ctype); + lua_rawseti(L, -2, i + 1); // Lua arrays are 1-based. + } + lua_setfield(L, -2, "entries"); +} + +// Dumps a upb_inttable to a Lua table. 
+static void lupbtable_pushinttable(lua_State *L, const upb_inttable *t) { + lupbtable_pushtable(L, &t->t, true); + lupbtable_setnum(L, -1, "array_size", t->array_size); + lupbtable_setnum(L, -1, "array_count", t->array_count); + + lua_newtable(L); + for (int i = 0; i < t->array_size; i++) { + lua_newtable(L); + if (upb_arrhas(t->array[i])) { + lupbtable_pushval(L, t->array[i], t->t.ctype); + lua_setfield(L, -2, "val"); + } + lupbtable_setmetafields(L, t->t.ctype, &t->array[i]); + lua_rawseti(L, -2, i + 1); + } + lua_setfield(L, -2, "array"); +} + +static void lupbtable_pushstrtable(lua_State *L, const upb_strtable *t) { + lupbtable_pushtable(L, &t->t, false); +} + +static int lupbtable_msgdef_itof(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + lupbtable_pushinttable(L, &m->itof); + return 1; +} + +static int lupbtable_msgdef_ntof(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + lupbtable_pushstrtable(L, &m->ntof); + return 1; +} + +static int lupbtable_enumdef_iton(lua_State *L) { + const upb_enumdef *e = lupb_enumdef_check(L, 1); + lupbtable_pushinttable(L, &e->iton); + return 1; +} + +static int lupbtable_enumdef_ntoi(lua_State *L) { + const upb_enumdef *e = lupb_enumdef_check(L, 1); + lupbtable_pushstrtable(L, &e->ntoi); + return 1; +} + +static void lupbtable_setfieldi(lua_State *L, const char *field, int i) { + lua_pushnumber(L, i); + lua_setfield(L, -2, field); +} + +static const struct luaL_Reg lupbtable_toplevel_m[] = { + {"msgdef_itof", lupbtable_msgdef_itof}, + {"msgdef_ntof", lupbtable_msgdef_ntof}, + {"enumdef_iton", lupbtable_enumdef_iton}, + {"enumdef_ntoi", lupbtable_enumdef_ntoi}, + {NULL, NULL} +}; + +int luaopen_upbtable(lua_State *L) { + lupb_newlib(L, "upb.table", lupbtable_toplevel_m); + + // We define these here because they are not public. 
+ lupbtable_setfieldi(L, "CTYPE_PTR", UPB_CTYPE_PTR); + lupbtable_setfieldi(L, "CTYPE_CSTR", UPB_CTYPE_CSTR); + lupbtable_setfieldi(L, "CTYPE_INT32", UPB_CTYPE_INT32); + + lua_pushlightuserdata(L, NULL); + lua_setfield(L, -2, "NULL"); + + return 1; // Return a single Lua value, the package table created above. +} diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c new file mode 100644 index 0000000..1e7540a --- /dev/null +++ b/upb/bindings/lua/upb.c @@ -0,0 +1,1208 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * A Lua extension for upb. Exposes only the core library + * (sub-libraries are exposed in other extensions). + */ + +#include <float.h> +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include "lauxlib.h" +#include "upb/bindings/lua/upb.h" +#include "upb/handlers.h" +#include "upb/pb/glue.h" + +// Lua metatable types. +#define LUPB_MSGDEF "lupb.msgdef" +#define LUPB_ENUMDEF "lupb.enumdef" +#define LUPB_FIELDDEF "lupb.fielddef" +#define LUPB_SYMTAB "lupb.symtab" + +// Other table constants. +#define LUPB_OBJCACHE "lupb.objcache" + +#if LUA_VERSION_NUM == 501 + +// Taken from Lua 5.2's source. +void *luaL_testudata(lua_State *L, int ud, const char *tname) { + void *p = lua_touserdata(L, ud); + if (p != NULL) { /* value is a userdata? */ + if (lua_getmetatable(L, ud)) { /* does it have a metatable? */ + luaL_getmetatable(L, tname); /* get correct metatable */ + if (!lua_rawequal(L, -1, -2)) /* not the same? 
*/ + p = NULL; /* value is a userdata with wrong metatable */ + lua_pop(L, 2); /* remove both metatables */ + return p; + } + } + return NULL; /* value is not a userdata with a metatable */ +} + +#elif LUA_VERSION_NUM == 502 + +int luaL_typerror(lua_State *L, int narg, const char *tname) { + const char *msg = lua_pushfstring(L, "%s expected, got %s", + tname, luaL_typename(L, narg)); + return luaL_argerror(L, narg, msg); +} + +#else +#error Only Lua 5.1 and 5.2 are supported +#endif + +static const char *chkname(lua_State *L, int narg) { + size_t len; + const char *name = luaL_checklstring(L, narg, &len); + if (strlen(name) != len) + luaL_error(L, "names cannot have embedded NULLs"); + return name; +} + +static bool chkbool(lua_State *L, int narg, const char *type) { + if (!lua_isboolean(L, narg)) { + luaL_error(L, "%s must be true or false", type); + } + return lua_toboolean(L, narg); +} + +static bool streql(const char *a, const char *b) { return strcmp(a, b) == 0; } + +static uint32_t chkint32(lua_State *L, int narg, const char *name) { + lua_Number n = lua_tonumber(L, narg); + if (n > INT32_MAX || n < INT32_MIN || rint(n) != n) + luaL_error(L, "Invalid %s", name); + return n; +} + +// Sets a fielddef default from the given Lua value. +static void lupb_setdefault(lua_State *L, int narg, upb_fielddef *f) { + if (upb_fielddef_type(f) == UPB_TYPE_BOOL) { + upb_fielddef_setdefaultbool(f, chkbool(L, narg, "bool default")); + } else { + // Numeric type. 
+ lua_Number num = luaL_checknumber(L, narg); + switch (upb_fielddef_type(f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_ENUM: + if (num > INT32_MAX || num < INT32_MIN || num != rint(num)) + luaL_error(L, "Cannot convert %f to 32-bit integer", num); + upb_fielddef_setdefaultint32(f, num); + break; + case UPB_TYPE_INT64: + if (num > INT64_MAX || num < INT64_MIN || num != rint(num)) + luaL_error(L, "Cannot convert %f to 64-bit integer", num); + upb_fielddef_setdefaultint64(f, num); + break; + case UPB_TYPE_UINT32: + if (num > UINT32_MAX || num < 0 || num != rint(num)) + luaL_error(L, "Cannot convert %f to unsigned 32-bit integer", num); + upb_fielddef_setdefaultuint32(f, num); + break; + case UPB_TYPE_UINT64: + if (num > UINT64_MAX || num < 0 || num != rint(num)) + luaL_error(L, "Cannot convert %f to unsigned 64-bit integer", num); + upb_fielddef_setdefaultuint64(f, num); + break; + case UPB_TYPE_DOUBLE: + if (num > DBL_MAX || num < -DBL_MAX) { + // This could happen if lua_Number was long double. + luaL_error(L, "Cannot convert %f to double", num); + } + upb_fielddef_setdefaultdouble(f, num); + break; + case UPB_TYPE_FLOAT: + if (num > FLT_MAX || num < -FLT_MAX) + luaL_error(L, "Cannot convert %f to float", num); + upb_fielddef_setdefaultfloat(f, num); + break; + default: luaL_error(L, "invalid type"); + } + } +} + +void lupb_checkstatus(lua_State *L, upb_status *s) { + if (!upb_ok(s)) { + lua_pushstring(L, upb_status_errmsg(s)); + lua_error(L); + } +} + +#define CHK(pred) do { \ + upb_status status = UPB_STATUS_INIT; \ + pred; \ + lupb_checkstatus(L, &status); \ + } while (0) + + +/* refcounted *****************************************************************/ + +// All upb objects that use upb_refcounted share a common Lua userdata +// representation and a common scheme for caching Lua wrapper object. They do +// however have different metatables. Objects are cached in a weak table +// indexed by the C pointer of the object they are caching. 
+ +typedef union { + const upb_refcounted *refcounted; + const upb_def *def; + upb_symtab *symtab; +} lupb_refcounted; + +static bool lupb_refcounted_pushwrapper(lua_State *L, const upb_refcounted *obj, + const char *type, const void *owner) { + if (obj == NULL) { + lua_pushnil(L); + return false; + } + + // Lookup our cache in the registry (we don't put our objects in the registry + // directly because we need our cache to be a weak table). + lupb_refcounted *ud = NULL; + lua_getfield(L, LUA_REGISTRYINDEX, LUPB_OBJCACHE); + assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb. + lua_pushlightuserdata(L, (void*)obj); + lua_rawget(L, -2); + // Stack: objcache, cached value. + bool create = lua_isnil(L, -1) || + // A corner case: it is possible for the value to be GC'd + // already, in which case we should evict this entry and create + // a new one. + ((lupb_refcounted*)lua_touserdata(L, -1))->refcounted == NULL; + if (create) { + // Remove bad cached value and push new value. + lua_pop(L, 1); + + // We take advantage of the fact that all of our objects are currently a + // single pointer, and thus have the same layout. + // TODO: this probably violates aliasing. + ud = lua_newuserdata(L, sizeof(lupb_refcounted)); + ud->refcounted = obj; + upb_refcounted_donateref(obj, owner, ud); + + luaL_getmetatable(L, type); + assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb. + lua_setmetatable(L, -2); + + // Set it in the cache. + lua_pushlightuserdata(L, (void*)obj); + lua_pushvalue(L, -2); + lua_rawset(L, -4); + } else { + // Existing wrapper obj already has a ref. 
+ ud = lua_touserdata(L, -1); + upb_refcounted_checkref(obj, ud); + if (owner) + upb_refcounted_unref(obj, owner); + } + lua_insert(L, -2); + lua_pop(L, 1); + return create; +} + +static void lupb_refcounted_pushnewrapper(lua_State *L, upb_refcounted *obj, + const char *type, const void *owner) { + bool created = lupb_refcounted_pushwrapper(L, obj, type, owner); + UPB_ASSERT_VAR(created, created == true); +} + + +/* lupb_def *******************************************************************/ + +static const upb_def *lupb_def_check(lua_State *L, int narg) { + lupb_refcounted *r = luaL_testudata(L, narg, LUPB_MSGDEF); + if (!r) r = luaL_testudata(L, narg, LUPB_ENUMDEF); + if (!r) r = luaL_testudata(L, narg, LUPB_FIELDDEF); + if (!r) luaL_typerror(L, narg, "upb def"); + if (!r->refcounted) luaL_error(L, "called into dead def"); + return r->def; +} + +static upb_def *lupb_def_checkmutable(lua_State *L, int narg) { + const upb_def *def = lupb_def_check(L, narg); + if (upb_def_isfrozen(def)) + luaL_typerror(L, narg, "not allowed on frozen value"); + return (upb_def*)def; +} + +bool lupb_def_pushwrapper(lua_State *L, const upb_def *def, const void *owner) { + if (def == NULL) { + lua_pushnil(L); + return false; + } + + const char *type = NULL; + switch (def->type) { + case UPB_DEF_MSG: type = LUPB_MSGDEF; break; + case UPB_DEF_ENUM: type = LUPB_ENUMDEF; break; + case UPB_DEF_FIELD: type = LUPB_FIELDDEF; break; + default: luaL_error(L, "unknown deftype %d", def->type); + } + return lupb_refcounted_pushwrapper(L, UPB_UPCAST(def), type, owner); +} + +void lupb_def_pushnewrapper(lua_State *L, const upb_def *def, + const void *owner) { + bool created = lupb_def_pushwrapper(L, def, owner); + UPB_ASSERT_VAR(created, created == true); +} + +static int lupb_def_type(lua_State *L) { + const upb_def *def = lupb_def_check(L, 1); + lua_pushnumber(L, upb_def_type(def)); + return 1; +} + +static int lupb_def_isfrozen(lua_State *L) { + const upb_def *def = lupb_def_check(L, 1); + 
lua_pushboolean(L, upb_def_isfrozen(def)); + return 1; +} + +static int lupb_def_fullname(lua_State *L) { + const upb_def *def = lupb_def_check(L, 1); + lua_pushstring(L, upb_def_fullname(def)); + return 1; +} + +static int lupb_def_setfullname(lua_State *L) { + CHK(upb_def_setfullname(lupb_def_checkmutable(L, 1), chkname(L, 2), &status)); + return 0; +} + +#define LUPB_COMMON_DEF_METHODS \ + {"def_type", lupb_def_type}, \ + {"full_name", lupb_def_fullname}, \ + {"is_frozen", lupb_def_isfrozen}, \ + {"set_full_name", lupb_def_setfullname}, \ + + +/* lupb_fielddef **************************************************************/ + +static const upb_fielddef *lupb_fielddef_check(lua_State *L, int narg) { + lupb_refcounted *r = luaL_checkudata(L, narg, LUPB_FIELDDEF); + if (!r) luaL_typerror(L, narg, "upb fielddef"); + if (!r->refcounted) luaL_error(L, "called into dead fielddef"); + return upb_downcast_fielddef(r->def); +} + +static upb_fielddef *lupb_fielddef_checkmutable(lua_State *L, int narg) { + const upb_fielddef *f = lupb_fielddef_check(L, narg); + if (upb_fielddef_isfrozen(f)) + luaL_typerror(L, narg, "not allowed on frozen value"); + return (upb_fielddef*)f; +} + +// Setter functions; these are called by both the constructor and the individual +// setter API calls like field:set_type(). 
+ +static void lupb_fielddef_dosetdefault(lua_State *L, upb_fielddef *f, + int narg) { + int type = lua_type(L, narg); + upb_fieldtype_t upbtype = upb_fielddef_type(f); + if (type == LUA_TSTRING) { + if (!upb_fielddef_isstring(f) && upbtype != UPB_TYPE_ENUM) + luaL_argerror(L, narg, "field does not expect a string default"); + size_t len; + const char *str = lua_tolstring(L, narg, &len); + CHK(upb_fielddef_setdefaultstr(f, str, len, &status)); + } else { + lupb_setdefault(L, narg, f); + } +} + +static void lupb_fielddef_dosetisextension(lua_State *L, upb_fielddef *f, + int narg) { + CHK(upb_fielddef_setisextension(f, chkbool(L, narg, "is_extension"))); +} + +static void lupb_fielddef_dosetlabel(lua_State *L, upb_fielddef *f, int narg) { + int label = luaL_checkint(L, narg); + if (!upb_fielddef_checklabel(label)) + luaL_argerror(L, narg, "invalid field label"); + upb_fielddef_setlabel(f, label); +} + +static void lupb_fielddef_dosetname(lua_State *L, upb_fielddef *f, int narg) { + CHK(upb_fielddef_setname(f, chkname(L, narg), &status)); +} + +static void lupb_fielddef_dosetnumber(lua_State *L, upb_fielddef *f, int narg) { + CHK(upb_fielddef_setnumber(f, luaL_checkint(L, narg), &status)); +} + +static void lupb_fielddef_dosetsubdef(lua_State *L, upb_fielddef *f, int narg) { + const upb_def *def = NULL; + if (!lua_isnil(L, narg)) + def = lupb_def_check(L, narg); + CHK(upb_fielddef_setsubdef(f, def, &status)); +} + +static void lupb_fielddef_dosetsubdefname(lua_State *L, upb_fielddef *f, + int narg) { + const char *name = NULL; + if (!lua_isnil(L, narg)) + name = chkname(L, narg); + CHK(upb_fielddef_setsubdefname(f, name, &status)); +} + +static void lupb_fielddef_dosetcontainingtypename(lua_State *L, upb_fielddef *f, + int narg) { + const char *name = NULL; + if (!lua_isnil(L, narg)) + name = chkname(L, narg); + CHK(upb_fielddef_setcontainingtypename(f, name, &status)); +} + +static void lupb_fielddef_dosettype(lua_State *L, upb_fielddef *f, int narg) { + int type = 
luaL_checkint(L, narg); + if (!upb_fielddef_checktype(type)) + luaL_argerror(L, narg, "invalid field type"); + upb_fielddef_settype(f, type); +} + +static void lupb_fielddef_dosetintfmt(lua_State *L, upb_fielddef *f, int narg) { + int32_t intfmt = luaL_checknumber(L, narg); + if (!upb_fielddef_checkintfmt(intfmt)) + luaL_argerror(L, narg, "invalid intfmt"); + upb_fielddef_setintfmt(f, intfmt); +} + +static void lupb_fielddef_dosettagdelim(lua_State *L, upb_fielddef *f, + int narg) { + CHK(upb_fielddef_settagdelim(f, chkbool(L, narg, "tagdelim"))); +} + +// Setter API calls. These use the setter functions above. + +static int lupb_fielddef_setcontainingtypename(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetcontainingtypename(L, f, 2); + return 0; +} + +static int lupb_fielddef_setdefault(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetdefault(L, f, 2); + return 0; +} + +static int lupb_fielddef_setisextension(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetisextension(L, f, 2); + return 0; +} + +static int lupb_fielddef_setlabel(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetlabel(L, f, 2); + return 0; +} + +static int lupb_fielddef_setname(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetname(L, f, 2); + return 0; +} + +static int lupb_fielddef_setnumber(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetnumber(L, f, 2); + return 0; +} + +static int lupb_fielddef_setsubdef(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetsubdef(L, f, 2); + return 0; +} + +static int lupb_fielddef_setsubdefname(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetsubdefname(L, f, 2); + return 0; +} + +static int lupb_fielddef_settype(lua_State *L) { + 
upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosettype(L, f, 2); + return 0; +} + +static int lupb_fielddef_setintfmt(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetintfmt(L, f, 2); + return 0; +} + +static int lupb_fielddef_settagdelim(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosettagdelim(L, f, 2); + return 0; +} + +// Constructor and other methods. + +static int lupb_fielddef_new(lua_State *L) { + upb_fielddef *f = upb_fielddef_new(&f); + int narg = lua_gettop(L); + + lupb_def_pushnewrapper(L, UPB_UPCAST(f), &f); + + if (narg == 0) return 1; + + // User can specify initialization values like so: + // upb.FieldDef{label=upb.LABEL_REQUIRED, name="my_field", number=5, + // type=upb.TYPE_INT32, default_value=12, type_name="Foo"} + luaL_checktype(L, 1, LUA_TTABLE); + for (lua_pushnil(L); lua_next(L, 1); lua_pop(L, 1)) { + luaL_checktype(L, -2, LUA_TSTRING); + const char *key = lua_tostring(L, -2); + int v = -1; + if (streql(key, "name")) lupb_fielddef_dosetname(L, f, v); + else if (streql(key, "number")) lupb_fielddef_dosetnumber(L, f, v); + else if (streql(key, "type")) lupb_fielddef_dosettype(L, f, v); + else if (streql(key, "label")) lupb_fielddef_dosetlabel(L, f, v); + else if (streql(key, "is_extension")) + lupb_fielddef_dosetisextension(L, f, v); + else if (streql(key, "containing_type_name")) + lupb_fielddef_dosetcontainingtypename(L, f, v); + else if (streql(key, "default_value")) ; // Defer to second pass. + else if (streql(key, "subdef")) ; // Defer to second pass. + else if (streql(key, "subdef_name")) ; // Defer to second pass. + else luaL_error(L, "Cannot set fielddef member '%s'", key); + } + + // Have to do these in a second pass because these depend on the type, so we + // have to make sure the type is set if the user specified one. 
+ for (lua_pushnil(L); lua_next(L, 1); lua_pop(L, 1)) { + const char *key = lua_tostring(L, -2); + int v = -1; + if (streql(key, "default_value")) lupb_fielddef_dosetdefault(L, f, v); + else if (streql(key, "subdef")) lupb_fielddef_dosetsubdef(L, f, v); + else if (streql(key, "subdef_name")) lupb_fielddef_dosetsubdefname(L, f, v); + } + + return 1; +} + +static int lupb_fielddef_default(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + switch (upb_fielddef_type(f)) { + case UPB_TYPE_INT32: + int32: + lua_pushnumber(L, upb_fielddef_defaultint32(f)); break; + case UPB_TYPE_INT64: + lua_pushnumber(L, upb_fielddef_defaultint64(f)); break; + case UPB_TYPE_UINT32: + lua_pushnumber(L, upb_fielddef_defaultuint32(f)); break; + case UPB_TYPE_UINT64: + lua_pushnumber(L, upb_fielddef_defaultuint64(f)); break; + case UPB_TYPE_DOUBLE: + lua_pushnumber(L, upb_fielddef_defaultdouble(f)); break; + case UPB_TYPE_FLOAT: + lua_pushnumber(L, upb_fielddef_defaultfloat(f)); break; + case UPB_TYPE_BOOL: + lua_pushboolean(L, upb_fielddef_defaultbool(f)); break; + case UPB_TYPE_ENUM: + if (!upb_fielddef_default_is_symbolic(f)) + goto int32; + // Fallthrough. 
+ case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + size_t len; + const char *data = upb_fielddef_defaultstr(f, &len); + lua_pushlstring(L, data, len); + break; + } + case UPB_TYPE_MESSAGE: + return luaL_error(L, "Message fields do not have explicit defaults."); + } + return 1; +} + +static int lupb_fielddef_getsel(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + upb_selector_t sel; + if (upb_handlers_getselector(f, luaL_checknumber(L, 2), &sel)) { + lua_pushnumber(L, sel); + return 1; + } else { + return 0; + } +} + +static int lupb_fielddef_label(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushnumber(L, upb_fielddef_label(f)); + return 1; +} + +static int lupb_fielddef_name(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushstring(L, upb_fielddef_name(f)); + return 1; +} + +static int lupb_fielddef_number(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + int32_t num = upb_fielddef_number(f); + if (num) + lua_pushnumber(L, num); + else + lua_pushnil(L); + return 1; +} + +static int lupb_fielddef_selectorbase(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + if (!upb_fielddef_isfrozen(f)) + luaL_error(L, "_selectorbase is only defined for frozen fielddefs"); + lua_pushnumber(L, f->selector_base); + return 1; +} + +static int lupb_fielddef_hassubdef(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushboolean(L, upb_fielddef_hassubdef(f)); + return 1; +} + +static int lupb_fielddef_containingtype(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lupb_def_pushwrapper(L, UPB_UPCAST(upb_fielddef_containingtype(f)), NULL); + return 1; +} + +static int lupb_fielddef_containingtypename(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lua_pushstring(L, upb_fielddef_containingtypename(f)); + return 1; +} + +static int lupb_fielddef_subdef(lua_State *L) { + const upb_fielddef *f = 
lupb_fielddef_check(L, 1); + if (!upb_fielddef_hassubdef(f)) + luaL_error(L, "Tried to get subdef of non-message field"); + const upb_def *def = upb_fielddef_subdef(f); + lupb_def_pushwrapper(L, def, NULL); + return 1; +} + +static int lupb_fielddef_subdefname(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + if (!upb_fielddef_hassubdef(f)) + luaL_error(L, "Tried to get subdef name of non-message field"); + lua_pushstring(L, upb_fielddef_subdefname(f)); + return 1; +} + +static int lupb_fielddef_type(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + if (upb_fielddef_typeisset(f)) + lua_pushnumber(L, upb_fielddef_type(f)); + else + lua_pushnil(L); + return 1; +} + +static int lupb_fielddef_index(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushnumber(L, upb_fielddef_index(f)); + return 1; +} + +static int lupb_fielddef_intfmt(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushnumber(L, upb_fielddef_intfmt(f)); + return 1; +} + +static int lupb_fielddef_isextension(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushboolean(L, upb_fielddef_isextension(f)); + return 1; +} + +static int lupb_fielddef_istagdelim(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushboolean(L, upb_fielddef_istagdelim(f)); + return 1; +} + +static int lupb_fielddef_gc(lua_State *L) { + lupb_refcounted *r = luaL_checkudata(L, 1, LUPB_FIELDDEF); + upb_def_unref(r->def, r); + r->refcounted = NULL; + return 0; +} + +static const struct luaL_Reg lupb_fielddef_m[] = { + LUPB_COMMON_DEF_METHODS + + {"containing_type", lupb_fielddef_containingtype}, + {"containing_type_name", lupb_fielddef_containingtypename}, + {"default", lupb_fielddef_default}, + {"getsel", lupb_fielddef_getsel}, + {"has_subdef", lupb_fielddef_hassubdef}, + {"index", lupb_fielddef_index}, + {"intfmt", lupb_fielddef_intfmt}, + {"is_extension", lupb_fielddef_isextension}, + 
{"istagdelim", lupb_fielddef_istagdelim}, + {"label", lupb_fielddef_label}, + {"name", lupb_fielddef_name}, + {"number", lupb_fielddef_number}, + {"subdef", lupb_fielddef_subdef}, + {"subdef_name", lupb_fielddef_subdefname}, + {"type", lupb_fielddef_type}, + + {"set_containing_type_name", lupb_fielddef_setcontainingtypename}, + {"set_default", lupb_fielddef_setdefault}, + {"set_is_extension", lupb_fielddef_setisextension}, + {"set_label", lupb_fielddef_setlabel}, + {"set_name", lupb_fielddef_setname}, + {"set_number", lupb_fielddef_setnumber}, + {"set_subdef", lupb_fielddef_setsubdef}, + {"set_subdef_name", lupb_fielddef_setsubdefname}, + {"set_type", lupb_fielddef_settype}, + {"set_intfmt", lupb_fielddef_setintfmt}, + {"set_tagdelim", lupb_fielddef_settagdelim}, + + // Internal-only. + {"_selector_base", lupb_fielddef_selectorbase}, + + {NULL, NULL} +}; + +static const struct luaL_Reg lupb_fielddef_mm[] = { + {"__gc", lupb_fielddef_gc}, + {NULL, NULL} +}; + + +/* lupb_msgdef ****************************************************************/ + +const upb_msgdef *lupb_msgdef_check(lua_State *L, int narg) { + lupb_refcounted *r = luaL_checkudata(L, narg, LUPB_MSGDEF); + if (!r) luaL_typerror(L, narg, LUPB_MSGDEF); + if (!r->refcounted) luaL_error(L, "called into dead msgdef"); + return upb_downcast_msgdef(r->def); +} + +static upb_msgdef *lupb_msgdef_checkmutable(lua_State *L, int narg) { + const upb_msgdef *m = lupb_msgdef_check(L, narg); + if (upb_msgdef_isfrozen(m)) + luaL_typerror(L, narg, "not allowed on frozen value"); + return (upb_msgdef*)m; +} + +static int lupb_msgdef_gc(lua_State *L) { + lupb_refcounted *r = luaL_checkudata(L, 1, LUPB_MSGDEF); + upb_def_unref(r->def, r); + r->refcounted = NULL; + return 0; +} + +static int lupb_msgdef_new(lua_State *L) { + int narg = lua_gettop(L); + upb_msgdef *md = upb_msgdef_new(&md); + lupb_def_pushnewrapper(L, UPB_UPCAST(md), &md); + + if (narg == 0) return 1; + + // User can specify initialization values like so: + // 
upb.MessageDef{full_name="MyMessage", extstart=8000, fields={...}} + luaL_checktype(L, 1, LUA_TTABLE); + for (lua_pushnil(L); lua_next(L, 1); lua_pop(L, 1)) { + luaL_checktype(L, -2, LUA_TSTRING); + const char *key = lua_tostring(L, -2); + + if (streql(key, "full_name")) { // full_name="MyMessage" + CHK(upb_def_setfullname(UPB_UPCAST(md), chkname(L, -1), &status)); + } else if (streql(key, "fields")) { // fields={...} + // Iterate over the list of fields. + luaL_checktype(L, -1, LUA_TTABLE); + for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, -1); + CHK(upb_msgdef_addfield(md, f, NULL, &status)); + } + } else { + // TODO: extrange= + luaL_error(L, "Unknown initializer key '%s'", key); + } + } + return 1; +} + +static int lupb_msgdef_add(lua_State *L) { + upb_msgdef *m = lupb_msgdef_checkmutable(L, 1); + luaL_checktype(L, 2, LUA_TTABLE); + int n = lua_rawlen(L, 2); + // TODO: add upb interface that lets us avoid this malloc/free. + upb_fielddef **fields = malloc(n * sizeof(upb_fielddef*)); + for (int i = 0; i < n; i++) { + lua_rawgeti(L, -1, i + 1); + fields[i] = lupb_fielddef_checkmutable(L, -1); + lua_pop(L, 1); + } + + upb_status status = UPB_STATUS_INIT; + upb_msgdef_addfields(m, fields, n, NULL, &status); + free(fields); + lupb_checkstatus(L, &status); + return 0; +} + +static int lupb_msgdef_len(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + lua_pushinteger(L, upb_msgdef_numfields(m)); + return 1; +} + +static int lupb_msgdef_selectorcount(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + lua_pushinteger(L, m->selector_count); + return 1; +} + +static int lupb_msgdef_submsgfieldcount(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + lua_pushinteger(L, m->submsg_field_count); + return 1; +} + +static int lupb_msgdef_field(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + int type = lua_type(L, 2); + const upb_fielddef *f; + if (type == 
LUA_TNUMBER) { + f = upb_msgdef_itof(m, lua_tointeger(L, 2)); + } else if (type == LUA_TSTRING) { + f = upb_msgdef_ntof(m, lua_tostring(L, 2)); + } else { + const char *msg = lua_pushfstring(L, "number or string expected, got %s", + luaL_typename(L, 2)); + return luaL_argerror(L, 2, msg); + } + + lupb_def_pushwrapper(L, UPB_UPCAST(f), NULL); + return 1; +} + +static int lupb_msgiter_next(lua_State *L) { + upb_msg_iter *i = lua_touserdata(L, lua_upvalueindex(1)); + if (upb_msg_done(i)) return 0; + lupb_def_pushwrapper(L, UPB_UPCAST(upb_msg_iter_field(i)), NULL); + upb_msg_next(i); + return 1; +} + +static int lupb_msgdef_fields(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + upb_msg_iter *i = lua_newuserdata(L, sizeof(upb_msg_iter)); + upb_msg_begin(i, m); + lua_pushcclosure(L, &lupb_msgiter_next, 1); + return 1; +} + +static const struct luaL_Reg lupb_msgdef_mm[] = { + {"__gc", lupb_msgdef_gc}, + {"__len", lupb_msgdef_len}, + {NULL, NULL} +}; + +static const struct luaL_Reg lupb_msgdef_m[] = { + LUPB_COMMON_DEF_METHODS + {"add", lupb_msgdef_add}, + {"field", lupb_msgdef_field}, + {"fields", lupb_msgdef_fields}, + + // Internal-only. 
+ {"_selector_count", lupb_msgdef_selectorcount}, + {"_submsg_field_count", lupb_msgdef_submsgfieldcount}, + + {NULL, NULL} +}; + + +/* lupb_enumdef ***************************************************************/ + +const upb_enumdef *lupb_enumdef_check(lua_State *L, int narg) { + lupb_refcounted *r = luaL_checkudata(L, narg, LUPB_ENUMDEF); + if (!r) luaL_typerror(L, narg, LUPB_ENUMDEF); + if (!r->refcounted) luaL_error(L, "called into dead enumdef"); + return upb_downcast_enumdef(r->def); +} + +static upb_enumdef *lupb_enumdef_checkmutable(lua_State *L, int narg) { + const upb_enumdef *f = lupb_enumdef_check(L, narg); + if (upb_enumdef_isfrozen(f)) + luaL_typerror(L, narg, "not allowed on frozen value"); + return (upb_enumdef*)f; +} + +static int lupb_enumdef_gc(lua_State *L) { + lupb_refcounted *r = luaL_checkudata(L, 1, LUPB_ENUMDEF); + upb_def_unref(r->def, r); + r->refcounted = NULL; + return 0; +} + +static int lupb_enumdef_new(lua_State *L) { + int narg = lua_gettop(L); + upb_enumdef *e = upb_enumdef_new(&e); + lupb_def_pushnewrapper(L, UPB_UPCAST(e), &e); + + if (narg == 0) return 1; + + // User can specify initialization values like so: + // upb.EnumDef{full_name="MyEnum", + // values={ + // {"FOO_VALUE_1", 1}, + // {"FOO_VALUE_2", 2} + // } + // } + luaL_checktype(L, 1, LUA_TTABLE); + for (lua_pushnil(L); lua_next(L, 1); lua_pop(L, 1)) { + luaL_checktype(L, -2, LUA_TSTRING); + const char *key = lua_tostring(L, -2); + if (streql(key, "values")) { + for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) { + lua_rawgeti(L, -1, 1); + luaL_checktype(L, -1, LUA_TSTRING); + const char *name = lua_tostring(L, -1); + lua_rawgeti(L, -2, 2); + CHK(upb_enumdef_addval(e, name, chkint32(L, -1, "value"), &status)); + lua_pop(L, 2); // The key/val we got from lua_rawgeti() + } + } else if (streql(key, "full_name")) { + CHK(upb_def_setfullname(UPB_UPCAST(e), chkname(L, -1), &status)); + } else { + luaL_error(L, "Unknown initializer key '%s'", key); + } + } + return 1; +} + 
+static int lupb_enumdef_add(lua_State *L) { + upb_enumdef *e = lupb_enumdef_checkmutable(L, 1); + CHK(upb_enumdef_addval(e, chkname(L, 2), chkint32(L, 3, "value"), &status)); + return 0; +} + +static int lupb_enumdef_len(lua_State *L) { + const upb_enumdef *e = lupb_enumdef_check(L, 1); + lua_pushinteger(L, upb_enumdef_numvals(e)); + return 1; +} + +static int lupb_enumdef_value(lua_State *L) { + const upb_enumdef *e = lupb_enumdef_check(L, 1); + int type = lua_type(L, 2); + if (type == LUA_TNUMBER) { + // Pushes "nil" for a NULL pointer. + lua_pushstring(L, upb_enumdef_iton(e, chkint32(L, 2, "value"))); + } else if (type == LUA_TSTRING) { + int32_t num; + if (upb_enumdef_ntoi(e, lua_tostring(L, 2), &num)) { + lua_pushnumber(L, num); + } else { + lua_pushnil(L); + } + } else { + const char *msg = lua_pushfstring(L, "number or string expected, got %s", + luaL_typename(L, 2)); + return luaL_argerror(L, 2, msg); + } + return 1; +} + +static int lupb_enumiter_next(lua_State *L) { + upb_enum_iter *i = lua_touserdata(L, lua_upvalueindex(1)); + if (upb_enum_done(i)) return 0; + lua_pushstring(L, upb_enum_iter_name(i)); + lua_pushnumber(L, upb_enum_iter_number(i)); + upb_enum_next(i); + return 2; +} + +static int lupb_enumdef_values(lua_State *L) { + const upb_enumdef *e = lupb_enumdef_check(L, 1); + upb_enum_iter *i = lua_newuserdata(L, sizeof(upb_enum_iter)); + upb_enum_begin(i, e); + lua_pushcclosure(L, &lupb_enumiter_next, 1); + return 1; +} + +static const struct luaL_Reg lupb_enumdef_mm[] = { + {"__gc", lupb_enumdef_gc}, + {"__len", lupb_enumdef_len}, + {NULL, NULL} +}; + +static const struct luaL_Reg lupb_enumdef_m[] = { + LUPB_COMMON_DEF_METHODS + {"add", lupb_enumdef_add}, + {"value", lupb_enumdef_value}, + {"values", lupb_enumdef_values}, + {NULL, NULL} +}; + + +/* lupb_symtab ****************************************************************/ + +// Inherits a ref on the symtab. +// Checks that narg is a proper lupb_symtab object. 
If it is, leaves its +// metatable on the stack for cache lookups/updates. +upb_symtab *lupb_symtab_check(lua_State *L, int narg) { + lupb_refcounted *r = luaL_checkudata(L, narg, LUPB_SYMTAB); + if (!r) luaL_typerror(L, narg, LUPB_SYMTAB); + if (!r->refcounted) luaL_error(L, "called into dead symtab"); + return r->symtab; +} + +// narg is a lua table containing a list of defs to add. +void lupb_symtab_doadd(lua_State *L, upb_symtab *s, int narg) { + luaL_checktype(L, narg, LUA_TTABLE); + // Iterate over table twice. First iteration to count entries and + // check constraints. + int n = 0; + for (lua_pushnil(L); lua_next(L, narg); lua_pop(L, 1)) { + lupb_def_check(L, -1); + ++n; + } + + // Second iteration to build deflist and layout. + upb_def **defs = malloc(n * sizeof(*defs)); + n = 0; + for (lua_pushnil(L); lua_next(L, narg); lua_pop(L, 1)) { + upb_def *def = lupb_def_checkmutable(L, -1); + defs[n++] = def; + } + + upb_status status = UPB_STATUS_INIT; + upb_symtab_add(s, defs, n, NULL, &status); + free(defs); + lupb_checkstatus(L, &status); +} + +static int lupb_symtab_new(lua_State *L) { + int narg = lua_gettop(L); + upb_symtab *s = upb_symtab_new(&s); + lupb_refcounted_pushnewrapper(L, UPB_UPCAST(s), LUPB_SYMTAB, &s); + if (narg > 0) lupb_symtab_doadd(L, s, 1); + return 1; +} + +static int lupb_symtab_add(lua_State *L) { + lupb_symtab_doadd(L, lupb_symtab_check(L, 1), 2); + return 0; +} + +static int lupb_symtab_gc(lua_State *L) { + lupb_refcounted *r = luaL_checkudata(L, 1, LUPB_SYMTAB); + upb_symtab_unref(r->symtab, r); + r->refcounted = NULL; + return 0; +} + +static int lupb_symtab_lookup(lua_State *L) { + upb_symtab *s = lupb_symtab_check(L, 1); + for (int i = 2; i <= lua_gettop(L); i++) { + const upb_def *def = + upb_symtab_lookup(s, luaL_checkstring(L, i), &def); + lupb_def_pushwrapper(L, def, &def); + lua_replace(L, i); + } + return lua_gettop(L) - 1; +} + +static int lupb_symtab_getdefs(lua_State *L) { + upb_symtab *s = lupb_symtab_check(L, 1); + 
upb_deftype_t type = luaL_checkint(L, 2); + int count; + const upb_def **defs = upb_symtab_getdefs(s, type, &defs, &count); + + // Create the table in which we will return the defs. + lua_createtable(L, count, 0); + for (int i = 0; i < count; i++) { + const upb_def *def = defs[i]; + lupb_def_pushwrapper(L, def, &defs); + lua_rawseti(L, -2, i + 1); + } + free(defs); + return 1; +} + +// This is a *temporary* API that will be removed once pending refactorings are +// complete (it does not belong here in core because it depends on both +// the descriptor.proto schema and the protobuf binary format. +static int lupb_symtab_load_descriptor(lua_State *L) { + size_t len; + upb_symtab *s = lupb_symtab_check(L, 1); + const char *str = luaL_checklstring(L, 2, &len); + CHK(upb_load_descriptor_into_symtab(s, str, len, &status)); + return 0; +} + +static const struct luaL_Reg lupb_symtab_m[] = { + {"add", lupb_symtab_add}, + {"getdefs", lupb_symtab_getdefs}, + {"lookup", lupb_symtab_lookup}, + {"load_descriptor", lupb_symtab_load_descriptor}, + {NULL, NULL} +}; + +static const struct luaL_Reg lupb_symtab_mm[] = { + {"__gc", lupb_symtab_gc}, + {NULL, NULL} +}; + + +/* lupb toplevel **************************************************************/ + +static int lupb_def_freeze(lua_State *L) { + int n = lua_gettop(L); + upb_def **defs = malloc(n * sizeof(upb_def*)); + for (int i = 0; i < n; i++) { + // Could allow an array of defs here also. + defs[i] = lupb_def_checkmutable(L, i + 1); + } + upb_status s = UPB_STATUS_INIT; + upb_def_freeze(defs, n, &s); + free(defs); + lupb_checkstatus(L, &s); + return 0; +} + +static const struct luaL_Reg lupb_toplevel_m[] = { + {"EnumDef", lupb_enumdef_new}, + {"FieldDef", lupb_fielddef_new}, + {"MessageDef", lupb_msgdef_new}, + {"SymbolTable", lupb_symtab_new}, + {"freeze", lupb_def_freeze}, + + {NULL, NULL} +}; + +// Register the given type with the given methods and metamethods. 
+static void lupb_register_type(lua_State *L, const char *name, + const luaL_Reg *m, const luaL_Reg *mm) { + luaL_newmetatable(L, name); + lupb_setfuncs(L, mm); // Register all mm in the metatable. + lua_createtable(L, 0, 0); + // Methods go in the mt's __index method. This implies that you can't + // implement __index. + lupb_setfuncs(L, m); + lua_setfield(L, -2, "__index"); + lua_pop(L, 1); // The mt. +} + +static void lupb_setfieldi(lua_State *L, const char *field, int i) { + lua_pushnumber(L, i); + lua_setfield(L, -2, field); +} + +int luaopen_upb(lua_State *L) { + lupb_register_type(L, LUPB_MSGDEF, lupb_msgdef_m, lupb_msgdef_mm); + lupb_register_type(L, LUPB_ENUMDEF, lupb_enumdef_m, lupb_enumdef_mm); + lupb_register_type(L, LUPB_FIELDDEF, lupb_fielddef_m, lupb_fielddef_mm); + lupb_register_type(L, LUPB_SYMTAB, lupb_symtab_m, lupb_symtab_mm); + + // Create our object cache. + lua_newtable(L); + lua_createtable(L, 0, 1); // Cache metatable. + lua_pushstring(L, "v"); // Values are weak. + lua_setfield(L, -2, "__mode"); + lua_setmetatable(L, -2); + lua_setfield(L, LUA_REGISTRYINDEX, LUPB_OBJCACHE); + + lupb_newlib(L, "upb", lupb_toplevel_m); + + // Define a couple functions as Lua source (kept here instead of a separate + // Lua file so that upb.so is self-contained) + const char *lua_source = + "return function(upb)\n" + " upb.build_defs = function(defs)\n" + " local symtab = upb.SymbolTable(defs)\n" + " return symtab:getdefs(upb.DEF_ANY)\n" + " end\n" + "end"; + + if (luaL_dostring(L, lua_source) != 0) + lua_error(L); + + // Call the chunk that will define the extra functions on upb, passing our + // package dictionary as the argument. + lua_pushvalue(L, -2); + lua_call(L, 1, 0); + + // Register constants. 
+ lupb_setfieldi(L, "LABEL_OPTIONAL", UPB_LABEL_OPTIONAL); + lupb_setfieldi(L, "LABEL_REQUIRED", UPB_LABEL_REQUIRED); + lupb_setfieldi(L, "LABEL_REPEATED", UPB_LABEL_REPEATED); + + lupb_setfieldi(L, "TYPE_DOUBLE", UPB_TYPE_DOUBLE); + lupb_setfieldi(L, "TYPE_FLOAT", UPB_TYPE_FLOAT); + lupb_setfieldi(L, "TYPE_INT64", UPB_TYPE_INT64); + lupb_setfieldi(L, "TYPE_UINT64", UPB_TYPE_UINT64); + lupb_setfieldi(L, "TYPE_INT32", UPB_TYPE_INT32); + lupb_setfieldi(L, "TYPE_BOOL", UPB_TYPE_BOOL); + lupb_setfieldi(L, "TYPE_STRING", UPB_TYPE_STRING); + lupb_setfieldi(L, "TYPE_MESSAGE", UPB_TYPE_MESSAGE); + lupb_setfieldi(L, "TYPE_BYTES", UPB_TYPE_BYTES); + lupb_setfieldi(L, "TYPE_UINT32", UPB_TYPE_UINT32); + lupb_setfieldi(L, "TYPE_ENUM", UPB_TYPE_ENUM); + + lupb_setfieldi(L, "INTFMT_VARIABLE", UPB_INTFMT_VARIABLE); + lupb_setfieldi(L, "INTFMT_FIXED", UPB_INTFMT_FIXED); + lupb_setfieldi(L, "INTFMT_ZIGZAG", UPB_INTFMT_ZIGZAG); + + lupb_setfieldi(L, "DESCRIPTOR_TYPE_DOUBLE", UPB_DESCRIPTOR_TYPE_DOUBLE); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_FLOAT", UPB_DESCRIPTOR_TYPE_FLOAT); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_INT64", UPB_DESCRIPTOR_TYPE_INT64); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_UINT64", UPB_DESCRIPTOR_TYPE_UINT64); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_INT32", UPB_DESCRIPTOR_TYPE_INT32); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_FIXED64", UPB_DESCRIPTOR_TYPE_FIXED64); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_FIXED32", UPB_DESCRIPTOR_TYPE_FIXED32); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_BOOL", UPB_DESCRIPTOR_TYPE_BOOL); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_STRING", UPB_DESCRIPTOR_TYPE_STRING); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_GROUP", UPB_DESCRIPTOR_TYPE_GROUP); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_MESSAGE", UPB_DESCRIPTOR_TYPE_MESSAGE); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_BYTES", UPB_DESCRIPTOR_TYPE_BYTES); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_UINT32", UPB_DESCRIPTOR_TYPE_UINT32); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_ENUM", UPB_DESCRIPTOR_TYPE_ENUM); + lupb_setfieldi(L, 
"DESCRIPTOR_TYPE_SFIXED32", UPB_DESCRIPTOR_TYPE_SFIXED32); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_SFIXED64", UPB_DESCRIPTOR_TYPE_SFIXED64); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_SINT32", UPB_DESCRIPTOR_TYPE_SINT32); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_SINT64", UPB_DESCRIPTOR_TYPE_SINT64); + + lupb_setfieldi(L, "DEF_MSG", UPB_DEF_MSG); + lupb_setfieldi(L, "DEF_FIELD", UPB_DEF_FIELD); + lupb_setfieldi(L, "DEF_ENUM", UPB_DEF_ENUM); + lupb_setfieldi(L, "DEF_SERVICE", UPB_DEF_SERVICE); + lupb_setfieldi(L, "DEF_ANY", UPB_DEF_ANY); + + lupb_setfieldi(L, "HANDLER_INT32", UPB_HANDLER_INT32); + lupb_setfieldi(L, "HANDLER_INT64", UPB_HANDLER_INT64); + lupb_setfieldi(L, "HANDLER_UINT32", UPB_HANDLER_UINT32); + lupb_setfieldi(L, "HANDLER_UINT64", UPB_HANDLER_UINT64); + lupb_setfieldi(L, "HANDLER_FLOAT", UPB_HANDLER_FLOAT); + lupb_setfieldi(L, "HANDLER_DOUBLE", UPB_HANDLER_DOUBLE); + lupb_setfieldi(L, "HANDLER_BOOL", UPB_HANDLER_BOOL); + lupb_setfieldi(L, "HANDLER_STARTSTR", UPB_HANDLER_STARTSTR); + lupb_setfieldi(L, "HANDLER_STRING", UPB_HANDLER_STRING); + lupb_setfieldi(L, "HANDLER_ENDSTR", UPB_HANDLER_ENDSTR); + lupb_setfieldi(L, "HANDLER_STARTSUBMSG", UPB_HANDLER_STARTSUBMSG); + lupb_setfieldi(L, "HANDLER_ENDSUBMSG", UPB_HANDLER_ENDSUBMSG); + lupb_setfieldi(L, "HANDLER_STARTSEQ", UPB_HANDLER_STARTSEQ); + lupb_setfieldi(L, "HANDLER_ENDSEQ", UPB_HANDLER_ENDSEQ); + + return 1; // Return package table. +} + +// Alternate names so that the library can be loaded as upb5_1 etc. +int LUPB_OPENFUNC(upb)(lua_State *L) { return luaopen_upb(L); } diff --git a/upb/bindings/lua/upb.h b/upb/bindings/lua/upb.h new file mode 100644 index 0000000..e6b4f2f --- /dev/null +++ b/upb/bindings/lua/upb.h @@ -0,0 +1,45 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * Shared definitions for upb Lua modules. 
 */

#ifndef UPB_LUA_UPB_H_
#define UPB_LUA_UPB_H_

#include "upb/def.h"

// NOTE(review): this header uses lua_State, luaL_Reg-related macros and
// LUA_VERSION_NUM but does not include any Lua header itself; it appears to
// rely on the includer having included them first -- confirm.

// Lua 5.1/5.2 compatibility code.
#if LUA_VERSION_NUM == 501

// Under Lua 5.1, map the names used by the bindings onto their 5.1
// equivalents.
#define lua_rawlen lua_objlen
#define lupb_newlib(L, name, l) luaL_register(L, name, l)
#define lupb_setfuncs(L, l) luaL_register(L, NULL, l)
#define LUPB_OPENFUNC(mod) luaopen_ ## mod ## upb5_1

// Declared here for the 5.1 build; presumably defined elsewhere in the
// bindings.
void *luaL_testudata(lua_State *L, int ud, const char *tname);

#elif LUA_VERSION_NUM == 502

// Lua 5.2 modules are not expected to set a global variable, so "name" is
// unused.
#define lupb_newlib(L, name, l) luaL_newlib(L, l)
#define lupb_setfuncs(L, l) luaL_setfuncs(L, l, 0)
// Declared here for the 5.2 build; presumably defined elsewhere in the
// bindings.
int luaL_typerror(lua_State *L, int narg, const char *tname);
#define LUPB_OPENFUNC(mod) luaopen_ ## mod ## upb5_2

#else
#error Only Lua 5.1 and 5.2 are supported
#endif

// Functions shared between the upb Lua modules.
const upb_msgdef *lupb_msgdef_check(lua_State *L, int narg);
const upb_enumdef *lupb_enumdef_check(lua_State *L, int narg);
const char *lupb_checkname(lua_State *L, int narg);
bool lupb_def_pushwrapper(lua_State *L, const upb_def *def, const void *owner);
void lupb_def_pushnewrapper(lua_State *L, const upb_def *def,
                            const void *owner);

#endif  // UPB_LUA_UPB_H_
diff --git a/upb/bindings/python/setup.py b/upb/bindings/python/setup.py
new file mode 100644
index 0000000..8abaff8
--- /dev/null
+++ b/upb/bindings/python/setup.py
@@ -0,0 +1,14 @@
# distutils build script for the "upb" Python extension (built from upb.c
# and linked against the upb_pic library).
from distutils.core import setup, Extension

setup(name='upb',
      version='0.1',
      ext_modules=[
          Extension('upb.__init__', ['upb.c'],
              include_dirs=['../../'],
              define_macros=[("UPB_UNALIGNED_READS_OK", 1)],
              library_dirs=['../../upb'],
              libraries=['upb_pic'],
          ),
      ],
      packages=['upb']
      )
diff --git a/upb/bindings/python/test.py b/upb/bindings/python/test.py
new file mode 100644
index 0000000..29a6c45
--- /dev/null
+++ b/upb/bindings/python/test.py
@@ -0,0 +1,72 @@

import upb
import unittest

# Tests for upb.FieldDef: construction, attribute get/set, and subclassing.
class TestFieldDef(unittest.TestCase):
  def test_construction(self):
    # A FieldDef built without arguments has no number, name or type, and
    # has the optional label.
    fielddef1 = upb.FieldDef()
    self.assertTrue(fielddef1.number is None)
    self.assertTrue(fielddef1.name is None)
    self.assertTrue(fielddef1.type is None)
    self.assertEqual(fielddef1.label, upb.LABEL_OPTIONAL)

    # Keyword arguments to the constructor set the matching attributes.
    fielddef2 = upb.FieldDef(number=5, name="field2",
                             label=upb.LABEL_REQUIRED, type=upb.TYPE_INT32,
                             type_name="MyType")

    self.assertTrue(id(fielddef1) != id(fielddef2))
    self.assertEqual(fielddef2.number, 5)
    self.assertEqual(fielddef2.name, "field2")
    self.assertEqual(fielddef2.label, upb.LABEL_REQUIRED)
    self.assertEqual(fielddef2.type, upb.TYPE_INT32)
    self.assertEqual(fielddef2.type_name, "MyType")

    # Attributes can be reassigned after construction.
    fielddef2.number = 8
    self.assertEqual(fielddef2.number, 8)

    fielddef2.name = "xxx"
    self.assertEqual(fielddef2.name, "xxx")

    fielddef2.label = upb.LABEL_REPEATED
    self.assertEqual(fielddef2.label, upb.LABEL_REPEATED)

    fielddef2.type = upb.TYPE_FLOAT
    self.assertEqual(fielddef2.type, upb.TYPE_FLOAT)

  def test_nosubclasses(self):
    # Subclassing upb.FieldDef is expected to raise TypeError.
    def create_subclass():
      class MyClass(upb.FieldDef):
        pass

    self.assertRaises(TypeError, create_subclass)

  # TODO: test that assigning invalid values is properly prevented.
+ +class TestMessageDef(unittest.TestCase): + def test_construction(self): + msgdef1 = upb.MessageDef() + self.assertTrue(msgdef1.fqname is None) + self.assertEqual(msgdef1.fields(), []) + + fields = [upb.FieldDef(number=1, name="field1", type=upb.TYPE_INT32)] + msgdef2 = upb.MessageDef(fqname="Message2", fields=fields) + + self.assertEqual(set(msgdef2.fields()), set(fields)) + + f2 = upb.FieldDef(number=2, name="field2", type=upb.TYPE_INT64) + msgdef2.add_field(f2) + + fields.append(f2) + self.assertEqual(set(msgdef2.fields()), set(fields)) + +class TestSymbolTable(unittest.TestCase): + def test_construction(self): + s = upb.SymbolTable() + self.assertEqual(s.defs(), []); + + s.add_def(upb.MessageDef(fqname="A")) + self.assertTrue(s.lookup("A") is not None) + self.assertTrue(s.lookup("A") is s.lookup("A")) + +if __name__ == '__main__': + unittest.main() diff --git a/upb/bindings/python/upb.c b/upb/bindings/python/upb.c new file mode 100644 index 0000000..497074b --- /dev/null +++ b/upb/bindings/python/upb.c @@ -0,0 +1,724 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * Python extension exposing the core of upb: definitions, handlers, + * and a message type. 
+ */ + +#include <stddef.h> +#include <Python.h> +#include "upb/def.h" +#include "upb/msg.h" + +static bool streql(const char *a, const char *b) { return strcmp(a, b) == 0; } + +PyObject *PyUpb_Error(const char *str) { + PyErr_SetString(PyExc_TypeError, str); + return NULL; +} + +int PyUpb_ErrorInt(const char *str) { + PyErr_SetString(PyExc_TypeError, str); + return -1; +} + +#define PyUpb_CheckStatus(status) \ + if (!upb_ok(status)) return PyUpb_Error((status)->str); + +static upb_accessor_vtbl *PyUpb_AccessorForField(upb_fielddef *f); + + +/* Object cache ***************************************************************/ + +// For objects that are just wrappers around a C object pointer, we keep a +// cache mapping C pointer -> wrapper object. This allows us to consistently +// vend the same Python object given the same C object. This prevents us from +// creating too many Python objects unnecessarily. Just as importantly, it +// provides the expected semantics: +// +// if field.subdef is field.subdef: +// print "Sanity prevails." +// +// If we conjured up a new wrapper object every time, the above would not be +// true. +// +// The cost is having to put all such objects in a table, but since this only +// applies to schema-level objects (defs, handlers, etc) this seems acceptable. +// We do *not* have to put all message objects in this table. +// +// We use weak refs so that the cache does not prevent the wrapper objects from +// being collected. The table is stored as a static variable; to use +// sub-interpreters this would need to change, but I believe that using +// sub-interpreters is exceedingly rare in practice. + +typedef struct { + PyObject_HEAD; + void *obj; + PyObject *weakreflist; +} PyUpb_ObjWrapper; + +static PyObject *obj_cache = NULL; +static PyObject *reverse_cache = NULL; +static PyObject *weakref_callback = NULL; + +// Utility functions for manipulating Python dictionaries keyed by pointer. 
+ +static PyObject *PyUpb_StringForPointer(const void *ptr) { + PyObject *o = PyString_FromStringAndSize((const char *)&ptr, sizeof(void*)); + assert(o); + return o; +} + +static PyObject *PyUpb_ObjCacheDeleteCallback(PyObject *self, PyObject *ref) { + // Python very unfortunately clears the weakref before running our callback. + // This prevents us from using the weakref to find the C pointer we need to + // remove from the cache. As a result we are forced to keep a second map + // mapping weakref->C pointer. + PyObject *ptr_str = PyDict_GetItem(reverse_cache, ref); + assert(ptr_str); + int err = PyDict_DelItem(obj_cache, ptr_str); + assert(!err); + err = PyDict_DelItem(reverse_cache, ref); + assert(!err); + return Py_None; +} + +static PyObject *PyUpb_ObjCacheGet(const void *obj, PyTypeObject *type) { + PyObject *kv = PyUpb_StringForPointer(obj); + PyObject *ref = PyDict_GetItem(obj_cache, kv); + PyObject *ret; + if (ref) { + ret = PyWeakref_GetObject(ref); + assert(ret != Py_None); + Py_INCREF(ret); + } else { + PyUpb_ObjWrapper *wrapper = (PyUpb_ObjWrapper*)type->tp_alloc(type, 0); + wrapper->obj = (void*)obj; + wrapper->weakreflist = NULL; + ret = (PyObject*)wrapper; + ref = PyWeakref_NewRef(ret, weakref_callback); + assert(PyWeakref_GetObject(ref) == ret); + assert(ref); + PyDict_SetItem(obj_cache, kv, ref); + PyDict_SetItem(reverse_cache, ref, kv); + } + assert(ret); + Py_DECREF(kv); + return ret; +} + + +/* PyUpb_Def ******************************************************************/ + +static PyTypeObject *PyUpb_TypeForDef(const upb_def *def); + +static void PyUpb_Def_dealloc(PyObject *obj) { + PyUpb_ObjWrapper *wrapper = (void*)obj; + upb_def_unref((upb_def*)wrapper->obj); + obj->ob_type->tp_free(obj); +} + +PyObject *PyUpb_Def_GetOrCreate(const upb_def *def) { + return def ? PyUpb_ObjCacheGet(def, PyUpb_TypeForDef(def)) : Py_None; +} + +// Will need to expand once other kinds of defs are supported. 
+#define Check_Def(o, badret) Check_MessageDef(o, badret) + + +/* PyUpb_FieldDef *************************************************************/ + +static PyTypeObject PyUpb_FieldDefType; +static int PyUpb_FieldDef_setattro(PyObject *o, PyObject *key, PyObject *val); + +#define Check_FieldDef(o, badret) \ + (void*)(((PyUpb_ObjWrapper*)o)->obj); do { \ + if(!PyObject_TypeCheck(o, &PyUpb_FieldDefType)) { \ + PyErr_SetString(PyExc_TypeError, "must be a upb.FieldDef"); \ + return badret; \ + } \ + } while(0) + +static PyObject *PyUpb_FieldDef_GetOrCreate(const upb_fielddef *f) { + return PyUpb_ObjCacheGet(f, &PyUpb_FieldDefType); +} + +static PyObject *PyUpb_FieldDef_new(PyTypeObject *subtype, + PyObject *args, PyObject *kwds) { + return PyUpb_ObjCacheGet(upb_fielddef_new(), subtype); +} + +static int PyUpb_FieldDef_init(PyObject *self, PyObject *args, PyObject *kwds) { + if (!kwds) return 0; + PyObject *key, *value; + Py_ssize_t pos = 0; + while (PyDict_Next(kwds, &pos, &key, &value)) + PyUpb_FieldDef_setattro(self, key, value); + return 0; +} + +static void PyUpb_FieldDef_dealloc(PyObject *obj) { + PyUpb_ObjWrapper *wrapper = (void*)obj; + if (wrapper->weakreflist) PyObject_ClearWeakRefs(obj); + upb_fielddef_unref((upb_fielddef*)wrapper->obj); + obj->ob_type->tp_free(obj); +} + +static PyObject *PyUpb_FieldDef_getattro(PyObject *obj, PyObject *attr_name) { + upb_fielddef *f = Check_FieldDef(obj, NULL); + if (!upb_fielddef_ismutable(f)) { + PyErr_SetString(PyExc_TypeError, "fielddef is not mutable."); + return NULL; + } + const char *name = PyString_AsString(attr_name); + if (streql(name, "name")) { + const char *name = upb_fielddef_name(f); + return name == NULL ? Py_None : PyString_FromString(name); + } else if (streql(name, "number")) { + uint32_t num = upb_fielddef_number(f); + return num == 0 ? Py_None : PyInt_FromLong(num); + } else if (streql(name, "type")) { + uint8_t type = upb_fielddef_type(f); + return type == 0 ? 
Py_None : PyInt_FromLong(type); + } else if (streql(name, "label")) { + return PyInt_FromLong(upb_fielddef_label(f)); + } else if (streql(name, "type_name")) { + const char *name = upb_fielddef_typename(f); + return name == NULL ? Py_None : PyString_FromString(name); + } else if (streql(name, "subdef")) { + // NYI; + return NULL; + } else if (streql(name, "msgdef")) { + // NYI; + return NULL; + } else { + return PyUpb_Error("Invalid fielddef member."); + } +} + +static int PyUpb_FieldDef_setattro(PyObject *o, PyObject *key, PyObject *val) { + upb_fielddef *f = Check_FieldDef(o, -1); + const char *field = PyString_AsString(key); + if (!upb_fielddef_ismutable(f)) + return PyUpb_ErrorInt("fielddef is not mutable."); + if (streql(field, "name")) { + const char *name = PyString_AsString(val); + if (!name || !upb_fielddef_setname(f, name)) + return PyUpb_ErrorInt("Invalid name"); + } else if (streql(field, "number")) { + // TODO: should check truncation. Non-security issue. + // Non-int will return -1, which is already invalid as a field number. + if (!upb_fielddef_setnumber(f, PyInt_AsLong(val))) + return PyUpb_ErrorInt("Invalid number"); + } else if (streql(field, "type")) { + // TODO: should check truncation. Non-security issue. + if (!upb_fielddef_settype(f, PyInt_AsLong(val))) + return PyUpb_ErrorInt("Invalid type"); + } else if (streql(field, "label")) { + // TODO: should check truncation. Non-security issue. 
+ if (!upb_fielddef_setlabel(f, PyInt_AsLong(val))) + return PyUpb_ErrorInt("Invalid label"); + } else if (streql(field, "type_name")) { + const char *name = PyString_AsString(val); + if (!name || !upb_fielddef_settypename(f, name)) + return PyUpb_ErrorInt("Invalid type_name"); + } else if (streql(field, "default_value")) { + // NYI + return -1; + } else { + return PyUpb_ErrorInt("Invalid fielddef member."); + } + return 0; +} + +static PyTypeObject PyUpb_FieldDefType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "upb.FieldDef", /* tp_name */ + sizeof(PyUpb_ObjWrapper), /* tp_basicsize */ + 0, /* tp_itemsize */ + &PyUpb_FieldDef_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* TODO */ /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + &PyUpb_FieldDef_getattro, /* tp_getattro */ + &PyUpb_FieldDef_setattro, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(PyUpb_ObjWrapper, weakreflist),/* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + &PyUpb_FieldDef_init, /* tp_init */ + 0, /* tp_alloc */ + &PyUpb_FieldDef_new, /* tp_new */ + 0, /* tp_free */ +}; + + +/* PyUpb_MessageDef ***********************************************************/ + +static PyTypeObject PyUpb_MessageDefType; +static int PyUpb_MessageDef_setattro(PyObject *o, PyObject *key, PyObject *val); + +#define Check_MessageDef(o, badret) \ + (void*)(((PyUpb_ObjWrapper*)o)->obj); do { \ + if(!PyObject_TypeCheck(o, &PyUpb_MessageDefType)) { \ + PyErr_SetString(PyExc_TypeError, "must be a upb.MessageDef"); \ + return badret; \ 
+ } \ + } while(0) + +static PyObject *PyUpb_MessageDef_new(PyTypeObject *subtype, + PyObject *args, PyObject *kwds) { + return PyUpb_ObjCacheGet(upb_msgdef_new(), subtype); +} + +static PyObject *PyUpb_MessageDef_add_fields(PyObject *o, PyObject *args); + +static int PyUpb_MessageDef_init(PyObject *self, PyObject *args, PyObject *kwds) { + if (!kwds) return 0; + PyObject *key, *value; + Py_ssize_t pos = 0; + while (PyDict_Next(kwds, &pos, &key, &value)) { + const char *field = PyString_AsString(key); + if (streql(field, "fields")) { + PyUpb_MessageDef_add_fields(self, value); + } else { + PyUpb_MessageDef_setattro(self, key, value); + } + } + return 0; +} + +static PyObject *PyUpb_MessageDef_getattro(PyObject *obj, PyObject *attr_name) { + upb_msgdef *m = Check_MessageDef(obj, NULL); + const char *name = PyString_AsString(attr_name); + if (streql(name, "fqname")) { + const char *fqname = upb_def_fqname(UPB_UPCAST(m)); + return fqname == NULL ? Py_None : PyString_FromString(fqname); + } + return PyObject_GenericGetAttr(obj, attr_name); +} + +static int PyUpb_MessageDef_setattro(PyObject *o, PyObject *key, PyObject *val) { + upb_msgdef *m = Check_MessageDef(o, -1); + if (!upb_def_ismutable(UPB_UPCAST(m))) { + PyErr_SetString(PyExc_TypeError, "MessageDef is not mutable."); + return -1; + } + const char *name = PyString_AsString(key); + if (streql(name, "fqname")) { + const char *fqname = PyString_AsString(val); + if (!fqname || !upb_def_setfqname(UPB_UPCAST(m), fqname)) + return PyUpb_ErrorInt("Invalid fqname"); + } else { + return PyUpb_ErrorInt("Invalid MessageDef member."); + } + return 0; +} + +static PyObject *PyUpb_MessageDef_fields(PyObject *obj, PyObject *args) { + upb_msgdef *m = Check_MessageDef(obj, NULL); + PyObject *ret = PyList_New(0); + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + PyList_Append(ret, PyUpb_FieldDef_GetOrCreate(f)); + } + return ret; +} + +static 
// MessageDef.add_field(field): adds a single upb.FieldDef to this message def.
//
// "o" must be a upb.MessageDef and "field" a upb.FieldDef; otherwise the
// Check_* macros set TypeError and return NULL.  Returns a new reference to
// None on success.
static PyObject *PyUpb_MessageDef_add_field(PyObject *o, PyObject *field) {
  upb_msgdef *m = Check_MessageDef(o, NULL);
  upb_fielddef *f = Check_FieldDef(field, NULL);
  upb_msgdef_addfield(m, f);
  // The interpreter drops one reference to whatever a C method returns, so
  // None has to be INCREF'd first (Py_RETURN_NONE does exactly that).
  Py_RETURN_NONE;
}
0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + &PyUpb_MessageDef_init, /* tp_init */ + 0, /* tp_alloc */ + &PyUpb_MessageDef_new, /* tp_new */ + 0, /* tp_free */ +}; + + +static PyTypeObject *PyUpb_TypeForDef(const upb_def *def) { + switch(def->type) { + case UPB_DEF_MSG: return &PyUpb_MessageDefType; + default: return NULL; + } +} + +/* PyUpb_SymbolTable **********************************************************/ + +static PyTypeObject PyUpb_SymbolTableType; + +#define Check_SymbolTable(o, badret) \ + (void*)(((PyUpb_ObjWrapper*)o)->obj); do { \ + if(!PyObject_TypeCheck(o, &PyUpb_SymbolTableType)) { \ + PyErr_SetString(PyExc_TypeError, "must be a upb.MessageDef"); \ + return badret; \ + } \ + } while(0) + +static PyObject *PyUpb_SymbolTable_new(PyTypeObject *subtype, + PyObject *args, PyObject *kwds) { + return PyUpb_ObjCacheGet(upb_symtab_new(), subtype); +} + +static int PyUpb_SymbolTable_init(PyObject *self, PyObject *args, PyObject *kwds) { + return 0; +} + +static void PyUpb_SymbolTable_dealloc(PyObject *obj) { + PyUpb_ObjWrapper *wrapper = (void*)obj; + upb_symtab_unref((upb_symtab*)wrapper->obj); + obj->ob_type->tp_free(obj); +} + +// narg is a lua table containing a list of defs to add. +static PyObject *PyUpb_SymbolTable_add_defs(PyObject *o, PyObject *defs) { + upb_symtab *s = Check_SymbolTable(o, NULL); + if (!PySequence_Check(defs)) return PyUpb_Error("Must be a sequence"); + Py_ssize_t n = PySequence_Length(defs); + + // Prevent stack overflow. 
+ if (n > 2048) return PyUpb_Error("Too many defs"); + upb_def *cdefs[n]; + + int i = 0; + for (i = 0; i < n; i++) { + PyObject *pydef = PySequence_GetItem(defs, i); + upb_def *def = Check_MessageDef(pydef, NULL); + cdefs[i++] = def; + upb_msgdef *md = upb_dyncast_msgdef(def); + if (!md) continue; + upb_msg_iter j; + for(j = upb_msg_begin(md); !upb_msg_done(j); j = upb_msg_next(md, j)) { + upb_fielddef *f = upb_msg_iter_field(j); + upb_fielddef_setaccessor(f, PyUpb_AccessorForField(f)); + } + upb_msgdef_layout(md); + } + + upb_status status = UPB_STATUS_INIT; + upb_symtab_add(s, cdefs, n, &status); + PyUpb_CheckStatus(&status); + return Py_None; +} + +static PyObject *PyUpb_SymbolTable_add_def(PyObject *o, PyObject *def) { + PyObject *defs = PyList_New(1); + PyList_SetItem(defs, 0, def); + return PyUpb_SymbolTable_add_defs(o, defs); +} + +// TODO: update to allow user to choose type of defs. +static PyObject *PyUpb_SymbolTable_defs(PyObject *o, PyObject *none) { + upb_symtab *s = Check_SymbolTable(o, NULL); + int count; + const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); + PyObject *ret = PyList_New(count); + int i; + for(i = 0; i < count; i++) + PyList_SetItem(ret, i, PyUpb_Def_GetOrCreate(defs[i])); + return ret; +} + +static PyObject *PyUpb_SymbolTable_lookup(PyObject *o, PyObject *arg) { + upb_symtab *s = Check_SymbolTable(o, NULL); + const char *name = PyString_AsString(arg); + const upb_def *def = upb_symtab_lookup(s, name); + return PyUpb_Def_GetOrCreate(def); +} + +static PyMethodDef PyUpb_SymbolTable_methods[] = { + {"add_def", &PyUpb_SymbolTable_add_def, METH_O, NULL}, + {"add_defs", &PyUpb_SymbolTable_add_defs, METH_O, NULL}, + {"defs", &PyUpb_SymbolTable_defs, METH_NOARGS, NULL}, + {"lookup", &PyUpb_SymbolTable_lookup, METH_O, NULL}, + {NULL, NULL} +}; + +static PyTypeObject PyUpb_SymbolTableType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "upb.SymbolTable", /* tp_name */ + sizeof(PyUpb_ObjWrapper), /* tp_basicsize */ + 0, /* 
tp_itemsize */ + &PyUpb_SymbolTable_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* TODO */ /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(PyUpb_ObjWrapper, weakreflist),/* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + PyUpb_SymbolTable_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + &PyUpb_SymbolTable_init, /* tp_init */ + 0, /* tp_alloc */ + &PyUpb_SymbolTable_new, /* tp_new */ + 0, /* tp_free */ +}; + + +/* Accessor and PyUpb_Message *************************************************/ + +typedef struct { + PyTypeObject type; + PyTypeObject *alt_type; +} PyUpb_MessageType; + +typedef struct { + PyObject_HEAD; + PyObject *msgdef; + char data[1]; +} PyUpb_Message; + +PyObject **PyUpb_Accessor_GetPtr(PyObject *_m, upb_value fval) { + PyUpb_Message *m = (PyUpb_Message*)_m; + const upb_fielddef *f = upb_value_getfielddef(fval); + return (PyObject**)&m->data[f->offset]; +} + +static upb_sflow_t PyUpb_Message_StartSequence(void *m, upb_value fval) { + PyObject **seq = PyUpb_Accessor_GetPtr(m, fval); + PyTypeObject *type = ((PyUpb_MessageType*)Py_TYPE(m))->alt_type; + if (!*seq) *seq = type->tp_alloc(type, 0); + upb_stdmsg_sethas(m, fval); + return UPB_CONTINUE_WITH(*seq); +} + +static upb_sflow_t PyUpb_Message_StartSubmessage(void *m, upb_value fval) { + PyObject **submsg = PyUpb_Accessor_GetPtr(m, fval); + PyTypeObject *type = Py_TYPE(m); + if (!*submsg) *submsg = type->tp_alloc(type, 0); + upb_stdmsg_sethas(m, fval); + return 
UPB_CONTINUE_WITH(*submsg); +} + +static upb_sflow_t PyUpb_Message_StartRepeatedSubmessage(void *a, upb_value fval) { + (void)fval; + PyObject **elem = upb_stdarray_append(a, sizeof(void*)); + PyTypeObject *type = ((PyUpb_MessageType*)Py_TYPE(a))->alt_type; + if (!*elem) *elem = type->tp_alloc(type, 0); + return UPB_CONTINUE_WITH(*elem); +} + +static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value val) { + PyObject **str = PyUpb_Accessor_GetPtr(m, fval); + if (*str) { Py_DECREF(*str); } + *str = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len); + upb_strref_read(upb_value_getstrref(val), PyString_AsString(*str)); + upb_stdmsg_sethas(m, fval); + return UPB_CONTINUE; +} + +static upb_flow_t PyUpb_Message_AppendStringValue(void *a, upb_value fval, upb_value val) { + (void)fval; + PyObject **elem = upb_stdarray_append(a, sizeof(void*)); + *elem = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len); + upb_strref_read(upb_value_getstrref(val), PyString_AsString(*elem)); + return UPB_CONTINUE; +} + +#define STDMSG(type, size) static upb_accessor_vtbl vtbl = { \ + &PyUpb_Message_StartSubmessage, \ + &upb_stdmsg_set ## type, \ + &PyUpb_Message_StartSequence, \ + &PyUpb_Message_StartRepeatedSubmessage, \ + &upb_stdmsg_set ## type ## _r, \ + &upb_stdmsg_has, \ + &upb_stdmsg_getptr, \ + &upb_stdmsg_get ## type, \ + &upb_stdmsg_seqbegin, \ + &upb_stdmsg_ ## size ## byte_seqnext, \ + &upb_stdmsg_seqget ## type}; + +#define RETURN_STDMSG(type, size) { STDMSG(type, size); return &vtbl; } + +static upb_accessor_vtbl *PyUpb_AccessorForField(upb_fielddef *f) { + switch (f->type) { + case UPB_TYPE(DOUBLE): RETURN_STDMSG(double, 8) + case UPB_TYPE(FLOAT): RETURN_STDMSG(float, 4) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): RETURN_STDMSG(uint64, 8) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): RETURN_STDMSG(int64, 8) + case UPB_TYPE(INT32): + case UPB_TYPE(SINT32): + case UPB_TYPE(ENUM): + case 
UPB_TYPE(SFIXED32): RETURN_STDMSG(int32, 4) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): RETURN_STDMSG(uint32, 4) + case UPB_TYPE(BOOL): { STDMSG(bool, 1); return &vtbl; } + case UPB_TYPE(GROUP): + case UPB_TYPE(MESSAGE): RETURN_STDMSG(ptr, 8) // TODO: 32-bit + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): { + STDMSG(ptr, 8); + vtbl.set = &PyUpb_Message_StringValue; + vtbl.append = &PyUpb_Message_AppendStringValue; + return &vtbl; + } + } + return NULL; +} + + +/* Toplevel *******************************************************************/ + +static PyMethodDef methods[] = { + {NULL, NULL} +}; + +// PyModule_AddObject steals a ref, but our object is statically allocated +// and must not be deleted. +#define PyUpb_AddType(mod, name, type) \ + if (PyType_Ready(type) < 0) return; \ + Py_INCREF(type); \ + PyModule_AddObject(mod, name, (PyObject*)type); + +PyMODINIT_FUNC initupb(void) { + PyObject *mod = Py_InitModule("upb", methods); + + PyUpb_AddType(mod, "FieldDef", &PyUpb_FieldDefType); + PyUpb_AddType(mod, "MessageDef", &PyUpb_MessageDefType); + PyUpb_AddType(mod, "SymbolTable", &PyUpb_SymbolTableType); + + PyModule_AddIntConstant(mod, "LABEL_OPTIONAL", UPB_LABEL(OPTIONAL)); + PyModule_AddIntConstant(mod, "LABEL_REQUIRED", UPB_LABEL(REQUIRED)); + PyModule_AddIntConstant(mod, "LABEL_REPEATED", UPB_LABEL(REPEATED)); + + PyModule_AddIntConstant(mod, "TYPE_DOUBLE", UPB_TYPE(DOUBLE)); + PyModule_AddIntConstant(mod, "TYPE_FLOAT", UPB_TYPE(FLOAT)); + PyModule_AddIntConstant(mod, "TYPE_INT64", UPB_TYPE(INT64)); + PyModule_AddIntConstant(mod, "TYPE_UINT64", UPB_TYPE(UINT64)); + PyModule_AddIntConstant(mod, "TYPE_INT32", UPB_TYPE(INT32)); + PyModule_AddIntConstant(mod, "TYPE_FIXED64", UPB_TYPE(FIXED64)); + PyModule_AddIntConstant(mod, "TYPE_FIXED32", UPB_TYPE(FIXED32)); + PyModule_AddIntConstant(mod, "TYPE_BOOL", UPB_TYPE(BOOL)); + PyModule_AddIntConstant(mod, "TYPE_STRING", UPB_TYPE(STRING)); + PyModule_AddIntConstant(mod, "TYPE_GROUP", UPB_TYPE(GROUP)); + 
PyModule_AddIntConstant(mod, "TYPE_MESSAGE", UPB_TYPE(MESSAGE)); + PyModule_AddIntConstant(mod, "TYPE_BYTES", UPB_TYPE(BYTES)); + PyModule_AddIntConstant(mod, "TYPE_UINT32", UPB_TYPE(UINT32)); + PyModule_AddIntConstant(mod, "TYPE_ENUM", UPB_TYPE(ENUM)); + PyModule_AddIntConstant(mod, "TYPE_SFIXED32", UPB_TYPE(SFIXED32)); + PyModule_AddIntConstant(mod, "TYPE_SFIXED64", UPB_TYPE(SFIXED64)); + PyModule_AddIntConstant(mod, "TYPE_SINT32", UPB_TYPE(SINT32)); + PyModule_AddIntConstant(mod, "TYPE_SINT64", UPB_TYPE(SINT64)); + + obj_cache = PyDict_New(); + reverse_cache = PyDict_New(); + static PyMethodDef method = { + "WeakRefCallback", &PyUpb_ObjCacheDeleteCallback, METH_O, NULL}; + PyObject *pyname = PyString_FromString(method.ml_name); + weakref_callback = PyCFunction_NewEx(&method, NULL, pyname); + Py_DECREF(pyname); +} diff --git a/upb/bindings/python/upb/__init__.py b/upb/bindings/python/upb/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/upb/bindings/python/upb/__init__.py diff --git a/upb/bindings/stdc++/string.h b/upb/bindings/stdc++/string.h new file mode 100644 index 0000000..668f3e3 --- /dev/null +++ b/upb/bindings/stdc++/string.h @@ -0,0 +1,60 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// Author: haberman@google.com (Josh Haberman) +// +// upb - a minimalist implementation of protocol buffers. + +#ifndef UPB_STDCPP_H_ +#define UPB_STDCPP_H_ + +namespace upb { + +template <class T> +class FillStringHandler { + public: + static void SetHandler(BytesHandler* handler) { + upb_byteshandler_setstartstr(handler, &FillStringHandler::StartString, + NULL); + upb_byteshandler_setstring(handler, &FillStringHandler::StringBuf, NULL); + } + + private: + // TODO(haberman): add UpbBind/UpbMakeHandler support to BytesHandler so these + // can be prettier callbacks. 
+ static void* StartString(void *c, const void *hd, size_t size) { + T* str = static_cast<T*>(c); + str->clear(); + return c; + } + + static size_t StringBuf(void* c, const void* hd, const char* buf, size_t n, + const BufferHandle* h) { + T* str = static_cast<T*>(c); + try { + str->append(buf, n); + return n; + } catch (const std::exception&) { + return 0; + } + } +}; + +class StringSink { + public: + template <class T> + explicit StringSink(T* target) { + // TODO(haberman): we need to avoid rebuilding a new handler every time, + // but with class globals disallowed for google3 C++ this is tricky. + FillStringHandler<T>::SetHandler(&handler_); + input_.Reset(&handler_, target); + } + + BytesSink* input() { return &input_; } + + private: + BytesHandler handler_; + BytesSink input_; +}; + +} // namespace upb + +#endif // UPB_STDCPP_H_ diff --git a/upb/bindings/stdc/error.c b/upb/bindings/stdc/error.c new file mode 100644 index 0000000..85c9ca6 --- /dev/null +++ b/upb/bindings/stdc/error.c @@ -0,0 +1,43 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * Handling of errno. + */ + +#include "upb/stdc/error.h" + +#include <string.h> + +void upb_status_fromerrno(upb_status *status, int code) { + if (code != 0 && !upb_errno_is_wouldblock(code)) { + status->error = true; + upb_status_setcode(status, &upb_stdc_errorspace, code); + } +} + +bool upb_errno_is_wouldblock(int code) { + return +#ifdef EAGAIN + code == EAGAIN || +#endif +#ifdef EWOULDBLOCK + code == EWOULDBLOCK || +#endif + false; +} + +bool upb_stdc_codetostr(int code, char *buf, size_t len) { + // strerror() may use static buffers and is not guaranteed to be thread-safe, + // but it appears that it is not subject to buffer overflows in practice, and + // it used by other portable and high-quality software like Lua. 
For more + // discussion see: http://thread.gmane.org/gmane.comp.lang.lua.general/89506 + char *err = strerror(code); + if (strlen(err) >= len) return false; + strcpy(buf, err); + return true; +} + +upb_errorspace upb_stdc_errorspace = {"stdc", &upb_stdc_codetostr}; diff --git a/upb/bindings/stdc/error.h b/upb/bindings/stdc/error.h new file mode 100644 index 0000000..9802097 --- /dev/null +++ b/upb/bindings/stdc/error.h @@ -0,0 +1,27 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * Handling of errno. + */ + +#include "upb/upb.h" + +#ifndef UPB_STDC_ERROR_H_ +#define UPB_STDC_ERROR_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +extern upb_errorspace upb_stdc_errorspace; +void upb_status_fromerrno(upb_status *status, int code); +bool upb_errno_is_wouldblock(int code); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_STDC_ERROR_H_ */ diff --git a/upb/bindings/stdc/io.c b/upb/bindings/stdc/io.c new file mode 100644 index 0000000..5d36aa5 --- /dev/null +++ b/upb/bindings/stdc/io.c @@ -0,0 +1,178 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + */ + +#include "upb/stdc/io.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include "upb/stdc/error.h" + +// We can make this configurable if necessary. 
+#define BUF_SIZE 32768 + +/* upb_stdio ******************************************************************/ + +int upb_stdio_cmpbuf(const void *_key, const void *_elem) { + const uint64_t *ofs = _key; + const upb_stdio_buf *buf = _elem; + return (*ofs / BUF_SIZE) - (buf->ofs / BUF_SIZE); +} + +static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) { + // TODO: it is probably faster to linear search short lists, and to + // special-case the last one or two bufs. + return bsearch(&ofs, s->bufs, s->nbuf, sizeof(*s->bufs), &upb_stdio_cmpbuf); +} + +static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) { + upb_stdio_buf **reuse = NULL; // XXX + int num_reused = 0, num_inuse = 0; + + // Could sweep only a subset of bufs if this was a hotspot. + for (int i = 0; i < s->nbuf; i++) { + upb_stdio_buf *buf = s->bufs[i]; + if (buf->refcount > 0) { + s->bufs[num_inuse++] = buf; + } else { + reuse[num_reused++] = buf; + } + } + assert(num_reused + num_inuse == s->nbuf); + memcpy(s->bufs + num_inuse, reuse, num_reused * sizeof(upb_stdio_buf*)); + if (num_reused == 0) { + ++s->nbuf; + s->bufs = realloc(s->bufs, s->nbuf * sizeof(*s->bufs)); + s->bufs[s->nbuf-1] = malloc(sizeof(upb_stdio_buf) + BUF_SIZE); + return s->bufs[s->nbuf-1]; + } + return s->bufs[s->nbuf-num_reused]; +} + +void upb_stdio_discard(void *src, uint64_t ofs) { + (void)src; + (void)ofs; +} + +upb_bytesuccess_t upb_stdio_fetch(void *src, uint64_t ofs, size_t *bytes_read) { + (void)ofs; + upb_stdio *stdio = (upb_stdio*)src; + upb_stdio_buf *buf = upb_stdio_rotatebufs(stdio); +retry: + *bytes_read = fread(&buf->data, 1, BUF_SIZE, stdio->file); + buf->len = *bytes_read; + if (*bytes_read < (size_t)BUF_SIZE) { + // Error or EOF. 
+ if (feof(stdio->file)) { + upb_status_seteof(&stdio->src.status); + return UPB_BYTE_EOF; + } + if (ferror(stdio->file)) { +#ifdef EINTR + // If we encounter a client who doesn't want to retry EINTR, we can easily + // add a boolean property of the stdio that controls this behavior. + if (errno == EINTR) { + clearerr(stdio->file); + goto retry; + } +#endif + upb_status_fromerrno(&stdio->src.status, errno); + return upb_errno_is_wouldblock(errno) ? + UPB_BYTE_WOULDBLOCK : UPB_BYTE_ERROR; + } + assert(false); + } + return UPB_BYTE_OK; +} + +void upb_stdio_copy(const void *src, uint64_t ofs, size_t len, char *dst) { + upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs); + ofs -= buf->ofs; + memcpy(dst, buf->data + ofs, BUF_SIZE - ofs); + len -= (BUF_SIZE - ofs); + dst += (BUF_SIZE - ofs); + while (len > 0) { + ++buf; + size_t bytes = UPB_MIN(len, BUF_SIZE); + memcpy(dst, buf->data, bytes); + len -= bytes; + dst += bytes; + } +} + +const char *upb_stdio_getptr(const void *src, uint64_t ofs, size_t *len) { + upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs); + ofs -= buf->ofs; + *len = BUF_SIZE - ofs; + return &buf->data[ofs]; +} + +#if 0 +upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink)); + upb_strlen_t len = upb_string_len(str); + upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); + if (written < len) { + upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream."); + return -1; + } + return written; +} + +uint32_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, + const char *fmt, va_list args) { + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink)); + int written = vfprintf(stdio->file, fmt, args); + if (written < 0) { + upb_status_seterrf(status, "Error writing to stdio stream."); + return -1; + } + return written; +} +#endif + +void upb_stdio_init(upb_stdio *stdio) { + static 
upb_bytesrc_vtbl bytesrc_vtbl = { + &upb_stdio_fetch, + &upb_stdio_discard, + &upb_stdio_copy, + &upb_stdio_getptr, + }; + upb_bytesrc_init(&stdio->src, &bytesrc_vtbl); + + //static upb_bytesink_vtbl bytesink_vtbl = { + // upb_stdio_putstr, + // upb_stdio_vprintf + //}; + //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); +} + +void upb_stdio_reset(upb_stdio* stdio, FILE *file) { + stdio->file = file; + stdio->should_close = false; +} + +void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode, + upb_status *s) { + FILE *f = fopen(filename, mode); + if (!f) { + upb_status_fromerrno(s, errno); + return; + } + setvbuf(stdio->file, NULL, _IONBF, 0); // Disable buffering; we do our own. + upb_stdio_reset(stdio, f); + stdio->should_close = true; +} + +void upb_stdio_uninit(upb_stdio *stdio) { + // Can't report status; caller should flush() to ensure data is written. + if (stdio->should_close) fclose(stdio->file); + stdio->file = NULL; +} + +upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; } +upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; } diff --git a/upb/bindings/stdc/io.h b/upb/bindings/stdc/io.h new file mode 100644 index 0000000..fd19bef --- /dev/null +++ b/upb/bindings/stdc/io.h @@ -0,0 +1,73 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * ANSI C file I/O. + */ + +#ifndef UPB_STDC_IO_H_ +#define UPB_STDC_IO_H_ + +#include <stdio.h> +#include "upb/bytestream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_stdio ******************************************************************/ + +// bytesrc/bytesink for ANSI C stdio, which is less efficient than posixfd, but +// more portable. 
// One read buffer owned by a upb_stdio.  io.c allocates each buffer as
// sizeof(upb_stdio_buf) + BUF_SIZE bytes, so "data" has room for BUF_SIZE
// bytes.
typedef struct {
  // Offset subtracted from a requested stream offset to index into "data".
  // Presumably the stream offset of data[0]; it is never assigned in the
  // visible code -- TODO confirm.
  uint64_t ofs;
  // Number of bytes the most recent fread() stored in "data".
  size_t len;
  // Buffers with refcount > 0 are kept by upb_stdio_rotatebufs(); a buffer
  // with refcount 0 may be handed out again for the next read.
  uint32_t refcount;
  // Flexible array member holding the bytes read from the file.
  char data[];
} upb_stdio_buf;