From 7d3e2bd2c4cfd1296d1d6f996d7548de26540d41 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 15 Feb 2013 16:27:18 -0800 Subject: Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). --- Makefile | 42 +- README | 83 ++ benchmarks/parsestream.upb.c | 12 +- bindings/cpp/upb/bytestream.cc | 39 - bindings/cpp/upb/bytestream.hpp | 276 ------- bindings/cpp/upb/def.hpp | 462 ----------- bindings/cpp/upb/handlers.cc | 39 - bindings/cpp/upb/handlers.hpp | 176 ---- bindings/cpp/upb/msg.hpp | 62 -- bindings/cpp/upb/pb/decoder.hpp | 12 +- bindings/cpp/upb/pb/glue.hpp | 35 - bindings/cpp/upb/proto2_bridge.cc | 892 --------------------- bindings/cpp/upb/proto2_bridge.hpp | 170 ---- bindings/cpp/upb/upb.hpp | 81 -- bindings/linux/Makefile | 4 - bindings/linux/ctype.h | 8 - bindings/linux/inttypes.h | 22 - bindings/linux/setjmp.S | 60 -- bindings/linux/setjmp.h | 13 - bindings/linux/string.h | 13 - bindings/lua/LICENSE | 32 + bindings/lua/lunitx/atexit.lua | 32 + bindings/lua/lunitx/lunit.lua | 725 +++++++++++++++++ bindings/lua/lunitx/lunit/console.lua | 156 ++++ bindings/lua/lunitx/lunitx.lua | 21 + bindings/lua/table.c | 167 ++++ bindings/lua/test.lua | 345 +++++--- bindings/lua/upb.c | 1412 +++++++++++++++++---------------- bindings/lua/upb.h | 45 ++ dynasm/COPYRIGHT | 2 - tests/test_cpp.cc | 20 +- tests/test_decoder.cc | 258 +++--- tests/test_decoder_schema.proto | 64 ++ tests/test_def.c | 137 +++- tests/test_table.cc | 13 +- tests/test_varint.c | 13 +- tests/test_vs_proto2.cc | 39 +- tests/testmain.cc | 18 + tests/upb_test.h | 21 +- tools/dump_cinit.lua | 414 ++++++++++ tools/test_cinit.lua | 78 ++ tools/upbc.c | 197 ----- tools/upbc.lua | 50 ++ upb/bytestream.c | 6 +- upb/bytestream.h | 284 +++++-- upb/def.c | 991 
++++++++++------------- upb/def.h | 1315 +++++++++++++++++++----------- upb/descriptor.proto | 533 ------------- upb/descriptor/descriptor.proto | 533 +++++++++++++ upb/descriptor/descriptor.upb.c | 483 +++++++++++ upb/descriptor/descriptor.upb.h | 90 +++ upb/descriptor/descriptor_const.h | 349 -------- upb/descriptor/reader.c | 401 ++++------ upb/descriptor/reader.h | 2 +- upb/google/README | 16 + upb/google/bridge.cc | 260 ++++++ upb/google/bridge.h | 76 ++ upb/google/cord.h | 48 ++ upb/google/proto1.cc | 502 ++++++++++++ upb/google/proto1.h | 53 ++ upb/google/proto2.cc | 632 +++++++++++++++ upb/google/proto2.h | 62 ++ upb/handlers.c | 555 +++++++------ upb/handlers.h | 1014 ++++++++++++++--------- upb/msg.c | 52 -- upb/msg.h | 153 ---- upb/pb/decoder.c | 252 +++--- upb/pb/decoder.h | 61 +- upb/pb/decoder_x64.dasc | 646 +++++++++------ upb/pb/glue.c | 14 +- upb/pb/glue.h | 26 +- upb/pb/textprinter.c | 217 ++--- upb/pb/textprinter.h | 5 +- upb/pb/varint.c | 59 +- upb/pb/varint.h | 88 +- upb/refcount.c | 236 ------ upb/refcount.h | 73 -- upb/refcounted.c | 776 ++++++++++++++++++ upb/refcounted.h | 180 +++++ upb/sink.c | 205 +++++ upb/sink.h | 82 ++ upb/stdc/error.c | 1 - upb/stdc/io.c | 3 + upb/symtab.c | 326 ++++++++ upb/symtab.h | 200 +++++ upb/table.c | 208 +++-- upb/table.h | 103 ++- upb/upb.c | 26 +- upb/upb.h | 303 +++++-- 89 files changed, 11720 insertions(+), 7540 deletions(-) delete mode 100644 bindings/cpp/upb/bytestream.cc delete mode 100644 bindings/cpp/upb/bytestream.hpp delete mode 100644 bindings/cpp/upb/def.hpp delete mode 100644 bindings/cpp/upb/handlers.cc delete mode 100644 bindings/cpp/upb/handlers.hpp delete mode 100644 bindings/cpp/upb/msg.hpp delete mode 100644 bindings/cpp/upb/pb/glue.hpp delete mode 100644 bindings/cpp/upb/proto2_bridge.cc delete mode 100644 bindings/cpp/upb/proto2_bridge.hpp delete mode 100644 bindings/cpp/upb/upb.hpp delete mode 100644 bindings/linux/ctype.h delete mode 100644 bindings/linux/inttypes.h delete mode 100644 
bindings/linux/setjmp.S delete mode 100644 bindings/linux/setjmp.h create mode 100644 bindings/lua/LICENSE create mode 100644 bindings/lua/lunitx/atexit.lua create mode 100644 bindings/lua/lunitx/lunit.lua create mode 100644 bindings/lua/lunitx/lunit/console.lua create mode 100644 bindings/lua/lunitx/lunitx.lua create mode 100644 bindings/lua/table.c create mode 100644 bindings/lua/upb.h create mode 100644 tests/test_decoder_schema.proto create mode 100644 tests/testmain.cc create mode 100644 tools/dump_cinit.lua create mode 100644 tools/test_cinit.lua delete mode 100644 tools/upbc.c create mode 100644 tools/upbc.lua delete mode 100644 upb/descriptor.proto create mode 100644 upb/descriptor/descriptor.proto create mode 100755 upb/descriptor/descriptor.upb.c create mode 100755 upb/descriptor/descriptor.upb.h delete mode 100644 upb/descriptor/descriptor_const.h create mode 100644 upb/google/README create mode 100644 upb/google/bridge.cc create mode 100644 upb/google/bridge.h create mode 100644 upb/google/cord.h create mode 100644 upb/google/proto1.cc create mode 100644 upb/google/proto1.h create mode 100644 upb/google/proto2.cc create mode 100644 upb/google/proto2.h delete mode 100644 upb/msg.c delete mode 100644 upb/msg.h delete mode 100644 upb/refcount.c delete mode 100644 upb/refcount.h create mode 100644 upb/refcounted.c create mode 100644 upb/refcounted.h create mode 100644 upb/sink.c create mode 100644 upb/sink.h create mode 100644 upb/symtab.c create mode 100644 upb/symtab.h diff --git a/Makefile b/Makefile index de36900..c5df799 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,7 @@ CXXFLAGS=-Ibindings/cpp INCLUDE=-Itests -I. CPPFLAGS=$(INCLUDE) -Wall -Wextra $(USER_CFLAGS) LDLIBS=-lpthread upb/libupb.a +LUA=lua5.1 # 5.1 and 5.2 should both be supported # Build with "make Q=" to see all commands that are being executed. 
Q=@ @@ -84,23 +85,24 @@ CORE= \ upb/bytestream.c \ upb/def.c \ upb/descriptor/reader.c \ + upb/descriptor/descriptor.upb.c \ + upb/google/bridge.cc \ + upb/google/proto2.cc \ upb/handlers.c \ - upb/msg.c \ - upb/refcount.c \ - upb/stdc/error.c \ - upb/stdc/io.c \ + upb/refcounted.c \ + upb/sink.c \ + upb/symtab.c \ upb/table.c \ upb/upb.c \ - bindings/cpp/upb/proto2_bridge.cc \ # TODO: the proto2 bridge should be built as a separate library. # Library for the protocol buffer format (both text and binary). PB= \ upb/pb/decoder.c \ - upb/pb/varint.c \ upb/pb/glue.c \ upb/pb/textprinter.c \ + upb/pb/varint.c \ # Rules. ####################################################################### @@ -170,7 +172,7 @@ upb/def.lo: upb/def.c upb/pb/decoder_x64.h: upb/pb/decoder_x64.dasc $(E) DYNASM $< - $(Q) lua dynasm/dynasm.lua upb/pb/decoder_x64.dasc > upb/pb/decoder_x64.h + $(Q) $(LUA) dynasm/dynasm.lua upb/pb/decoder_x64.dasc > upb/pb/decoder_x64.h ifneq ($(shell uname), Darwin) upb/pb/jit_debug_elf_file.o: upb/pb/jit_debug_elf_file.s @@ -214,24 +216,36 @@ SIMPLE_TESTS= \ tests/test_varint \ SIMPLE_CXX_TESTS= \ - tests/test_table \ tests/test_cpp \ - tests/test_decoder \ + + # The build process for this test is complicated and hasn't been + # ported to the open-source Makefile yet. 
+ # tests/test_decoder \ VARIADIC_TESTS= \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ -TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS) +TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS) tests/test_table tests: $(TESTS) $(INTERACTIVE_TESTS) $(TESTS): $(LIBUPB) tests/test_def: tests/test.proto.pb +tests/testmain.o: tests/testmain.cc + $(E) CXX $< + $(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $< + +$(SIMPLE_TESTS): tests/testmain.o $(SIMPLE_TESTS): % : %.c $(E) CC $< - $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $< $(LIBUPB) + $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ tests/testmain.o $< $(LIBUPB) + +$(SIMPLE_CXX_TESTS): tests/testmain.o +$(SIMPLE_CXX_TESTS): % : %.cc + $(E) CXX $< + $(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -o $@ tests/testmain.o $< $(LIBUPB) VALGRIND=valgrind --leak-check=full --error-exitcode=1 test: tests @@ -258,7 +272,7 @@ tests/t.test_vs_proto2.googlemessage2: \ -DMESSAGE_FILE=\"../benchmarks/google_message1.dat\" \ -DMESSAGE_CIDENT="benchmarks::SpeedMessage1" \ -DMESSAGE_HFILE=\"../benchmarks/google_messages.pb.h\" \ - benchmarks/google_messages.pb.cc -lprotobuf -lpthread $(LIBUPB) + benchmarks/google_messages.pb.cc tests/testmain.o -lprotobuf -lpthread $(LIBUPB) $(E) CXX $< '(benchmarks::SpeedMessage2)' $(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -o tests/t.test_vs_proto2.googlemessage2 $< \ -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \ @@ -266,11 +280,11 @@ tests/t.test_vs_proto2.googlemessage2: \ -DMESSAGE_FILE=\"../benchmarks/google_message2.dat\" \ -DMESSAGE_CIDENT="benchmarks::SpeedMessage2" \ -DMESSAGE_HFILE=\"../benchmarks/google_messages.pb.h\" \ - benchmarks/google_messages.pb.cc -lprotobuf -lpthread $(LIBUPB) + benchmarks/google_messages.pb.cc tests/testmain.o -lprotobuf -lpthread $(LIBUPB) tests/test_table: tests/test_table.cc @# Includes which is a deprecated header. 
$(E) CXX $< - $(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -Wno-deprecated -o $@ $< $(LIBUPB) + $(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -Wno-deprecated -o $@ $< tests/testmain.o $(LIBUPB) tests/tests: upb/libupb.a diff --git a/README b/README index 45e9ff0..e52f06a 100644 --- a/README +++ b/README @@ -34,6 +34,89 @@ the major things that are broken or not yet implemented yet: - serialization isn't written yet (only deserialization) +C/C++ API +========= + +upb's main interfaces are defined in .h files (like upb/def.h). These header +files are coded in such a way that they are not only compatible with C and C++ +but provide idiomatic interfaces to both (functions for C, classes for C++). + +Here is the general strategy/pattern for this. I'll explain it piece by piece. + +// This defines a type called upb::Foo in C++ or upb_foo in C. In both cases +// there is a typedef for upb_foo, which is important since this is how the +// C functions are defined (which are exposed to both C and C++). + +#ifdef __cplusplus +namespace upb { class Foo; } +typedef upb::Foo upb_foo; +extern "C" { +#else +struct upb_foo; +typedef struct upb_foo upb_foo; +#endif + +// Here is the actual definition of the class/struct. In C++ we get a class +// called upb::Foo and in C we get a struct called "struct upb_foo", but both +// have the same members and the C++ version is "standard-layout" according +// to C++11. This means that the two should be compatible. +// +// In addition to being completely accessible from C, it also provides C++ +// niceties like methods (instead of bare functions). We also get +// encapsulation in C++, even though this is impossible to provide in C. We +// provide all method documentation in the C++ class, since the class/method +// syntax is nicer to read than the bare functions of C. + +#ifdef __cplusplus + +class upb::Foo { + public: + // Method documentation for DoBar(). + void DoBar(int32_t x); + + // Method documentation for IsSpicy(). 
+ bool IsSpicy(); + + private: + +#else +struct upb_foo { +#endif + int32_t private_member; +}; + +// Next follows the C API, which is how the functionality is actually +// implemented. We omit documentation here because everything was documented +// in the C++ class, and it's easy to match the functions 1:1 to the C++ +// methods. +void upb_foo_dobar(upb_foo *f, int32_t x); +bool upb_foo_isspicy(upb_foo *f); + +// Finally we include inline definitions of the C++ methods, which are nothing +// but thin wrappers around the C functions. Since these are inline, the C++ +// API imposes no overhead. + +#ifdef __cplusplus +} // extern "C" + +namespace upb { +inline void Foo::DoBar(int32_t x) { upb_foo_dobar(this, x); } +inline bool Foo::IsSpicy() { return upb_foo_isspicy(this); } +} +#endif + +This scheme works pretty nicely. It adds a bit of noise to the header file, but +gives nice, zero-overhead APIs to both C and C++ without having to duplicate +the API documentation. + +The biggest bummer is that there isn't any good way to use C++ inheritance +even for types which are trying to express inheritance in C. C++ just doesn't +give any guarantees about how it will arrange data members in base classes, +so we can't use C++ inheritance while interoperating with C layouts. The +biggest effect of this is that we can't get C++'s nice implicit upcasts; all +upcasts have to be explicit, which is a pain. + + CONTACT ======= diff --git a/benchmarks/parsestream.upb.c b/benchmarks/parsestream.upb.c index 781b97a..b5a353c 100644 --- a/benchmarks/parsestream.upb.c +++ b/benchmarks/parsestream.upb.c @@ -27,11 +27,16 @@ static upb_flow_t value(void *closure, upb_value fval, upb_value val) { return UPB_CONTINUE; } +void onfreg(void *c, upb_fhandlers *fh, const upb_fielddef *f) { + upb_fhandlers_setvalue(fh, &value); + upb_fhandlers_setstartsubmsg(fh, &startsubmsg); +} + static bool initialize() { // Initialize upb state, decode descriptor. 
upb_status status = UPB_STATUS_INIT; - upb_symtab *s = upb_symtab_new(&s); + upb_symtab *s = upb_symtab_new(); upb_load_descriptor_file_into_symtab(s, MESSAGE_DESCRIPTOR_FILE, &status); if(!upb_ok(&status)) { fprintf(stderr, "Error reading descriptor: %s\n", @@ -44,7 +49,7 @@ static bool initialize() fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME); return false; } - upb_symtab_unref(s, &s); + upb_symtab_unref(s); // Read the message data itself. input_str = upb_readfile(MESSAGE_FILE, &input_len); @@ -55,8 +60,7 @@ static bool initialize() upb_handlers *handlers = upb_handlers_new(); // Cause all messages to be read, but do nothing when they are. - upb_handlerset hset = {NULL, NULL, value, startsubmsg, NULL, NULL, NULL}; - upb_handlers_reghandlerset(handlers, def, &hset); + upb_handlers_regmsgdef(handlers, def, NULL, &upb_onfreg_hset, NULL); upb_decoder_init(&decoder); plan = upb_decoderplan_new(handlers, JIT); upb_decoder_resetplan(&decoder, plan, 0); diff --git a/bindings/cpp/upb/bytestream.cc b/bindings/cpp/upb/bytestream.cc deleted file mode 100644 index df0797e..0000000 --- a/bindings/cpp/upb/bytestream.cc +++ /dev/null @@ -1,39 +0,0 @@ -// -// upb - a minimalist implementation of protocol buffers. -// -// Copyright (c) 2011 Google Inc. See LICENSE for details. 
-// Author: Josh Haberman - -#include "bytestream.hpp" - -namespace upb { - -upb_bytesrc_vtbl* ByteSourceBase::vtable() { - static upb_bytesrc_vtbl vtbl = { - &ByteSourceBase::VFetch, - &ByteSourceBase::VDiscard, - &ByteSourceBase::VCopy, - &ByteSourceBase::VGetPtr, - }; - return &vtbl; -} - -upb_bytesuccess_t ByteSourceBase::VFetch(void *src, uint64_t ofs, size_t *len) { - return static_cast(src)->Fetch(ofs, len); -} - -void ByteSourceBase::VCopy( - const void *src, uint64_t ofs, size_t len, char* dest) { - static_cast(src)->Copy(ofs, len, dest); -} - -void ByteSourceBase::VDiscard(void *src, uint64_t ofs) { - static_cast(src)->Discard(ofs); -} - -const char * ByteSourceBase::VGetPtr( - const void *src, uint64_t ofs, size_t* len) { - return static_cast(src)->GetPtr(ofs, len); -} - -} // namespace upb diff --git a/bindings/cpp/upb/bytestream.hpp b/bindings/cpp/upb/bytestream.hpp deleted file mode 100644 index 37d8157..0000000 --- a/bindings/cpp/upb/bytestream.hpp +++ /dev/null @@ -1,276 +0,0 @@ -// -// upb - a minimalist implementation of protocol buffers. -// -// Copyright (c) 2011 Google Inc. See LICENSE for details. -// Author: Josh Haberman -// -// This file defines three core interfaces: -// - upb::ByteSink: for writing streams of data. -// - upb::ByteSource: for reading streams of data. -// - upb::ByteRegion: for reading from a specific region of a ByteSource; -// should be used by decoders instead of using a ByteSource directly. -// -// These interfaces are used by streaming encoders and decoders: for example, a -// protobuf parser gets its input from a upb::ByteRegion. They are virtual -// base classes so concrete implementations can get the data from a fd, a -// FILE*, a string, etc. -// -// A ByteRegion represents a region of data from a ByteSource. -// -// Parsers get data from this interface instead of a bytesrc because we often -// want to parse only a specific region of the input. 
For example, if we parse -// a string from our input but know that the string represents a protobuf, we -// can pass its ByteRegion to an appropriate protobuf parser. -// -// Since the bytes may be coming from a file or network socket, bytes must be -// fetched before they can be read (though in some cases this fetch may be a -// no-op). "fetch" is the only operation on a byteregion that could fail or -// block, because it is the only operation that actually performs I/O. -// -// Bytes can be discarded when they are no longer needed. Parsers should -// always discard bytes they no longer need, both so the buffers can be freed -// when possible and to give better visibility into what bytes the parser is -// still using. -// -// start discard read fetch end -// ofs ofs ofs ofs ofs -// | |--->Discard() | |--->Fetch() | -// V V V V V -// +-------------+-------------------------+-----------------+-----------------+ -// | discarded | | | fetchable | -// +-------------+-------------------------+-----------------+-----------------+ -// | <------------- loaded ------------------> | -// | <- available -> | -// | <---------- remaining ----------> | -// -// Note that the start offset may be something other than zero! A byteregion -// is a view into an underlying bytesrc stream, and the region may start -// somewhere other than the beginning of that stream. -// -// The region can be either delimited or nondelimited. A non-delimited region -// will keep returning data until the underlying data source returns EOF. A -// delimited region will return EOF at a predetermined offset. -// -// end -// ofs -// | -// V -// +-----------------------+ -// | delimited region | <-- hard EOF, even if data source has more data. -// +-----------------------+ -// -// +------------------------ -// | nondelimited region Z <-- won't return EOF until data source hits EOF. 
-// +------------------------ - -#ifndef UPB_BYTESTREAM_HPP -#define UPB_BYTESTREAM_HPP - -#include "upb/bytestream.h" -#include "upb/upb.hpp" -#include - -namespace upb { - -typedef upb_bytesuccess_t ByteSuccess; - -// Implement this interface to vend bytes to ByteRegions which will be used by -// a decoder. -class ByteSourceBase : public upb_bytesrc { - public: - ByteSourceBase() { upb_bytesrc_init(this, vtable()); } - virtual ~ByteSourceBase() { upb_bytesrc_uninit(this); } - - // Fetches at least one byte starting at ofs, setting *len to the actual - // number of bytes fetched (or 0 on EOF or error: see return value for - // details). It is valid for bytes to be fetched multiple times, as long as - // the bytes have not been previously discarded. - virtual ByteSuccess Fetch(uint64_t ofs, size_t* len) = 0; - - // Discards all data prior to ofs (except data that is pinned, if pinning - // support is added -- see TODO below). - virtual void Discard(uint64_t ofs) = 0; - - // Copies "len" bytes of data from ofs to "dst", which must be at least "len" - // bytes long. The given region must not be discarded. - virtual void Copy(uint64_t ofs, size_t len, char *dst) const = 0; - - // Returns a pointer to the bytesrc's internal buffer, storing in *len how - // much data is available. The given offset must not be discarded. The - // returned buffer is valid for as long as its bytes are not discarded (in - // the case that part of the returned buffer is discarded, only the - // non-discarded bytes remain valid). - virtual const char *GetPtr(uint64_t ofs, size_t *len) const = 0; - - // TODO: Add if/when there is a demonstrated need: - // - // // When the caller pins a region (which must not be already discarded), it - // // is guaranteed that the region will not be discarded (nor will the - // // bytesrc be destroyed) until the region is unpinned. However, not all - // // bytesrc's support pinning; a false return indicates that a pin was not - // // possible. 
- // virtual bool Pin(uint64_t ofs, size_t len); - // - // // Releases some number of pinned bytes from the beginning of a pinned - // // region (which may be fewer than the total number of bytes pinned). - // virtual void Unpin(uint64_t ofs, size_t len, size_t bytes_to_release); - // - // Adding pinning support would also involve adding a "pin_ofs" parameter to - // upb_bytesrc_fetch, so that the fetch can extend an already-pinned region. - private: - static upb_bytesrc_vtbl* vtable(); - static upb_bytesuccess_t VFetch(void*, uint64_t, size_t*); - static void VDiscard(void*, uint64_t); - static void VCopy(const void*, uint64_t, size_t, char*); - static const char *VGetPtr(const void*, uint64_t, size_t*); -}; - -class ByteRegion : public upb_byteregion { - public: - static const uint64_t kNondelimited = UPB_NONDELIMITED; - - ByteRegion() { upb_byteregion_init(this); } - ~ByteRegion() { upb_byteregion_uninit(this); } - - // Accessors for the regions bounds -- the meaning of these is described in - // the diagram above. - uint64_t start_ofs() const { return upb_byteregion_startofs(this); } - uint64_t discard_ofs() const { return upb_byteregion_discardofs(this); } - uint64_t fetch_ofs() const { return upb_byteregion_fetchofs(this); } - uint64_t end_ofs() const { return upb_byteregion_endofs(this); } - - // Returns how many bytes are fetched and available for reading starting from - // offset "offset". - uint64_t BytesAvailable(uint64_t offset) const { - return upb_byteregion_available(this, offset); - } - - // Returns the total number of bytes remaining after offset "offset", or - // kNondelimited if the byteregion is non-delimited. - uint64_t BytesRemaining(uint64_t offset) const { - return upb_byteregion_remaining(this, offset); - } - - uint64_t Length() const { return upb_byteregion_len(this); } - - // Sets the value of this byteregion to be a subset of the given byteregion's - // data. 
The caller is responsible for releasing this region before the src - // region is released (unless the region is first pinned, if pinning support - // is added. see below). - void Reset(const upb_byteregion *src, uint64_t ofs, uint64_t len) { - upb_byteregion_reset(this, src, ofs, len); - } - void Release() { upb_byteregion_release(this); } - - // Attempts to fetch more data, extending the fetched range of this - // byteregion. Returns true if the fetched region was extended by at least - // one byte, false on EOF or error (see *s for details). - ByteSuccess Fetch() { return upb_byteregion_fetch(this); } - - // Fetches all remaining data, returning false if the operation failed (see - // *s for details). May only be used on delimited byteregions. - ByteSuccess FetchAll() { return upb_byteregion_fetchall(this); } - - // Discards bytes from the byteregion up until ofs (which must be greater or - // equal to discard_ofs()). It is valid to discard bytes that have not been - // fetched (such bytes will never be fetched) but it is an error to discard - // past the end of a delimited byteregion. - void Discard(uint64_t ofs) { return upb_byteregion_discard(this, ofs); } - - // Copies "len" bytes of data into "dst", starting at ofs. The specified - // region must be available. - void Copy(uint64_t ofs, size_t len, char *dst) const { - upb_byteregion_copy(this, ofs, len, dst); - } - - // Copies all bytes from the byteregion into dst. Requires that the entire - // byteregion is fetched and that none has been discarded. - void CopyAll(char *dst) const { - upb_byteregion_copyall(this, dst); - } - - // Returns a pointer to the internal buffer for the byteregion starting at - // offset "ofs." Stores the number of bytes available in this buffer in *len. - // The returned buffer is invalidated when the byteregion is reset or - // released, or when the bytes are discarded. 
If the byteregion is not - // currently pinned, the pointer is only valid for the lifetime of the parent - // byteregion. - const char *GetPtr(uint64_t ofs, size_t *len) const { - return upb_byteregion_getptr(this, ofs, len); - } - - // Copies the contents of the byteregion into a newly-allocated, - // NULL-terminated string. Requires that the byteregion is fully fetched. - char *StrDup() const { - return upb_byteregion_strdup(this); - } - - template void AssignToString(T* str) { - uint64_t ofs = start_ofs(); - size_t len; - const char *ptr = GetPtr(ofs, &len); - // Emperically calling reserve() here is counterproductive and slows down - // benchmarks. If the parsing is happening in a tight loop that is reusing - // the string object, there is probably enough data reserved already and - // the reserve() call is extra overhead. - str->assign(ptr, len); - ofs += len; - while (ofs < end_ofs()) { - ptr = GetPtr(ofs, &len); - str->append(ptr, len); - ofs += len; - } - } - - // TODO: add if/when there is a demonstrated need. - // - // // Pins this byteregion's bytes in memory, allowing it to outlive its - // // parent byteregion. Normally a byteregion may only be used while its - // // parent is still valid, but a pinned byteregion may continue to be used - // // until it is reset or released. A byteregion must be fully fetched to - // // be pinned (this implies that the byteregion must be delimited). - // // - // // In some cases this operation may cause the input data to be copied. 
- // // - // // void Pin(); -}; - -class StringSource : public upb_stringsrc { - public: - StringSource() : upb_stringsrc() { upb_stringsrc_init(this); } - template explicit StringSource(const T& str) { - upb_stringsrc_init(this); - Reset(str); - } - StringSource(const char *data, size_t len) { - upb_stringsrc_init(this); - Reset(data, len); - } - ~StringSource() { upb_stringsrc_uninit(this); } - - void Reset(const char* data, size_t len) { - upb_stringsrc_reset(this, data, len); - } - - template void Reset(const T& str) { - Reset(str.c_str(), str.size()); - } - - ByteRegion* AllBytes() { - return static_cast(upb_stringsrc_allbytes(this)); - } - - upb_bytesrc* ByteSource() { return upb_stringsrc_bytesrc(this); } -}; - -template <> inline ByteRegion* GetValue(Value v) { - return static_cast(upb_value_getbyteregion(v)); -} - -template <> inline Value MakeValue(ByteRegion* v) { - return upb_value_byteregion(v); -} - -} // namespace upb - -#endif diff --git a/bindings/cpp/upb/def.hpp b/bindings/cpp/upb/def.hpp deleted file mode 100644 index 6547255..0000000 --- a/bindings/cpp/upb/def.hpp +++ /dev/null @@ -1,462 +0,0 @@ -// -// upb - a minimalist implementation of protocol buffers. -// -// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. -// Author: Josh Haberman -// -// The set of upb::*Def classes and upb::SymbolTable allow for defining and -// manipulating schema information (as defined in .proto files). -// -// Defs go through two distinct phases of life: -// -// 1. MUTABLE: when first created, the properties of the def can be set freely -// (for example a message's name, its list of fields, the name/number of -// fields, etc). During this phase the def is *not* thread-safe, and may -// not be used for any purpose except to set its properties (it can't be -// used to parse anything, create any messages in memory, etc). -// -// 2. FINALIZED: the Def::Finzlie() operation finalizes a set of defs, -// which makes them thread-safe and immutable. 
Finalized defs may only be -// accessed through a CONST POINTER. If you want to modify an existing -// immutable def, copy it with Dup() and modify and finalize the copy. -// -// The refcounting of defs works properly no matter what state the def is in. -// Once the def is finalized it is guaranteed that any def reachable from a -// live def is also live (so a ref on the base of a message tree keeps the -// whole tree alive). -// -// You can test for which stage of life a def is in by calling IsMutable(). -// This is particularly useful for dynamic language bindings, which must -// properly guarantee that the dynamic language cannot break the rules laid out -// above. -// -// It would be possible to make the defs thread-safe during stage 1 by using -// mutexes internally and changing any methods returning pointers to return -// copies instead. This could be important if we are integrating with a VM or -// interpreter that does not naturally serialize access to wrapped objects (for -// example, in the case of Python this is not necessary because of the GIL). 
- -#ifndef UPB_DEF_HPP -#define UPB_DEF_HPP - -#include -#include -#include -#include "upb/def.h" -#include "upb/upb.hpp" - -namespace upb { - -class Def; -class MessageDef; - -typedef upb_fieldtype_t FieldType; -typedef upb_label_t Label; - -class FieldDef : public upb_fielddef { - public: - static FieldDef* Cast(upb_fielddef *f) { return static_cast(f); } - static const FieldDef* Cast(const upb_fielddef *f) { - return static_cast(f); - } - - static FieldDef* New(const void *owner) { - return Cast(upb_fielddef_new(owner)); - } - FieldDef* Dup(const void *owner) const { - return Cast(upb_fielddef_dup(this, owner)); - } - void Ref(const void *owner) { upb_fielddef_ref(this, owner); } - void Unref(const void *owner) { upb_fielddef_unref(this, owner); } - - bool IsMutable() const { return upb_fielddef_ismutable(this); } - bool IsFinalized() const { return upb_fielddef_isfinalized(this); } - bool IsString() const { return upb_isstring(this); } - bool IsSequence() const { return upb_isseq(this); } - bool IsSubmessage() const { return upb_issubmsg(this); } - - // Simple accessors. 
///////////////////////////////////////////////////////// - - FieldType type() const { return upb_fielddef_type(this); } - Label label() const { return upb_fielddef_label(this); } - int32_t number() const { return upb_fielddef_number(this); } - std::string name() const { return std::string(upb_fielddef_name(this)); } - Value default_() const { return upb_fielddef_default(this); } - Value bound_value() const { return upb_fielddef_fval(this); } - uint16_t offset() const { return upb_fielddef_offset(this); } - int16_t hasbit() const { return upb_fielddef_hasbit(this); } - - bool set_type(FieldType type) { return upb_fielddef_settype(this, type); } - bool set_label(Label label) { return upb_fielddef_setlabel(this, label); } - void set_offset(uint16_t offset) { upb_fielddef_setoffset(this, offset); } - void set_hasbit(int16_t hasbit) { upb_fielddef_sethasbit(this, hasbit); } - void set_fval(Value fval) { upb_fielddef_setfval(this, fval); } - void set_accessor(struct _upb_accessor_vtbl* vtbl) { - upb_fielddef_setaccessor(this, vtbl); - } - MessageDef* message(); - const MessageDef* message() const; - - struct _upb_accessor_vtbl *accessor() const { - return upb_fielddef_accessor(this); - } - - // "Number" and "name" must be set before the fielddef is added to a msgdef. - // For the moment we do not allow these to be set once the fielddef is added - // to a msgdef -- this could be relaxed in the future. - bool set_number(int32_t number) { - return upb_fielddef_setnumber(this, number); - } - bool set_name(const char *name) { return upb_fielddef_setname(this, name); } - bool set_name(const std::string& name) { return set_name(name.c_str()); } - - // Default value. //////////////////////////////////////////////////////////// - - // Returns the default value for this fielddef, which may either be something - // the client set explicitly or the "default default" (0 for numbers, empty - // for strings). 
The field's type indicates the type of the returned value, - // except for enum fields that are still mutable. - // - // For enums the default can be set either numerically or symbolically -- the - // upb_fielddef_default_is_symbolic() function below will indicate which it - // is. For string defaults, the value will be a upb_byteregion which is - // invalidated by any other non-const call on this object. Once the fielddef - // is finalized, symbolic enum defaults are resolved, so finalized enum - // fielddefs always have a default of type int32. - Value defaultval() { return upb_fielddef_default(this); } - - // Sets default value for the field. For numeric types, use - // upb_fielddef_setdefault(), and "value" must match the type of the field. - // For string/bytes types, use upb_fielddef_setdefaultstr(). Enum types may - // use either, since the default may be set either numerically or - // symbolically. - // - // NOTE: May only be called for fields whose type has already been set. - // Also, will be reset to default if the field's type is set again. - void set_default(Value value) { upb_fielddef_setdefault(this, value); } - void set_default(const char *str) { upb_fielddef_setdefaultcstr(this, str); } - void set_default(const char *str, size_t len) { - upb_fielddef_setdefaultstr(this, str, len); - } - void set_default(const std::string& str) { - upb_fielddef_setdefaultstr(this, str.c_str(), str.size()); - } - - // The results of this function are only meaningful for mutable enum fields, - // which can have a default specified either as an integer or as a string. - // If this returns true, the default returned from upb_fielddef_default() is - // a string, otherwise it is an integer. - bool DefaultIsSymbolic() { return upb_fielddef_default_is_symbolic(this); } - - // Subdef. /////////////////////////////////////////////////////////////////// - - // Submessage and enum fields must reference a "subdef", which is the - // MessageDef or EnumDef that defines their type. 
Note that when the - // FieldDef is mutable it may not have a subdef *yet*, but this still returns - // true to indicate that the field's type requires a subdef. - bool HasSubDef() { return upb_hassubdef(this); } - - // Before a FieldDef is finalized, its subdef may be set either directly - // (with a Def*) or symbolically. Symbolic refs must be resolved by the - // client before the containing msgdef can be finalized. - // - // Both methods require that HasSubDef() (so the type must be set prior to - // calling these methods). Returns false if this is not the case, or if the - // given subdef is not of the correct type. The subtype is reset if the - // field's type is changed. - bool set_subdef(Def* def); - bool set_subtype_name(const char *name) { - return upb_fielddef_setsubtypename(this, name); - } - bool set_subtype_name(const std::string& str) { - return set_subtype_name(str.c_str()); - } - - // Returns the enum or submessage def or symbolic name for this field, if - // any. May only be called for fields where HasSubDef() is true. Returns - // NULL if the subdef has not been set or if you ask for a subtype name when - // the subtype is currently set symbolically (or vice-versa). - // - // Caller does *not* own a ref on the returned def or string. - // subtypename_name() is non-const because only mutable defs can have the - // subtype name set symbolically (symbolic references must be resolved before - // the MessageDef can be finalized). - const Def* subdef() const; - const char *subtype_name() { return upb_fielddef_subtypename(this); } - - private: - UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldDef); -}; - -class Def : public upb_def { - public: - // Converting from C types to C++ wrapper types. 
- static Def* Cast(upb_def *def) { return static_cast(def); } - static const Def* Cast(const upb_def *def) { - return static_cast(def); - } - - void Ref(const void *owner) const { upb_def_ref(this, owner); } - void Unref(const void *owner) const { upb_def_unref(this, owner); } - - void set_full_name(const char *name) { upb_def_setfullname(this, name); } - void set_full_name(const std::string& name) { - upb_def_setfullname(this, name.c_str()); - } - - const char *full_name() const { return upb_def_fullname(this); } - - // Finalizes the given list of defs (as well as the fielddefs for the given - // msgdefs). All defs reachable from any def in this list must either be - // already finalized or elsewhere in the list. Any symbolic references to - // enums or submessages must already have been resolved. Returns true on - // success, otherwise false is returned and status contains details. In the - // error case the input defs are unmodified. See the comment at the top of - // this file for the semantics of finalized defs. - // - // n is currently limited to 64k defs, if more are required break them into - // batches of 64k (or we could raise this limit, at the cost of a bigger - // upb_def structure or complexity in upb_def_finalize()). - static bool Finalize(Def*const* defs, int n, Status* status) { - return upb_finalize(reinterpret_cast(defs), n, status); - } - static bool Finalize(const std::vector& defs, Status* status) { - return Finalize(&defs[0], defs.size(), status); - } -}; - -class MessageDef : public upb_msgdef { - public: - // Converting from C types to C++ wrapper types. 
- static MessageDef* Cast(upb_msgdef *md) { - return static_cast(md); - } - static const MessageDef* Cast(const upb_msgdef *md) { - return static_cast(md); - } - static MessageDef* DynamicCast(Def* def) { - return Cast(upb_dyncast_msgdef(def)); - } - static const MessageDef* DynamicCast(const Def* def) { - return Cast(upb_dyncast_msgdef_const(def)); - } - - Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); } - const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); } - - static MessageDef* New(void *owner) { return Cast(upb_msgdef_new(owner)); } - MessageDef* Dup(void *owner) const { - return Cast(upb_msgdef_dup(this, owner)); - } - - void Ref(const void *owner) const { upb_msgdef_ref(this, owner); } - void Unref(const void *owner) const { upb_msgdef_unref(this, owner); } - - // Read accessors -- may be called at any time. - - const char *full_name() const { return AsDef()->full_name(); } - - // The total size of in-memory messages created with this MessageDef. - uint16_t instance_size() const { return upb_msgdef_size(this); } - - // The number of "hasbit" bytes in a message instance. - uint8_t hasbit_bytes() const { return upb_msgdef_hasbit_bytes(this); } - - uint32_t extension_start() const { return upb_msgdef_extstart(this); } - uint32_t extension_end() const { return upb_msgdef_extend(this); } - - // Write accessors. May only be called before the msgdef is in a symtab. - - void set_full_name(const char *name) { AsDef()->set_full_name(name); } - void set_full_name(const std::string& name) { AsDef()->set_full_name(name); } - - void set_instance_size(uint16_t size) { upb_msgdef_setsize(this, size); } - void set_hasbit_bytes(uint16_t size) { upb_msgdef_setsize(this, size); } - bool SetExtensionRange(uint32_t start, uint32_t end) { - return upb_msgdef_setextrange(this, start, end); - } - - // Adds a set of fields (FieldDef objects) to a MessageDef. Caller passes a - // ref on the FieldDef to the MessageDef in both success and failure cases. 
- // May only be done before the MessageDef is in a SymbolTable (requires - // m->IsMutable() for the MessageDef). The FieldDef's name and number must - // be set, and the message may not already contain any field with this name - // or number, and this FieldDef may not be part of another message, otherwise - // false is returned and the MessageDef is unchanged. - bool AddField(FieldDef* f, const void *owner) { - return AddFields(&f, 1, owner); - } - bool AddFields(FieldDef*const * f, int n, const void *owner) { - return upb_msgdef_addfields(this, (upb_fielddef*const*)f, n, owner); - } - bool AddFields(const std::vector& fields, const void *owner) { - return AddFields(&fields[0], fields.size(), owner); - } - - int field_count() const { return upb_msgdef_numfields(this); } - - // Lookup fields by name or number, returning NULL if no such field exists. - FieldDef* FindFieldByName(const char *name) { - return FieldDef::Cast(upb_msgdef_ntof(this, name)); - } - FieldDef* FindFieldByName(const std::string& name) { - return FieldDef::Cast(upb_msgdef_ntof(this, name.c_str())); - } - FieldDef* FindFieldByNumber(uint32_t num) { - return FieldDef::Cast(upb_msgdef_itof(this, num)); - } - - const FieldDef* FindFieldByName(const char *name) const { - return FindFieldByName(name); - } - const FieldDef* FindFieldByName(const std::string& name) const { - return FindFieldByName(name); - } - const FieldDef* FindFieldByNumber(uint32_t num) const { - return FindFieldByNumber(num); - } - - class Iterator : public upb_msg_iter { - public: - explicit Iterator(MessageDef* md) { upb_msg_begin(this, md); } - Iterator() {} - - FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); } - bool Done() { return upb_msg_done(this); } - void Next() { return upb_msg_next(this); } - }; - - class ConstIterator : public upb_msg_iter { - public: - explicit ConstIterator(const MessageDef* md) { upb_msg_begin(this, md); } - ConstIterator() {} - - const FieldDef* field() { return 
FieldDef::Cast(upb_msg_iter_field(this)); } - bool Done() { return upb_msg_done(this); } - void Next() { return upb_msg_next(this); } - }; - - private: - UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageDef); -}; - -class EnumDef : public upb_enumdef { - public: - // Converting from C types to C++ wrapper types. - static EnumDef* Cast(upb_enumdef *e) { return static_cast(e); } - static const EnumDef* Cast(const upb_enumdef *e) { - return static_cast(e); - } - - static EnumDef* New(const void *owner) { return Cast(upb_enumdef_new(owner)); } - - void Ref(const void *owner) { upb_enumdef_ref(this, owner); } - void Unref(const void *owner) { upb_enumdef_unref(this, owner); } - EnumDef* Dup(const void *owner) const { - return Cast(upb_enumdef_dup(this, owner)); - } - - Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); } - const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); } - - int32_t default_value() const { return upb_enumdef_default(this); } - - // May only be set if IsMutable(). - void set_full_name(const char *name) { AsDef()->set_full_name(name); } - void set_full_name(const std::string& name) { AsDef()->set_full_name(name); } - void set_default_value(int32_t val) { - return upb_enumdef_setdefault(this, val); - } - - // Adds a value to the enumdef. Requires that no existing val has this - // name or number (returns false and does not add if there is). May only - // be called if IsMutable(). - bool AddValue(char *name, int32_t num) { - return upb_enumdef_addval(this, name, num); - } - bool AddValue(const std::string& name, int32_t num) { - return upb_enumdef_addval(this, name.c_str(), num); - } - - // Lookups from name to integer and vice-versa. - bool LookupName(const char *name, int32_t* num) const { - return upb_enumdef_ntoi(this, name, num); - } - - // Lookup from integer to name, returns a NULL-terminated string which - // the caller does not own, or NULL if not found. 
- const char *LookupNumber(int32_t num) const { - return upb_enumdef_iton(this, num); - } - - private: - UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(EnumDef); -}; - -class SymbolTable : public upb_symtab { - public: - // Converting from C types to C++ wrapper types. - static SymbolTable* Cast(upb_symtab *s) { - return static_cast(s); - } - static const SymbolTable* Cast(const upb_symtab *s) { - return static_cast(s); - } - - static SymbolTable* New(const void *owner) { - return Cast(upb_symtab_new(owner)); - } - - void Ref(const void *owner) const { upb_symtab_unref(this, owner); } - void Unref(const void *owner) const { upb_symtab_unref(this, owner); } - void DonateRef(const void *from, const void *to) const { - upb_symtab_donateref(this, from, to); - } - - // Adds the given defs to the symtab, resolving all symbols. Only one def - // per name may be in the list, but defs can replace existing defs in the - // symtab. The entire operation either succeeds or fails. If the operation - // fails, the symtab is unchanged, false is returned, and status indicates - // the error. The caller passes a ref on the defs in all cases. - bool Add(Def *const *defs, int n, void *owner, Status* status) { - return upb_symtab_add(this, (upb_def*const*)defs, n, owner, status); - } - bool Add(const std::vector& defs, void *owner, Status* status) { - return Add(&defs[0], defs.size(), owner, status); - } - - // If the given name refers to a message in this symbol table, returns a new - // ref to that MessageDef object, otherwise returns NULL. 
- const MessageDef* LookupMessage(const char *name, void *owner) const { - return MessageDef::Cast(upb_symtab_lookupmsg(this, name, owner)); - } - - private: - UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(SymbolTable); -}; - -template <> inline const FieldDef* GetValue(Value v) { - return static_cast(upb_value_getfielddef(v)); -} - -template <> inline Value MakeValue(FieldDef* v) { - return upb_value_fielddef(v); -} - -inline MessageDef* FieldDef::message() { - return MessageDef::Cast(upb_fielddef_msgdef(this)); -} -inline const MessageDef* FieldDef::message() const { - return MessageDef::Cast(upb_fielddef_msgdef(this)); -} - -inline const Def* FieldDef::subdef() const { - return Def::Cast(upb_fielddef_subdef(this)); -} -inline bool FieldDef::set_subdef(Def* def) { - return upb_fielddef_setsubdef(this, def); -} - -} // namespace upb - -#endif diff --git a/bindings/cpp/upb/handlers.cc b/bindings/cpp/upb/handlers.cc deleted file mode 100644 index c96a74e..0000000 --- a/bindings/cpp/upb/handlers.cc +++ /dev/null @@ -1,39 +0,0 @@ -// -// upb - a minimalist implementation of protocol buffers. -// -// Copyright (c) 2011 Google Inc. See LICENSE for details. 
-// Author: Josh Haberman - -#include "handlers.hpp" - -#include "def.hpp" - -namespace upb { - -namespace { - -void MessageCallbackWrapper( - void* closure, upb_mhandlers* mh, const upb_msgdef* m) { - Handlers::MessageRegistrationVisitor* visitor = - static_cast(closure); - visitor->OnMessage(static_cast(mh), - static_cast(m)); -} - -void FieldCallbackWrapper( - void* closure, upb_fhandlers* fh, const upb_fielddef* f) { - Handlers::MessageRegistrationVisitor* visitor = - static_cast(closure); - visitor->OnField(static_cast(fh), - static_cast(f)); -} -} // namepace - -MessageHandlers* Handlers::RegisterMessageDef( - const MessageDef& m, Handlers::MessageRegistrationVisitor* visitor) { - upb_mhandlers* mh = upb_handlers_regmsgdef( - this, &m, &MessageCallbackWrapper, &FieldCallbackWrapper, &visitor); - return static_cast(mh); -} - -} // namespace upb diff --git a/bindings/cpp/upb/handlers.hpp b/bindings/cpp/upb/handlers.hpp deleted file mode 100644 index a366c3d..0000000 --- a/bindings/cpp/upb/handlers.hpp +++ /dev/null @@ -1,176 +0,0 @@ -// -// upb - a minimalist implementation of protocol buffers. -// -// Copyright (c) 2011 Google Inc. See LICENSE for details. -// Author: Josh Haberman -// -// upb::Handlers is a generic visitor-like interface for iterating over a -// stream of protobuf data. You can register function pointers that will be -// called for each message and/or field as the data is being parsed or iterated -// over, without having to know the source format that we are parsing from. -// This decouples the parsing logic from the processing logic. 
- -#ifndef UPB_HANDLERS_HPP -#define UPB_HANDLERS_HPP - -#include "upb/handlers.h" - -#include "upb/upb.hpp" - -namespace upb { - -typedef upb_fieldtype_t FieldType; -typedef upb_flow_t Flow; -typedef upb_sflow_t SubFlow; -class MessageHandlers; -class MessageDef; -class FieldDef; - -class FieldHandlers : public upb_fhandlers { - public: - typedef upb_value_handler ValueHandler; - typedef upb_startfield_handler StartFieldHandler; - typedef upb_endfield_handler EndFieldHandler; - - // The FieldHandlers will live at least as long as the upb::Handlers to - // which it belongs, but can be Ref'd/Unref'd to make it live longer (which - // will prolong the life of the underlying upb::Handlers also). - void Ref() { upb_fhandlers_ref(this); } - void Unref() { upb_fhandlers_unref(this); } - - // Functions to set this field's handlers. - // These return "this" so they can be conveniently chained, eg. - // message_handlers->NewField(...) - // ->SetStartSequenceHandler(&StartSequence), - // ->SetEndSequenceHandler(&EndSequence), - // ->SetValueHandler(&Value); - FieldHandlers* SetValueHandler(ValueHandler* h) { - upb_fhandlers_setvalue(this, h); return this; - } - FieldHandlers* SetStartSequenceHandler(StartFieldHandler* h) { - upb_fhandlers_setstartseq(this, h); return this; - } - FieldHandlers* SetEndSequenceHandler(EndFieldHandler* h) { - upb_fhandlers_setendseq(this, h); return this; - } - FieldHandlers* SetStartSubmessageHandler(StartFieldHandler* h) { - upb_fhandlers_setstartsubmsg(this, h); return this; - } - FieldHandlers* SetEndSubmessageHandler(EndFieldHandler* h) { - upb_fhandlers_setendsubmsg(this, h); return this; - } - - // Get/Set the field's bound value, which will be passed to its handlers. - Value GetBoundValue() const { return upb_fhandlers_getfval(this); } - FieldHandlers* SetBoundValue(Value val) { - upb_fhandlers_setfval(this, val); return this; - } - - // Returns the MessageHandlers to which we belong. 
- MessageHandlers* GetMessageHandlers() const; - // Returns the MessageHandlers for this field's submessage (invalid to call - // unless this field's type UPB_TYPE(MESSAGE) or UPB_TYPE(GROUP). - MessageHandlers* GetSubMessageHandlers() const; - // If set to >=0, the given hasbit will be set after the value callback is - // called (offset relative to the current closure). - int32_t GetHasbit() const { return upb_fhandlers_gethasbit(this); } - void SetHasbit(int32_t bit) { upb_fhandlers_sethasbit(this, bit); } - - private: - UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldHandlers); -}; - -class MessageHandlers : public upb_mhandlers { - public: - typedef upb_startmsg_handler StartMessageHandler; - typedef upb_endmsg_handler EndMessageHandler; - - static MessageHandlers* Cast(upb_mhandlers* mh) { - return static_cast(mh); - } - static const MessageHandlers* Cast(const upb_mhandlers* mh) { - return static_cast(mh); - } - - // The MessageHandlers will live at least as long as the upb::Handlers to - // which it belongs, but can be Ref'd/Unref'd to make it live longer (which - // will prolong the life of the underlying upb::Handlers also). - void Ref() { upb_mhandlers_ref(this); } - void Unref() { upb_mhandlers_unref(this); } - - // Functions to set this message's handlers. - // These return "this" so they can be conveniently chained, eg. - // handlers->NewMessageHandlers() - // ->SetStartMessageHandler(&StartMessage) - // ->SetEndMessageHandler(&EndMessage); - MessageHandlers* SetStartMessageHandler(StartMessageHandler* h) { - upb_mhandlers_setstartmsg(this, h); return this; - } - MessageHandlers* SetEndMessageHandler(EndMessageHandler* h) { - upb_mhandlers_setendmsg(this, h); return this; - } - - // Functions to create new FieldHandlers for this message. 
- FieldHandlers* NewFieldHandlers(uint32_t fieldnum, FieldType type, - bool repeated) { - return static_cast( - upb_mhandlers_newfhandlers(this, fieldnum, type, repeated)); - } - - // Like the previous but for MESSAGE or GROUP fields. For GROUP fields, the - // given submessage must not have any fields with this field number. - FieldHandlers* NewFieldHandlersForSubmessage(uint32_t n, const char *name, - FieldType type, bool repeated, - MessageHandlers* subm) { - (void)name; - return static_cast( - upb_mhandlers_newfhandlers_subm(this, n, type, repeated, subm)); - } - - private: - UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageHandlers); -}; - -class Handlers : public upb_handlers { - public: - // Creates a new Handlers instance. - static Handlers* New() { return static_cast(upb_handlers_new()); } - - void Ref() { upb_handlers_ref(this); } - void Unref() { upb_handlers_unref(this); } - - // Returns a new MessageHandlers object. The first such message that is - // obtained will be the top-level message for this Handlers object. - MessageHandlers* NewMessageHandlers() { - return static_cast(upb_handlers_newmhandlers(this)); - } - - // Convenience function for registering handlers for all messages and fields - // in a MessageDef and all its children. For every registered message, - // OnMessage will be called on the visitor with newly-created MessageHandlers - // and MessageDef. Likewise with OnField will be called with newly-created - // FieldHandlers and FieldDef for each field. 
- class MessageRegistrationVisitor { - public: - virtual ~MessageRegistrationVisitor() {} - virtual void OnMessage(MessageHandlers* mh, const MessageDef* m) = 0; - virtual void OnField(FieldHandlers* fh, const FieldDef* f) = 0; - }; - MessageHandlers* RegisterMessageDef(const MessageDef& m, - MessageRegistrationVisitor* visitor); - - private: - UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(Handlers); -}; - -inline MessageHandlers* FieldHandlers::GetMessageHandlers() const { - return static_cast(upb_fhandlers_getmsg(this)); -} - -inline MessageHandlers* FieldHandlers::GetSubMessageHandlers() const { - return static_cast(upb_fhandlers_getsubmsg(this)); -} - -} // namespace upb - -#endif diff --git a/bindings/cpp/upb/msg.hpp b/bindings/cpp/upb/msg.hpp deleted file mode 100644 index cde1743..0000000 --- a/bindings/cpp/upb/msg.hpp +++ /dev/null @@ -1,62 +0,0 @@ -// -// upb - a minimalist implementation of protocol buffers. -// -// Copyright (c) 2011 Google Inc. See LICENSE for details. -// Author: Josh Haberman -// Routines for reading and writing message data to an in-memory structure, -// similar to a C struct. -// -// upb does not define one single message object that everyone must use. -// Rather it defines an abstract interface for reading and writing members -// of a message object, and all of the parsers and serializers use this -// abstract interface. This allows upb's parsers and serializers to be used -// regardless of what memory management scheme or synchronization model the -// application is using. -// -// A standard set of accessors is provided for doing simple reads and writes at -// a known offset into the message. These accessors should be used when -// possible, because they are specially optimized -- for example, the JIT can -// recognize them and emit specialized code instead of having to call the -// function at all. The application can substitute its own accessors when the -// standard accessors are not suitable. 
- -#ifndef UPB_MSG_HPP -#define UPB_MSG_HPP - -#include "upb/msg.h" -#include "upb/handlers.hpp" - -namespace upb { - -typedef upb_accessor_vtbl AccessorVTable; - -// Registers handlers for writing into a message of the given type using -// whatever accessors it has defined. -inline MessageHandlers* RegisterWriteHandlers(upb::Handlers* handlers, - const upb::MessageDef* md) { - return MessageHandlers::Cast( - upb_accessors_reghandlers(handlers, md)); -} - -template static FieldHandlers::ValueHandler* GetValueHandler(); - -// A handy templated function that will retrieve a value handler for a given -// C++ type. -#define GET_VALUE_HANDLER(type, ctype) \ - template <> \ - inline FieldHandlers::ValueHandler* GetValueHandler() { \ - return &upb_stdmsg_set ## type; \ - } - -GET_VALUE_HANDLER(double, double); -GET_VALUE_HANDLER(float, float); -GET_VALUE_HANDLER(uint64, uint64_t); -GET_VALUE_HANDLER(uint32, uint32_t); -GET_VALUE_HANDLER(int64, int64_t); -GET_VALUE_HANDLER(int32, int32_t); -GET_VALUE_HANDLER(bool, bool); -#undef GET_VALUE_HANDLER - -} // namespace - -#endif diff --git a/bindings/cpp/upb/pb/decoder.hpp b/bindings/cpp/upb/pb/decoder.hpp index 05bcb8a..950e9e2 100644 --- a/bindings/cpp/upb/pb/decoder.hpp +++ b/bindings/cpp/upb/pb/decoder.hpp @@ -22,14 +22,14 @@ #include "upb/pb/decoder.h" -#include "upb/bytestream.hpp" -#include "upb/upb.hpp" +#include "upb/bytestream.h" +#include "upb/upb.h" namespace upb { class DecoderPlan : public upb_decoderplan { public: - static DecoderPlan* New(Handlers* h, bool allow_jit) { + static DecoderPlan* New(const Handlers* h, bool allow_jit) { return static_cast(upb_decoderplan_new(h, allow_jit)); } void Unref() { upb_decoderplan_unref(this); } @@ -54,9 +54,7 @@ class Decoder : public upb_decoder { // reset to a different plan. // // Must be called before ResetInput() or Decode(). 
- void ResetPlan(DecoderPlan* plan, int32_t msg_offset) { - upb_decoder_resetplan(this, plan, msg_offset); - } + void ResetPlan(DecoderPlan* plan) { upb_decoder_resetplan(this, plan); } // Resets the input of the decoder. This puts it in a state where it has not // seen any data, and expects the next data to be from the beginning of a new @@ -71,7 +69,7 @@ class Decoder : public upb_decoder { // Decodes serialized data (calling Handlers as the data is parsed) until // error or EOF (see status() for details). - Success Decode() { return upb_decoder_decode(this); } + Status::Success Decode() { return upb_decoder_decode(this); } const upb::Status& status() { return static_cast(*upb_decoder_status(this)); diff --git a/bindings/cpp/upb/pb/glue.hpp b/bindings/cpp/upb/pb/glue.hpp deleted file mode 100644 index d43baeb..0000000 --- a/bindings/cpp/upb/pb/glue.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2011 Google Inc. See LICENSE for details. - * Author: Josh Haberman - */ - -#ifndef UPB_PB_GLUE_HPP -#define UPB_PB_GLUE_HPP - -#include "upb/upb.hpp" -#include "upb/pb/glue.h" - -namespace upb { - -// All routines that load descriptors expect the descriptor to be a -// FileDescriptorSet. 
-bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname, - Status* status) { - return upb_load_descriptor_file_into_symtab(s, fname, status); -} - -bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str, - size_t len, Status* status) { - return upb_load_descriptor_into_symtab(s, str, len, status); -} - -template -bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) { - return upb_load_descriptor_into_symtab(s, desc.c_str(), desc.size(), status); -} - -} // namespace upb - -#endif diff --git a/bindings/cpp/upb/proto2_bridge.cc b/bindings/cpp/upb/proto2_bridge.cc deleted file mode 100644 index 6119295..0000000 --- a/bindings/cpp/upb/proto2_bridge.cc +++ /dev/null @@ -1,892 +0,0 @@ -// -// upb - a minimalist implementation of protocol buffers. -// -// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. -// Author: Josh Haberman - -#include -#include -#include "upb/bytestream.hpp" -#include "upb/def.hpp" -#include "upb/handlers.hpp" -#include "upb/msg.hpp" -#include "upb/proto2_bridge.hpp" - -namespace { - -static void* GetFieldPointer(void *message, const upb::FieldDef* f) { - return static_cast(message) + f->offset(); -} - -} // namespace - -#ifdef UPB_GOOGLE3 - -// TODO(haberman): friend upb so that this isn't required. 
-#define protected public -#include "net/proto2/public/repeated_field.h" -#undef private - -#define private public -#include "net/proto/proto2_reflection.h" -#undef private - -#include "net/proto2/proto/descriptor.pb.h" -#include "net/proto2/public/descriptor.h" -#include "net/proto2/public/generated_message_reflection.h" -#include "net/proto2/public/lazy_field.h" -#include "net/proto2/public/message.h" -#include "net/proto2/public/string_piece_field_support.h" -#include "net/proto/internal_layout.h" -#include "strings/cord.h" -using ::proto2::Descriptor; -using ::proto2::EnumDescriptor; -using ::proto2::EnumValueDescriptor; -using ::proto2::FieldDescriptor; -using ::proto2::FieldOptions; -using ::proto2::FileDescriptor; -using ::proto2::internal::GeneratedMessageReflection; -using ::proto2::internal::RepeatedPtrFieldBase; -using ::proto2::internal::StringPieceField; -using ::proto2::Message; -using ::proto2::MessageFactory; -using ::proto2::Reflection; -using ::proto2::RepeatedField; -using ::proto2::RepeatedPtrField; - -namespace upb { - -static const Message* GetPrototypeForField(const Message& m, - const FieldDescriptor* f); - -namespace proto2_bridge_google3 { class FieldAccessor; } - -using ::upb::proto2_bridge_google3::FieldAccessor; - -namespace proto2_bridge_google3 { - -static void AssignToCord(const ByteRegion* r, Cord* cord) { - // TODO(haberman): ref source data if source is a cord. - cord->Clear(); - uint64_t ofs = r->start_ofs(); - while (ofs < r->end_ofs()) { - size_t len; - const char *buf = r->GetPtr(ofs, &len); - cord->Append(StringPiece(buf, len)); - ofs += len; - } -} - -#else - -// TODO(haberman): friend upb so that this isn't required. 
-#define protected public -#include "google/protobuf/repeated_field.h" -#undef protected - -#define private public -#include "google/protobuf/generated_message_reflection.h" -#undef private - -#include "google/protobuf/descriptor.h" -#include "google/protobuf/descriptor.pb.h" -#include "google/protobuf/message.h" -using ::google::protobuf::Descriptor; -using ::google::protobuf::EnumDescriptor; -using ::google::protobuf::EnumValueDescriptor; -using ::google::protobuf::FieldDescriptor; -using ::google::protobuf::FieldOptions; -using ::google::protobuf::FileDescriptor; -using ::google::protobuf::internal::GeneratedMessageReflection; -using ::google::protobuf::internal::RepeatedPtrFieldBase; -using ::google::protobuf::Message; -using ::google::protobuf::MessageFactory; -using ::google::protobuf::Reflection; -using ::google::protobuf::RepeatedField; -using ::google::protobuf::RepeatedPtrField; - -namespace upb { -static const Message* GetPrototypeForField(const Message& m, - const FieldDescriptor* f); - -namespace proto2_bridge_opensource { class FieldAccessor; } - -using ::upb::proto2_bridge_opensource::FieldAccessor; - -namespace proto2_bridge_opensource { - -#endif // ifdef UPB_GOOGLE3 - -// Have to define this manually since older versions of proto2 didn't define -// an enum value for STRING. -#define UPB_CTYPE_STRING 0 - -// The code in this class depends on the internal representation of the proto2 -// generated classes, which is an internal implementation detail of proto2 and -// is not a public interface. As a result, this class's implementation may -// need to be changed if/when proto2 changes its internal representation. It -// is intended that this class is the only code that depends on these internal, -// non-public interfaces. -// -// This class only works with messages that use GeneratedMessageReflection. -// Other reflection classes will need other accessor implementations. 
-class FieldAccessor { - public: - // Returns true if we were able to set an accessor and any other properties - // of the FieldDef that are necessary to read/write this field to a - // proto2::Message. - static bool TrySet(const FieldDescriptor* proto2_f, - const upb::MessageDef* md, - upb::FieldDef* upb_f) { - const Message* prototype = static_cast(md->prototype); - const Reflection* base_r = prototype->GetReflection(); - const GeneratedMessageReflection* r = - dynamic_cast(base_r); - // Old versions of the open-source protobuf release erroneously default to - // Cord even though that has never been supported in the open-source - // release. - int32_t ctype = proto2_f->options().has_ctype() ? - proto2_f->options().ctype() : UPB_CTYPE_STRING; - if (!r) return false; - // Extensions not supported yet. - if (proto2_f->is_extension()) return false; - - upb_f->set_accessor(GetForFieldDescriptor(proto2_f, ctype)); - upb_f->set_hasbit(GetHasbit(proto2_f, r)); - upb_f->set_offset(GetOffset(proto2_f, r)); - if (upb_f->IsSubmessage()) { - upb_f->set_subtype_name(proto2_f->message_type()->full_name()); - upb_f->prototype = GetPrototypeForField(*prototype, proto2_f); - } - - if (upb_f->IsString() && !upb_f->IsSequence() && - ctype == UPB_CTYPE_STRING) { - upb_f->prototype = &r->GetStringReference(*prototype, proto2_f, NULL); - } - return true; - } - - static MessageFactory* GetMessageFactory(const Message& m) { - const GeneratedMessageReflection* r = - dynamic_cast(m.GetReflection()); - return r ? r->message_factory_ : NULL; - } - - private: - static int64_t GetHasbit(const FieldDescriptor* f, - const GeneratedMessageReflection* r) { - if (f->is_repeated()) { - // proto2 does not store hasbits for repeated fields. 
- return -1; - } else { - return (r->has_bits_offset_ * 8) + f->index(); - } - } - - static uint16_t GetOffset(const FieldDescriptor* f, - const GeneratedMessageReflection* r) { - return r->offsets_[f->index()]; - } - - static AccessorVTable *GetForFieldDescriptor(const FieldDescriptor* f, - int32_t ctype) { - switch (f->cpp_type()) { - case FieldDescriptor::CPPTYPE_ENUM: - // Should handlers validate enum membership to match proto2? - case FieldDescriptor::CPPTYPE_INT32: return Get(); - case FieldDescriptor::CPPTYPE_INT64: return Get(); - case FieldDescriptor::CPPTYPE_UINT32: return Get(); - case FieldDescriptor::CPPTYPE_UINT64: return Get(); - case FieldDescriptor::CPPTYPE_DOUBLE: return Get(); - case FieldDescriptor::CPPTYPE_FLOAT: return Get(); - case FieldDescriptor::CPPTYPE_BOOL: return Get(); - case FieldDescriptor::CPPTYPE_STRING: - switch (ctype) { -#ifdef UPB_GOOGLE3 - case FieldOptions::STRING: - return GetForString(); - case FieldOptions::CORD: - return GetForCord(); - case FieldOptions::STRING_PIECE: - return GetForStringPiece(); -#else - case UPB_CTYPE_STRING: - return GetForString(); -#endif - default: return NULL; - } - case FieldDescriptor::CPPTYPE_MESSAGE: -#ifdef UPB_GOOGLE3 - if (f->options().lazy()) { - return NULL; // Not yet implemented. 
- } else { - return GetForMessage(); - } -#else - return GetForMessage(); -#endif - default: return NULL; - } - } - - // PushOffset handler (used for StartSequence and others) /////////////////// - - static SubFlow PushOffset(void *m, Value fval) { - const FieldDef *f = GetValue(fval); - return UPB_CONTINUE_WITH(GetFieldPointer(m, f)); - } - - // Primitive Value (numeric, enum, bool) ///////////////////////////////////// - - template static AccessorVTable *Get() { - static upb_accessor_vtbl vtbl = { - NULL, // StartSubMessage handler - GetValueHandler(), - &PushOffset, // StartSequence handler - NULL, // StartRepeatedSubMessage handler - &Append, - NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - - template - static Flow Append(void *_r, Value fval, Value val) { - (void)fval; - RepeatedField* r = static_cast*>(_r); - r->Add(GetValue(val)); - return UPB_CONTINUE; - } - - // String //////////////////////////////////////////////////////////////////// - - template static AccessorVTable *GetForString() { - static upb_accessor_vtbl vtbl = { - NULL, // StartSubMessage handler - &SetString, - &PushOffset, // StartSequence handler - NULL, // StartRepeatedSubMessage handler - &AppendString, - NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - - // This needs to be templated because google3 string is not std::string. - template static Flow SetString(void *m, Value fval, Value val) { - const FieldDef* f = GetValue(fval); - T **str = static_cast(GetFieldPointer(m, f)); - // If it points to the default instance, we must create a new instance. 
- if (*str == f->prototype) *str = new T(); - GetValue(val)->AssignToString(*str); - return UPB_CONTINUE; - } - - template - static Flow AppendString(void *_r, Value fval, Value val) { - (void)fval; - RepeatedPtrField* r = static_cast*>(_r); - GetValue(val)->AssignToString(r->Add()); - return UPB_CONTINUE; - } - - // SubMessage //////////////////////////////////////////////////////////////// - - static AccessorVTable *GetForMessage() { - static upb_accessor_vtbl vtbl = { - &StartSubMessage, - NULL, // Value handler - &PushOffset, // StartSequence handler - &StartRepeatedSubMessage, - NULL, // Repeated value handler - NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - - static SubFlow StartSubMessage(void *m, Value fval) { - const FieldDef* f = GetValue(fval); - void **subm = static_cast(GetFieldPointer(m, f)); - if (*subm == NULL || *subm == f->prototype) { - const Message* prototype = static_cast(f->prototype); - *subm = prototype->New(); - } - return UPB_CONTINUE_WITH(*subm); - } - - class RepeatedMessageTypeHandler { - public: - typedef void Type; - // AddAllocated() calls this, but only if other objects are sitting - // around waiting for reuse, which we will not do. - static void Delete(Type* t) { - (void)t; - assert(false); - } - }; - - // Closure is a RepeatedPtrField*, but we access it through - // its base class RepeatedPtrFieldBase*. - static SubFlow StartRepeatedSubMessage(void* _r, Value fval) { - const FieldDef* f = GetValue(fval); - RepeatedPtrFieldBase *r = static_cast(_r); - void *submsg = r->AddFromCleared(); - if (!submsg) { - const Message* prototype = static_cast(f->prototype); - submsg = prototype->New(); - r->AddAllocated(submsg); - } - return UPB_CONTINUE_WITH(submsg); - } - - // TODO(haberman): handle Extensions, Unknown Fields. - -#ifdef UPB_GOOGLE3 - // Handlers for types/features only included in internal proto2 release: - // Cord, StringPiece, LazyField, and MessageSet. - // TODO(haberman): LazyField, MessageSet. 
- - // Cord ////////////////////////////////////////////////////////////////////// - - static AccessorVTable *GetForCord() { - static upb_accessor_vtbl vtbl = { - NULL, // StartSubMessage handler - &SetCord, - &PushOffset, // StartSequence handler - NULL, // StartRepeatedSubMessage handler - &AppendCord, - NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - - static Flow SetCord(void *m, Value fval, Value val) { - const FieldDef* f = GetValue(fval); - Cord* field = static_cast(GetFieldPointer(m, f)); - AssignToCord(GetValue(val), field); - return UPB_CONTINUE; - } - - static Flow AppendCord(void *_r, Value fval, Value val) { - RepeatedField* r = static_cast*>(_r); - AssignToCord(GetValue(val), r->Add()); - return UPB_CONTINUE; - } - - // StringPiece /////////////////////////////////////////////////////////////// - - static AccessorVTable *GetForStringPiece() { - static upb_accessor_vtbl vtbl = { - NULL, // StartSubMessage handler - &SetStringPiece, - &PushOffset, // StartSequence handler - NULL, // StartRepeatedSubMessage handler - &AppendStringPiece, - NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - - static void AssignToStringPieceField(const ByteRegion* r, - proto2::internal::StringPieceField* f) { - // TODO(haberman): alias if possible and enabled on the input stream. - // TODO(haberman): add a method to StringPieceField that lets us avoid - // this copy/malloc/free. 
- char *data = new char[r->Length()]; - r->Copy(r->start_ofs(), r->Length(), data); - f->CopyFrom(StringPiece(data, r->Length())); - delete[] data; - } - - static Flow SetStringPiece(void *m, Value fval, Value val) { - const FieldDef* f = GetValue(fval); - StringPieceField* field = - static_cast(GetFieldPointer(m, f)); - AssignToStringPieceField(GetValue(val), field); - return UPB_CONTINUE; - } - - static Flow AppendStringPiece(void* _r, Value fval, Value val) { - RepeatedPtrField* r = - static_cast*>(_r); - AssignToStringPieceField(GetValue(val), r->Add()); - return UPB_CONTINUE; - } - -#endif // UPB_GOOGLE3 -}; - -#ifdef UPB_GOOGLE3 - -// Proto1 accessor -- only needed inside Google. -class Proto1FieldAccessor { - public: - // Returns true if we were able to set an accessor and any other properties - // of the FieldDef that are necessary to read/write this field to a - // proto2::Message. - static bool TrySet(const FieldDescriptor* proto2_f, - const upb::MessageDef* md, - upb::FieldDef* upb_f) { - const Message* m = static_cast(md->prototype); - const proto2::Reflection* base_r = m->GetReflection(); - const _pi::Proto2Reflection* r = - dynamic_cast(base_r); - if (!r) return false; - // Extensions not supported yet. - if (proto2_f->is_extension()) return false; - - const _pi::Field* f = r->GetFieldLayout(proto2_f); - - if (f->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) { - // Override the BYTES type that proto2 descriptors have for weak fields. 
- upb_f->set_type(UPB_TYPE(MESSAGE)); - } - - if (upb_f->IsSubmessage()) { - const Message* prototype = upb::GetPrototypeForField(*m, proto2_f); - upb_f->set_subtype_name(prototype->GetDescriptor()->full_name()); - upb_f->prototype = prototype; - } - - upb_f->set_accessor(GetForCrep(f->crep)); - upb_f->set_hasbit(GetHasbit(proto2_f, r)); - upb_f->set_offset(GetOffset(proto2_f, r)); - return true; - } - - private: - static int16_t GetHasbit(const FieldDescriptor* f, - const _pi::Proto2Reflection* r) { - if (f->is_repeated()) { - // proto1 does not store hasbits for repeated fields. - return -1; - } else { - return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index; - } - } - - static uint16_t GetOffset(const FieldDescriptor* f, - const _pi::Proto2Reflection* r) { - return r->GetFieldLayout(f)->offset; - } - - static AccessorVTable *GetForCrep(int crep) { -#define PRIMITIVE(name, type_name) \ - case _pi::CREP_REQUIRED_ ## name: \ - case _pi::CREP_OPTIONAL_ ## name: \ - case _pi::CREP_REPEATED_ ## name: return Get(); - - switch (crep) { - PRIMITIVE(DOUBLE, double); - PRIMITIVE(FLOAT, float); - PRIMITIVE(INT64, int64_t); - PRIMITIVE(UINT64, uint64_t); - PRIMITIVE(INT32, int32_t); - PRIMITIVE(FIXED64, uint64_t); - PRIMITIVE(FIXED32, uint32_t); - PRIMITIVE(BOOL, bool); - case _pi::CREP_REQUIRED_STRING: - case _pi::CREP_OPTIONAL_STRING: - case _pi::CREP_REPEATED_STRING: return GetForString(); - case _pi::CREP_OPTIONAL_OUTOFLINE_STRING: return GetForOutOfLineString(); - case _pi::CREP_REQUIRED_CORD: - case _pi::CREP_OPTIONAL_CORD: - case _pi::CREP_REPEATED_CORD: return GetForCord(); - case _pi::CREP_REQUIRED_GROUP: - case _pi::CREP_REQUIRED_FOREIGN: - case _pi::CREP_REQUIRED_FOREIGN_PROTO2: return GetForRequiredMessage(); - case _pi::CREP_OPTIONAL_GROUP: - case _pi::CREP_REPEATED_GROUP: - case _pi::CREP_OPTIONAL_FOREIGN: - case _pi::CREP_REPEATED_FOREIGN: - case _pi::CREP_OPTIONAL_FOREIGN_PROTO2: - case _pi::CREP_REPEATED_FOREIGN_PROTO2: return 
GetForMessage(); - case _pi::CREP_OPTIONAL_FOREIGN_WEAK: return GetForWeakMessage(); - default: assert(false); return NULL; - } -#undef PRIMITIVE - } - - // PushOffset handler (used for StartSequence and others) /////////////////// - - // We can find a RepeatedField* or a RepeatedPtrField* at f->offset(). - static SubFlow PushOffset(void *m, Value fval) { - const FieldDef *f = GetValue(fval); - return UPB_CONTINUE_WITH(GetFieldPointer(m, f)); - } - - // Primitive Value (numeric, enum, bool) ///////////////////////////////////// - - template static AccessorVTable *Get() { - static upb_accessor_vtbl vtbl = { - NULL, // StartSubMessage handler - GetValueHandler(), - &PushOffset, // StartSequence handler - NULL, // StartRepeatedSubMessage handler - &Append, - NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - - template - static Flow Append(void *_r, Value fval, Value val) { - (void)fval; - // Proto1's ProtoArray class derives from RepeatedField. - RepeatedField* r = static_cast*>(_r); - r->Add(GetValue(val)); - return UPB_CONTINUE; - } - - // String //////////////////////////////////////////////////////////////////// - - static AccessorVTable *GetForString() { - static upb_accessor_vtbl vtbl = { - NULL, // StartSubMessage handler - &SetString, - &PushOffset, // StartSequence handler - NULL, // StartRepeatedSubMessage handler - &AppendString, - NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - - static Flow SetString(void *m, Value fval, Value val) { - const FieldDef* f = GetValue(fval); - string *str = static_cast(GetFieldPointer(m, f)); - GetValue(val)->AssignToString(str); - return UPB_CONTINUE; - } - - static Flow AppendString(void *_r, Value fval, Value val) { - (void)fval; - RepeatedPtrField* r = static_cast*>(_r); - GetValue(val)->AssignToString(r->Add()); - return UPB_CONTINUE; - } - - // Out-of-line string //////////////////////////////////////////////////////// - - static AccessorVTable *GetForOutOfLineString() { - static upb_accessor_vtbl 
vtbl = { - NULL, &SetOutOfLineString, - // This type is only used for non-repeated string fields. - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - - static Flow SetOutOfLineString(void *m, Value fval, Value val) { - const FieldDef* f = GetValue(fval); - string **str = static_cast(GetFieldPointer(m, f)); - if (*str == &::ProtocolMessage::___empty_internal_proto_string_) - *str = new string(); - GetValue(val)->AssignToString(*str); - return UPB_CONTINUE; - } - - // Cord ////////////////////////////////////////////////////////////////////// - - static AccessorVTable *GetForCord() { - static upb_accessor_vtbl vtbl = { - NULL, // StartSubMessage handler - &SetCord, - &PushOffset, // StartSequence handler - NULL, // StartRepeatedSubMessage handler - &AppendCord, - NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - - static Flow SetCord(void *m, Value fval, Value val) { - const FieldDef* f = GetValue(fval); - Cord* field = static_cast(GetFieldPointer(m, f)); - AssignToCord(GetValue(val), field); - return UPB_CONTINUE; - } - - static Flow AppendCord(void *_r, Value fval, Value val) { - RepeatedField* r = static_cast*>(_r); - AssignToCord(GetValue(val), r->Add()); - return UPB_CONTINUE; - } - - // SubMessage //////////////////////////////////////////////////////////////// - - static AccessorVTable *GetForRequiredMessage() { - static upb_accessor_vtbl vtbl = { - &PushOffset, // StartSubMessage handler - NULL, // Value handler - &PushOffset, // StartSequence handler - &StartRepeatedSubMessage, - NULL, // Repeated value handler - NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - - static AccessorVTable *GetForWeakMessage() { - static upb_accessor_vtbl vtbl = { - &StartWeakSubMessage, // StartSubMessage handler - NULL, // Value handler - &PushOffset, // StartSequence handler - &StartRepeatedSubMessage, - NULL, // Repeated value handler - NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - - static AccessorVTable 
*GetForMessage() { - static upb_accessor_vtbl vtbl = { - &StartSubMessage, - NULL, // Value handler - &PushOffset, // StartSequence handler - &StartRepeatedSubMessage, - NULL, // Repeated value handler - NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - - static SubFlow StartSubMessage(void *m, Value fval) { - const FieldDef* f = GetValue(fval); - Message **subm = static_cast(GetFieldPointer(m, f)); - if (*subm == f->prototype) *subm = (*subm)->New(); - return UPB_CONTINUE_WITH(*subm); - } - - static SubFlow StartWeakSubMessage(void *m, Value fval) { - const FieldDef* f = GetValue(fval); - Message **subm = static_cast(GetFieldPointer(m, f)); - if (*subm == NULL) { - const Message* prototype = static_cast(f->prototype); - *subm = prototype->New(); - } - return UPB_CONTINUE_WITH(*subm); - } - - class RepeatedMessageTypeHandler { - public: - typedef void Type; - // AddAllocated() calls this, but only if other objects are sitting - // around waiting for reuse, which we will not do. - static void Delete(Type* t) { - (void)t; - assert(false); - } - }; - - // Closure is a RepeatedPtrField*, but we access it through - // its base class RepeatedPtrFieldBase*. - static SubFlow StartRepeatedSubMessage(void* _r, Value fval) { - const FieldDef* f = GetValue(fval); - RepeatedPtrFieldBase *r = static_cast(_r); - void *submsg = r->AddFromCleared(); - if (!submsg) { - const Message* prototype = static_cast(f->prototype); - submsg = prototype->New(); - r->AddAllocated(submsg); - } - return UPB_CONTINUE_WITH(submsg); - } -}; - -#endif - -} // namespace proto2_bridge_{google3,opensource} - -static const Message* GetPrototypeForMessage(const Message& m) { - const Message* ret = NULL; - MessageFactory* factory = FieldAccessor::GetMessageFactory(m); - if (factory) { - // proto2 generated message or DynamicMessage. 
- ret = factory->GetPrototype(m.GetDescriptor()); - assert(ret); - } else { - // Proto1 message; since proto1 has no dynamic message, it must be - // from the generated factory. - ret = MessageFactory::generated_factory()->GetPrototype(m.GetDescriptor()); - assert(ret); // If NULL, then wasn't a proto1 message, can't handle it. - } - assert(ret->GetReflection() == m.GetReflection()); - return ret; -} - -static const Message* GetPrototypeForField(const Message& m, - const FieldDescriptor* f) { -#ifdef UPB_GOOGLE3 - if (f->type() == FieldDescriptor::TYPE_BYTES) { - // Proto1 weak field: the proto2 descriptor says their type is BYTES. - const _pi::Proto2Reflection* r = - dynamic_cast(m.GetReflection()); - assert(r); - const _pi::Field* field = r->GetFieldLayout(f); - assert(field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK); - return GetPrototypeForMessage( - *static_cast(field->weak_layout()->default_instance)); - } else if (dynamic_cast(m.GetReflection())) { - // Proto1 message; since proto1 has no dynamic message, it must be from - // the generated factory. - const Message* ret = - MessageFactory::generated_factory()->GetPrototype(f->message_type()); - assert(ret); - return ret; - } -#endif - assert(f->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE); - // We assume that all submessages (and extensions) will be constructed using - // the same MessageFactory as this message. This doesn't cover the case of - // CodedInputStream::SetExtensionRegistry(). - MessageFactory* factory = FieldAccessor::GetMessageFactory(m); - assert(factory); // If neither proto1 nor proto2 we can't handle it. 
- const Message* ret = factory->GetPrototype(f->message_type()); - assert(ret); - return ret; -} - -namespace proto2_bridge { - -upb::FieldDef* AddFieldDef(const FieldDescriptor* f, upb::MessageDef* md) { - upb::FieldDef* upb_f = upb::FieldDef::New(&upb_f); - upb_f->set_number(f->number()); - upb_f->set_name(f->name()); - upb_f->set_label(static_cast(f->label())); - upb_f->set_type(static_cast(f->type())); - - if (!FieldAccessor::TrySet(f, md, upb_f) -#ifdef UPB_GOOGLE3 - && !proto2_bridge_google3::Proto1FieldAccessor::TrySet(f, md, upb_f) -#endif - ) { - // Unsupported reflection class. - assert(false); - } - - if (upb_f->type() == UPB_TYPE(ENUM)) { - // We set the enum default symbolically. - upb_f->set_default(f->default_value_enum()->name()); - upb_f->set_subtype_name(f->enum_type()->full_name()); - } else { - // Set field default for primitive types. Need to switch on the upb type - // rather than the proto2 type, because upb_f->type() may have been changed - // from BYTES to MESSAGE for a weak field. - switch (upb_types[upb_f->type()].inmemory_type) { - case UPB_CTYPE_INT32: - upb_f->set_default(MakeValue(f->default_value_int32())); - break; - case UPB_CTYPE_INT64: - upb_f->set_default( - MakeValue(static_cast(f->default_value_int64()))); - break; - case UPB_CTYPE_UINT32: - upb_f->set_default(MakeValue(f->default_value_uint32())); - break; - case UPB_CTYPE_UINT64: - upb_f->set_default( - MakeValue(static_cast(f->default_value_uint64()))); - break; - case UPB_CTYPE_DOUBLE: - upb_f->set_default(MakeValue(f->default_value_double())); - break; - case UPB_CTYPE_FLOAT: - upb_f->set_default(MakeValue(f->default_value_float())); - break; - case UPB_CTYPE_BOOL: - upb_f->set_default(MakeValue(f->default_value_bool())); - break; - case UPB_CTYPE_BYTEREGION: - upb_f->set_default(f->default_value_string()); - break; - } - } - return md->AddField(upb_f, &upb_f) ? 
upb_f : NULL; -} - -upb::MessageDef *NewEmptyMessageDef(const Message& m, void *owner) { - upb::MessageDef *md = upb::MessageDef::New(owner); - md->set_full_name(m.GetDescriptor()->full_name()); - md->prototype = GetPrototypeForMessage(m); - return md; -} - -upb::EnumDef* NewEnumDef(const EnumDescriptor* desc, void *owner) { - upb::EnumDef* e = upb::EnumDef::New(owner); - e->set_full_name(desc->full_name()); - for (int i = 0; i < desc->value_count(); i++) { - const EnumValueDescriptor* val = desc->value(i); - bool success = e->AddValue(val->name(), val->number()); - assert(success); - (void)success; - } - return e; -} - -void AddAllFields(upb::MessageDef* md) { - const Descriptor* d = - static_cast(md->prototype)->GetDescriptor(); - for (int i = 0; i < d->field_count(); i++) { -#ifdef UPB_GOOGLE3 - // Skip lazy fields for now since we can't properly handle them. - if (d->field(i)->options().lazy()) continue; -#endif - // Extensions not supported yet. - if (d->field(i)->is_extension()) continue; - AddFieldDef(d->field(i), md); - } -} - -upb::MessageDef *NewFullMessageDef(const Message& m, void *owner) { - upb::MessageDef* md = NewEmptyMessageDef(m, owner); - AddAllFields(md); - // TODO(haberman): add unknown field handler and extensions. - return md; -} - -typedef std::map SymbolMap; - -static upb::MessageDef* NewFinalMessageDefHelper(const Message& m, void *owner, - SymbolMap* symbols) { - upb::MessageDef* md = NewFullMessageDef(m, owner); - // Must do this before processing submessages to prevent infinite recursion. 
- (*symbols)[std::string(md->full_name())] = md->AsDef(); - - for (upb::MessageDef::Iterator i(md); !i.Done(); i.Next()) { - upb::FieldDef* f = i.field(); - if (!f->HasSubDef()) continue; - SymbolMap::iterator iter = symbols->find(f->subtype_name()); - upb::Def* subdef; - if (iter != symbols->end()) { - subdef = iter->second; - } else { - const FieldDescriptor* proto2_f = - m.GetDescriptor()->FindFieldByNumber(f->number()); - if (f->type() == UPB_TYPE(ENUM)) { - subdef = NewEnumDef(proto2_f->enum_type(), owner)->AsDef(); - (*symbols)[std::string(subdef->full_name())] = subdef; - } else { - assert(f->IsSubmessage()); - const Message* prototype = GetPrototypeForField(m, proto2_f); - subdef = NewFinalMessageDefHelper(*prototype, owner, symbols)->AsDef(); - } - } - f->set_subdef(subdef); - } - return md; -} - -const upb::MessageDef* NewFinalMessageDef(const Message& m, void *owner) { - SymbolMap symbols; - upb::MessageDef* ret = NewFinalMessageDefHelper(m, owner, &symbols); - - // Finalize defs. - std::vector defs; - SymbolMap::iterator iter; - for (iter = symbols.begin(); iter != symbols.end(); ++iter) { - defs.push_back(iter->second); - } - Status status; - bool success = Def::Finalize(defs, &status); - assert(success); - (void)success; - - // Unref all defs except the top-level one that we are returning. - for (int i = 0; i < static_cast(defs.size()); i++) { - if (defs[i] != ret->AsDef()) defs[i]->Unref(owner); - } - - return ret; -} - -} // namespace proto2_bridge -} // namespace upb diff --git a/bindings/cpp/upb/proto2_bridge.hpp b/bindings/cpp/upb/proto2_bridge.hpp deleted file mode 100644 index ace08ce..0000000 --- a/bindings/cpp/upb/proto2_bridge.hpp +++ /dev/null @@ -1,170 +0,0 @@ -// -// upb - a minimalist implementation of protocol buffers. -// -// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. 
-// Author: Josh Haberman -// -// A bridge between upb and proto2, allows populating proto2 generated -// classes using upb's parser, translating between descriptors and defs, etc. -// -// This is designed to be able to be compiled against either the open-source -// version of protocol buffers or the Google-internal proto2. The two are -// the same in most ways, but live in different namespaces (proto2 vs -// google::protobuf) and have a few other more minor differences. -// -// The bridge gives you a lot of control over which fields will be written to -// the message (fields that are not written will just be skipped), and whether -// unknown fields are written to the UnknownFieldSet. This can save a lot of -// work if the client only cares about some subset of the fields. -// -// Example usage: -// -// // Build a def that will have all fields and parse just like proto2 would. -// const upb::MessageDef* md = upb::proto2_bridge::NewMessageDef(&MyProto()); -// -// // JIT the parser; should only be done once ahead-of-time. -// upb::Handlers* handlers = upb::NewHandlersForMessage(md); -// upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers); -// handlers->Unref(); -// -// // The actual parsing. -// MyProto proto; -// upb::Decoder decoder; -// upb::StringSource source(buf, len); -// decoder.ResetPlan(plan, 0); -// decoder.ResetInput(source.AllBytes(), &proto); -// CHECK(decoder.Decode() == UPB_OK) << decoder.status(); -// -// To parse only one field and skip all others: -// -// const upb::MessageDef* md = -// upb::proto2_bridge::NewEmptyMessageDef(MyProto().GetPrototype()); -// upb::proto2_bridge::AddFieldDef( -// MyProto::descriptor()->FindFieldByName("my_field"), md); -// upb::Finalize(md); -// -// // Now continue with "JIT the parser" from above. -// -// Note that there is currently no support for -// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate -// DescriptorPool and MessageFactory for extensions. 
Since this is a property -// of the input in proto2, it's difficult to build a plan ahead-of-time that -// can properly support this. If it's an important use case, the caller should -// probably build a upb plan explicitly. - -#ifndef UPB_PROTO2_BRIDGE -#define UPB_PROTO2_BRIDGE - -#include - -namespace google { -namespace protobuf { -class Descriptor; -class EnumDescriptor; -class FieldDescriptor; -class FileDescriptor; -class Message; -} // namespace google -} // namespace protobuf - -namespace proto2 { -class Descriptor; -class EnumDescriptor; -class FieldDescriptor; -class FileDescriptor; -class Message; -} // namespace proto2 - - -namespace upb { - -class Def; -class FieldDef; -class MessageDef; - -namespace proto2_bridge { - -// Unfinalized defs //////////////////////////////////////////////////////////// - -// Creating of UNFINALIZED defs. All of these functions return defs that are -// still mutable and have not been finalized. They must be finalized before -// using them to parse anything. This is useful if you want more control over -// the process of constructing defs, eg. to add the specific set of fields you -// care about. - -// Creates a new upb::MessageDef that corresponds to the type in the given -// prototype message. The MessageDef will not have any fields added to it. -upb::MessageDef *NewEmptyMessageDef(const proto2::Message& m, void *owner); -upb::MessageDef *NewEmptyMessageDef(const google::protobuf::Message& desc, - void *owner); - -// Adds a new upb::FieldDef to the given MessageDef corresponding to the given -// FieldDescriptor. The FieldDef will be given an accessor and offset so that -// it can be used to read and write data into the proto2::Message classes. -// The given MessageDef must have been constructed with NewEmptyDefForMessage() -// and f->containing_type() must correspond to the message that was used. 
-// -// Any submessage, group, or enum fields will be given symbolic references to -// the subtype, which must be resolved before the MessageDef can be finalized. -// -// On success, returns the FieldDef that was added (caller does not own a ref). -// If an existing field had the same name or number, returns NULL. -upb::FieldDef* AddFieldDef(const proto2::FieldDescriptor* f, - upb::MessageDef* md); -upb::FieldDef* AddFieldDef(const google::protobuf::FieldDescriptor* f, - upb::MessageDef* md); - -// Given a MessageDef that was constructed with NewEmptyDefForMessage(), adds -// FieldDefs for all fields defined in the original message, but not for any -// extensions or unknown fields. The given MessageDef must not have any fields -// that have the same name or number as any of the fields we are adding (the -// easiest way to guarantee this is to start with an empty MessageDef). -// -// Returns true on success or false if any of the fields could not be added. -void AddAllFields(upb::MessageDef* md); - -// TODO(haberman): Add: -// // Adds a handler that will store unknown fields in the UnknownFieldSet. -// void AddUnknownFieldHandler(upb::MessageDef* md); - -// Returns a new upb::MessageDef that contains handlers for all fields, unknown -// fields, and any extensions in the descriptor's pool. The resulting -// def/handlers should be equivalent to the generated code constructed by the -// protobuf compiler (or the code in DynamicMessage) for the given type. -// The subdefs for message/enum fields (if any) will be referenced symbolically, -// and will need to be resolved before being finalized. -// -// TODO(haberman): Add missing support (LazyField, MessageSet, and extensions). -// -// TODO(haberman): possibly add a similar function that lets you supply a -// separate DescriptorPool and MessageFactory for extensions, to support -// proto2's io::CodedInputStream::SetExtensionRegistry(). 
-upb::MessageDef* NewFullMessageDef(const proto2::Message& m, void *owner); -upb::MessageDef* NewFullMessageDef(const google::protobuf::Message& m, - void *owner); - -// Returns a new upb::EnumDef that corresponds to the given EnumDescriptor. -// Caller owns a ref on the returned EnumDef. -upb::EnumDef* NewEnumDef(const proto2::EnumDescriptor* desc, void *owner); -upb::EnumDef* NewEnumDef(const google::protobuf::EnumDescriptor* desc, - void *owner); - -// Finalized defs ////////////////////////////////////////////////////////////// - -// These functions return FINALIZED defs, meaning that they are immutable and -// ready for use. Since they are immutable you cannot make any further changes -// to eg. the set of fields, but these functions are more convenient if you -// simply want to parse a message exactly how the built-in proto2 parser would. - -// Creates a returns a finalized MessageDef for the give message and its entire -// type tree that will include all fields and unknown handlers (ie. it will -// parse just like proto2 would). -const upb::MessageDef* NewFinalMessageDef(const proto2::Message& m, - void *owner); -const upb::MessageDef* NewFinalMessageDef(const google::protobuf::Message& m, - void *owner); - -} // namespace proto2_bridge -} // namespace upb - -#endif diff --git a/bindings/cpp/upb/upb.hpp b/bindings/cpp/upb/upb.hpp deleted file mode 100644 index 48c2708..0000000 --- a/bindings/cpp/upb/upb.hpp +++ /dev/null @@ -1,81 +0,0 @@ -// -// upb - a minimalist implementation of protocol buffers. -// -// Copyright (c) 2011 Google Inc. See LICENSE for details. 
-// Author: Josh Haberman - -#ifndef UPB_HPP -#define UPB_HPP - -#include "upb/upb.h" -#include - -#if defined(__GXX_EXPERIMENTAL_CXX0X__) && !defined(UPB_NO_CXX11) -#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \ - class_name() = delete; \ - ~class_name() = delete; -#else -#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \ - class_name(); \ - ~class_name(); -#endif - -namespace upb { - -typedef upb_success_t Success; - -class Status : public upb_status { - public: - Status() { upb_status_init(this); } - ~Status() { upb_status_uninit(this); } - - bool ok() const { return upb_ok(this); } - bool eof() const { return upb_eof(this); } - - const char *GetString() const { return upb_status_getstr(this); } - void SetEof() { upb_status_seteof(this); } - void SetErrorLiteral(const char* msg) { - upb_status_seterrliteral(this, msg); - } - - void Clear() { upb_status_clear(this); } -}; - -typedef upb_value Value; - -template T GetValue(Value v); -template Value MakeValue(T v); - -#define UPB_VALUE_ACCESSORS(type, ctype) \ - template <> inline ctype GetValue(Value v) { \ - return upb_value_get ## type(v); \ - } \ - template <> inline Value MakeValue(ctype v) { \ - return upb_value_ ## type(v); \ - } - -UPB_VALUE_ACCESSORS(double, double); -UPB_VALUE_ACCESSORS(float, float); -UPB_VALUE_ACCESSORS(int32, int32_t); -UPB_VALUE_ACCESSORS(int64, int64_t); -UPB_VALUE_ACCESSORS(uint32, uint32_t); -UPB_VALUE_ACCESSORS(uint64, uint64_t); -UPB_VALUE_ACCESSORS(bool, bool); - -#undef UPB_VALUE_ACCESSORS - -template inline T* GetPtrValue(Value v) { - return static_cast(upb_value_getptr(v)); -} -template inline Value MakePtrValue(T* v) { - return upb_value_ptr(static_cast(v)); -} - -INLINE std::ostream& operator<<(std::ostream& out, const Status& status) { - out << status.GetString(); - return out; -} - -} // namespace upb - -#endif diff --git a/bindings/linux/Makefile b/bindings/linux/Makefile index e98aa3c..1736b61 100644 --- a/bindings/linux/Makefile +++ 
b/bindings/linux/Makefile @@ -1,7 +1,6 @@ obj-m = upb.o upb-objs = \ - setjmp.o \ ../../upb/upb.o \ ../../upb/bytestream.o \ ../../upb/def.o \ @@ -9,9 +8,6 @@ upb-objs = \ ../../upb/table.o \ ../../upb/refcount.o \ ../../upb/msg.o \ - ../../upb/pb/decoder.o \ - ../../upb/pb/textprinter.o \ - ../../upb/pb/varint.o \ KVERSION = $(shell uname -r) diff --git a/bindings/linux/ctype.h b/bindings/linux/ctype.h deleted file mode 100644 index b6cbda5..0000000 --- a/bindings/linux/ctype.h +++ /dev/null @@ -1,8 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2012 Google Inc. See LICENSE for details. - * Author: Josh Haberman - */ - -#include diff --git a/bindings/linux/inttypes.h b/bindings/linux/inttypes.h deleted file mode 100644 index e7a6e42..0000000 --- a/bindings/linux/inttypes.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2012 Google Inc. See LICENSE for details. - * Author: Josh Haberman - */ - -#ifndef PRId64 -#define PRId64 "ld" -#endif - -#ifndef PRIu64 -#define PRIu64 "lu" -#endif - -#ifndef PRId32 -#define PRId32 "d" -#endif - -#ifndef PRIu32 -#define PRIu32 "u" -#endif diff --git a/bindings/linux/setjmp.S b/bindings/linux/setjmp.S deleted file mode 100644 index 5eea4be..0000000 --- a/bindings/linux/setjmp.S +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2003 Peter Wemm. - * Copyright (c) 1993 The Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -.globl _setjmp, _longjmp - -_setjmp: - movq %rbx,0(%rdi) /* save rbx */ - movq %rsp,8(%rdi) /* save rsp */ - movq %rbp,16(%rdi) /* save rbp */ - movq %r12,24(%rdi) /* save r12 */ - movq %r13,32(%rdi) /* save r13 */ - movq %r14,40(%rdi) /* save r14 */ - movq %r15,48(%rdi) /* save r15 */ - movq 0(%rsp),%rdx /* get rta */ - movq %rdx,56(%rdi) /* save rip */ - xorl %eax,%eax /* return(0); */ - ret - -_longjmp: - movq 0(%rdi),%rbx /* restore rbx */ - movq 8(%rdi),%rsp /* restore rsp */ - movq 16(%rdi),%rbp /* restore rbp */ - movq 24(%rdi),%r12 /* restore r12 */ - movq 32(%rdi),%r13 /* restore r13 */ - movq 40(%rdi),%r14 /* restore r14 */ - movq 48(%rdi),%r15 /* restore r15 */ - movq 56(%rdi),%rdx /* get rta */ - movq %rdx,0(%rsp) /* put in return frame */ - xorl %eax,%eax /* return(1); */ - incl %eax - ret diff --git a/bindings/linux/setjmp.h b/bindings/linux/setjmp.h deleted file mode 100644 index c4716e6..0000000 --- a/bindings/linux/setjmp.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2012 Google Inc. See LICENSE for details. - * Author: Josh Haberman - */ - -// Linux doesn't provide setjmp/longjmp, boo. - -typedef void *jmp_buf[8]; - -extern int _setjmp(jmp_buf env); -__attribute__((__noreturn__)) extern void _longjmp(jmp_buf env, int val); diff --git a/bindings/linux/string.h b/bindings/linux/string.h index 69de3fa..30ebf8a 100644 --- a/bindings/linux/string.h +++ b/bindings/linux/string.h @@ -9,18 +9,5 @@ #define UPB_LINUX_STRING_H_ #include -#include -#include "upb/upb.h" // For INLINE. - -INLINE char *strdup(const char *s) { - size_t len = strlen(s); - char *ret = malloc(len + 1); - if (ret == NULL) return NULL; - // Be particularly defensive and guard against buffer overflow if there - // is a concurrent mutator. 
- strncpy(ret, s, len); - ret[len] = '\0'; - return ret; -} #endif /* UPB_DEF_H_ */ diff --git a/bindings/lua/LICENSE b/bindings/lua/LICENSE new file mode 100644 index 0000000..fb720fe --- /dev/null +++ b/bindings/lua/LICENSE @@ -0,0 +1,32 @@ + +Lunit License +------------- + +Lunit is written by Michael Roth and is licensed +under the terms of the MIT license reproduced below. + +======================================================================== + +Copyright (c) 2004-2010 Michael Roth + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +======================================================================== + diff --git a/bindings/lua/lunitx/atexit.lua b/bindings/lua/lunitx/atexit.lua new file mode 100644 index 0000000..c3cdddc --- /dev/null +++ b/bindings/lua/lunitx/atexit.lua @@ -0,0 +1,32 @@ + +local actions = {} + +local atexit + +if _VERSION >= 'Lua 5.2' then + + atexit = function (fn) + actions[#actions+1] = setmetatable({}, { __gc = fn }) + end + +else + + local newproxy = newproxy + local debug = debug + local assert = assert + local setmetatable = setmetatable + + local function gc(fn) + local p = assert(newproxy()) + assert(debug.setmetatable(p, { __gc = fn })) + return p + end + + atexit = function (fn) + actions[#actions+1] = gc(fn) + end + +end + +return atexit + diff --git a/bindings/lua/lunitx/lunit.lua b/bindings/lua/lunitx/lunit.lua new file mode 100644 index 0000000..8fa87de --- /dev/null +++ b/bindings/lua/lunitx/lunit.lua @@ -0,0 +1,725 @@ +--[[-------------------------------------------------------------------------- + + This file is part of lunit 0.5. + + For Details about lunit look at: http://www.mroth.net/lunit/ + + Author: Michael Roth + + Copyright (c) 2004, 2006-2010 Michael Roth + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +--]]-------------------------------------------------------------------------- + + +local orig_assert = assert + +local pairs = pairs +local ipairs = ipairs +local next = next +local type = type +local error = error +local tostring = tostring +local setmetatable = setmetatable +local pcall = pcall +local xpcall = xpcall +local require = require +local loadfile = loadfile + +local string_sub = string.sub +local string_gsub = string.gsub +local string_format = string.format +local string_lower = string.lower +local string_find = string.find + +local table_concat = table.concat + +local debug_getinfo = debug.getinfo + +local _G = _G + +local lunit + +if _VERSION >= 'Lua 5.2' then + + lunit = {} + _ENV = lunit + +else + + module("lunit") + lunit = _M + +end + + +local __failure__ = {} -- Type tag for failed assertions + +local typenames = { "nil", "boolean", "number", "string", "table", "function", "thread", "userdata" } + + +local traceback_hide -- Traceback function which hides lunit internals +local mypcall -- Protected call to a function with own traceback +do + local _tb_hide = setmetatable( {}, {__mode="k"} ) + + function traceback_hide(func) + _tb_hide[func] = true + end + + local function my_traceback(errobj) + if is_table(errobj) and errobj.type == __failure__ then + local info = debug_getinfo(5, "Sl") -- FIXME: Hardcoded integers are bad... + errobj.where = string_format( "%s:%d", info.short_src, info.currentline) + else + errobj = { msg = tostring(errobj) } + errobj.tb = {} + local i = 2 + while true do + local info = debug_getinfo(i, "Snlf") + if not is_table(info) then + break + end + if not _tb_hide[info.func] then + local line = {} -- Ripped from ldblib.c... 
+ line[#line+1] = string_format("%s:", info.short_src) + if info.currentline > 0 then + line[#line+1] = string_format("%d:", info.currentline) + end + if info.namewhat ~= "" then + line[#line+1] = string_format(" in function '%s'", info.name) + else + if info.what == "main" then + line[#line+1] = " in main chunk" + elseif info.what == "C" or info.what == "tail" then + line[#line+1] = " ?" + else + line[#line+1] = string_format(" in function <%s:%d>", info.short_src, info.linedefined) + end + end + errobj.tb[#errobj.tb+1] = table_concat(line) + end + i = i + 1 + end + end + return errobj + end + + function mypcall(func) + orig_assert( is_function(func) ) + local ok, errobj = xpcall(func, my_traceback) + if not ok then + return errobj + end + end + traceback_hide(mypcall) +end + + +-- Type check functions + +for _, typename in ipairs(typenames) do + lunit["is_"..typename] = function(x) + return type(x) == typename + end +end + +local is_nil = is_nil +local is_boolean = is_boolean +local is_number = is_number +local is_string = is_string +local is_table = is_table +local is_function = is_function +local is_thread = is_thread +local is_userdata = is_userdata + + +local function failure(name, usermsg, defaultmsg, ...) 
+ local errobj = { + type = __failure__, + name = name, + msg = string_format(defaultmsg,...), + usermsg = usermsg + } + error(errobj, 0) +end +traceback_hide( failure ) + + +local function format_arg(arg) + local argtype = type(arg) + if argtype == "string" then + return "'"..arg.."'" + elseif argtype == "number" or argtype == "boolean" or argtype == "nil" then + return tostring(arg) + else + return "["..tostring(arg).."]" + end +end + + +local function selected(map, name) + if not map then + return true + end + + local m = {} + for k,v in pairs(map) do + m[k] = lunitpat2luapat(v) + end + return in_patternmap(m, name) +end + + +function fail(msg) + stats.assertions = stats.assertions + 1 + failure( "fail", msg, "failure" ) +end +traceback_hide( fail ) + + +function assert(assertion, msg) + stats.assertions = stats.assertions + 1 + if not assertion then + failure( "assert", msg, "assertion failed" ) + end + return assertion +end +traceback_hide( assert ) + + +function assert_true(actual, msg) + stats.assertions = stats.assertions + 1 + if actual ~= true then + failure( "assert_true", msg, "true expected but was %s", format_arg(actual) ) + end + return actual +end +traceback_hide( assert_true ) + + +function assert_false(actual, msg) + stats.assertions = stats.assertions + 1 + if actual ~= false then + failure( "assert_false", msg, "false expected but was %s", format_arg(actual) ) + end + return actual +end +traceback_hide( assert_false ) + + +function assert_equal(expected, actual, msg) + stats.assertions = stats.assertions + 1 + if expected ~= actual then + failure( "assert_equal", msg, "expected %s but was %s", format_arg(expected), format_arg(actual) ) + end + return actual +end +traceback_hide( assert_equal ) + + +function assert_not_equal(unexpected, actual, msg) + stats.assertions = stats.assertions + 1 + if unexpected == actual then + failure( "assert_not_equal", msg, "%s not expected but was one", format_arg(unexpected) ) + end + return actual +end 
+traceback_hide( assert_not_equal ) + + +function assert_match(pattern, actual, msg) + stats.assertions = stats.assertions + 1 + if type(pattern) ~= "string" then + failure( "assert_match", msg, "expected a string as pattern but was %s", format_arg(pattern) ) + end + if type(actual) ~= "string" then + failure( "assert_match", msg, "expected a string to match pattern '%s' but was a %s", pattern, format_arg(actual) ) + end + if not string_find(actual, pattern) then + failure( "assert_match", msg, "expected '%s' to match pattern '%s' but doesn't", actual, pattern ) + end + return actual +end +traceback_hide( assert_match ) + + +function assert_not_match(pattern, actual, msg) + stats.assertions = stats.assertions + 1 + if type(pattern) ~= "string" then + failure( "assert_not_match", msg, "expected a string as pattern but was %s", format_arg(pattern) ) + end + if type(actual) ~= "string" then + failure( "assert_not_match", msg, "expected a string to not match pattern '%s' but was %s", pattern, format_arg(actual) ) + end + if string_find(actual, pattern) then + failure( "assert_not_match", msg, "expected '%s' to not match pattern '%s' but it does", actual, pattern ) + end + return actual +end +traceback_hide( assert_not_match ) + + +function assert_error(msg, func) + stats.assertions = stats.assertions + 1 + if func == nil then + func, msg = msg, nil + end + if type(func) ~= "function" then + failure( "assert_error", msg, "expected a function as last argument but was %s", format_arg(func) ) + end + local ok, errmsg = pcall(func) + if ok then + failure( "assert_error", msg, "error expected but no error occurred" ) + end +end +traceback_hide( assert_error ) + + +function assert_error_match(msg, pattern, func) + stats.assertions = stats.assertions + 1 + if func == nil then + msg, pattern, func = nil, msg, pattern + end + if type(pattern) ~= "string" then + failure( "assert_error_match", msg, "expected the pattern as a string but was %s", format_arg(pattern) ) + end + if 
type(func) ~= "function" then + failure( "assert_error_match", msg, "expected a function as last argument but was %s", format_arg(func) ) + end + local ok, errmsg = pcall(func) + if ok then + failure( "assert_error_match", msg, "error expected but no error occurred" ) + end + if type(errmsg) ~= "string" then + failure( "assert_error_match", msg, "error as string expected but was %s", format_arg(errmsg) ) + end + if not string_find(errmsg, pattern) then + failure( "assert_error_match", msg, "expected error '%s' to match pattern '%s' but doesn't", errmsg, pattern ) + end +end +traceback_hide( assert_error_match ) + + +function assert_pass(msg, func) + stats.assertions = stats.assertions + 1 + if func == nil then + func, msg = msg, nil + end + if type(func) ~= "function" then + failure( "assert_pass", msg, "expected a function as last argument but was %s", format_arg(func) ) + end + local ok, errmsg = pcall(func) + if not ok then + failure( "assert_pass", msg, "no error expected but error was: '%s'", errmsg ) + end +end +traceback_hide( assert_pass ) + + +-- lunit.assert_typename functions + +for _, typename in ipairs(typenames) do + local assert_typename = "assert_"..typename + lunit[assert_typename] = function(actual, msg) + stats.assertions = stats.assertions + 1 + if type(actual) ~= typename then + failure( assert_typename, msg, "%s expected but was %s", typename, format_arg(actual) ) + end + return actual + end + traceback_hide( lunit[assert_typename] ) +end + + +-- lunit.assert_not_typename functions + +for _, typename in ipairs(typenames) do + local assert_not_typename = "assert_not_"..typename + lunit[assert_not_typename] = function(actual, msg) + stats.assertions = stats.assertions + 1 + if type(actual) == typename then + failure( assert_not_typename, msg, typename.." 
not expected but was one" ) + end + end + traceback_hide( lunit[assert_not_typename] ) +end + + +function lunit.clearstats() + stats = { + assertions = 0; + passed = 0; + failed = 0; + errors = 0; + } +end + + +local report, reporterrobj +do + local testrunner + + function lunit.setrunner(newrunner) + if not ( is_table(newrunner) or is_nil(newrunner) ) then + return error("lunit.setrunner: Invalid argument", 0) + end + local oldrunner = testrunner + testrunner = newrunner + return oldrunner + end + + function lunit.loadrunner(name) + if not is_string(name) then + return error("lunit.loadrunner: Invalid argument", 0) + end + local ok, runner = pcall( require, name ) + if not ok then + return error("lunit.loadrunner: Can't load test runner: "..runner, 0) + end + return setrunner(runner) + end + + function lunit.getrunner() + return testrunner + end + + function report(event, ...) + local f = testrunner and testrunner[event] + if is_function(f) then + pcall(f, ...) + end + end + + function reporterrobj(context, tcname, testname, errobj) + local fullname = tcname .. "." .. testname + if context == "setup" then + fullname = fullname .. ":" .. setupname(tcname, testname) + elseif context == "teardown" then + fullname = fullname .. ":" .. teardownname(tcname, testname) + end + if errobj.type == __failure__ then + stats.failed = stats.failed + 1 + report("fail", fullname, errobj.where, errobj.msg, errobj.usermsg) + else + stats.errors = stats.errors + 1 + report("err", fullname, errobj.msg, errobj.tb) + end + end +end + + + +local function key_iter(t, k) + return (next(t,k)) +end + + +local testcase +do + -- Array with all registered testcases + local _testcases = {} + + -- Marks a module as a testcase. + -- Applied over a module from module("xyz", lunit.testcase). 
+ function lunit.testcase(m) + orig_assert( is_table(m) ) + --orig_assert( m._M == m ) + orig_assert( is_string(m._NAME) ) + --orig_assert( is_string(m._PACKAGE) ) + + -- Register the module as a testcase + _testcases[m._NAME] = m + + -- Import lunit, fail, assert* and is_* function to the module/testcase + m.lunit = lunit + m.fail = lunit.fail + for funcname, func in pairs(lunit) do + if "assert" == string_sub(funcname, 1, 6) or "is_" == string_sub(funcname, 1, 3) then + m[funcname] = func + end + end + end + + function lunit.module(name,seeall) + local m = {} + if seeall == "seeall" then + setmetatable(m, { __index = _G }) + end + m._NAME = name + lunit.testcase(m) + return m + end + + -- Iterator (testcasename) over all Testcases + function lunit.testcases() + -- Make a copy of testcases to prevent confusing the iterator when + -- new testcase are defined + local _testcases2 = {} + for k,v in pairs(_testcases) do + _testcases2[k] = true + end + return key_iter, _testcases2, nil + end + + function testcase(tcname) + return _testcases[tcname] + end +end + + +do + -- Finds a function in a testcase case insensitive + local function findfuncname(tcname, name) + for key, value in pairs(testcase(tcname)) do + if is_string(key) and is_function(value) and string_lower(key) == name then + return key + end + end + end + + function lunit.setupname(tcname) + return findfuncname(tcname, "setup") + end + + function lunit.teardownname(tcname) + return findfuncname(tcname, "teardown") + end + + -- Iterator over all test names in a testcase. + -- Have to collect the names first in case one of the test + -- functions creates a new global and throws off the iteration. 
+ function lunit.tests(tcname) + local testnames = {} + for key, value in pairs(testcase(tcname)) do + if is_string(key) and is_function(value) then + local lfn = string_lower(key) + if string_sub(lfn, 1, 4) == "test" or string_sub(lfn, -4) == "test" then + testnames[key] = true + end + end + end + return key_iter, testnames, nil + end +end + + + + +function lunit.runtest(tcname, testname) + orig_assert( is_string(tcname) ) + orig_assert( is_string(testname) ) + + if (not getrunner()) then + loadrunner("lunit.console") + end + + local function callit(context, func) + if func then + local err = mypcall(func) + if err then + reporterrobj(context, tcname, testname, err) + return false + end + end + return true + end + traceback_hide(callit) + + report("run", tcname, testname) + + local tc = testcase(tcname) + local setup = tc[setupname(tcname)] + local test = tc[testname] + local teardown = tc[teardownname(tcname)] + + local setup_ok = callit( "setup", setup ) + local test_ok = setup_ok and callit( "test", test ) + local teardown_ok = setup_ok and callit( "teardown", teardown ) + + if setup_ok and test_ok and teardown_ok then + stats.passed = stats.passed + 1 + report("pass", tcname, testname) + end +end +traceback_hide(runtest) + + + +function lunit.run(testpatterns) + clearstats() + report("begin") + for testcasename in lunit.testcases() do + -- Run tests in the testcases + for testname in lunit.tests(testcasename) do + if selected(testpatterns, testname) then + runtest(testcasename, testname) + end + end + end + report("done") + return stats +end +traceback_hide(run) + + +function lunit.loadonly() + clearstats() + report("begin") + report("done") + return stats +end + + + + + + + + + +local lunitpat2luapat +do + local conv = { + ["^"] = "%^", + ["$"] = "%$", + ["("] = "%(", + [")"] = "%)", + ["%"] = "%%", + ["."] = "%.", + ["["] = "%[", + ["]"] = "%]", + ["+"] = "%+", + ["-"] = "%-", + ["?"] = ".", + ["*"] = ".*" + } + function lunitpat2luapat(str) + --return "^" 
.. string.gsub(str, "%W", conv) .. "$" + -- Above was very annoying, if I want to run all the tests having to do with + -- RSS, I want to be able to do "-t rss" not "-t \*rss\*". + return string_gsub(str, "%W", conv) + end +end + + + +local function in_patternmap(map, name) + if map[name] == true then + return true + else + for _, pat in ipairs(map) do + if string_find(name, pat) then + return true + end + end + end + return false +end + + + + + + + + +-- Called from 'lunit' shell script. + +function main(argv) + argv = argv or {} + + -- FIXME: Error handling and error messages aren't nice. + + local function checkarg(optname, arg) + if not is_string(arg) then + return error("lunit.main: option "..optname..": argument missing.", 0) + end + end + + local function loadtestcase(filename) + if not is_string(filename) then + return error("lunit.main: invalid argument") + end + local chunk, err = loadfile(filename) + if err then + return error(err) + else + chunk() + end + end + + local testpatterns = nil + local doloadonly = false + + local i = 0 + while i < #argv do + i = i + 1 + local arg = argv[i] + if arg == "--loadonly" then + doloadonly = true + elseif arg == "--runner" or arg == "-r" then + local optname = arg; i = i + 1; arg = argv[i] + checkarg(optname, arg) + loadrunner(arg) + elseif arg == "--test" or arg == "-t" then + local optname = arg; i = i + 1; arg = argv[i] + checkarg(optname, arg) + testpatterns = testpatterns or {} + testpatterns[#testpatterns+1] = arg + elseif arg == "--help" or arg == "-h" then + print[[ +lunit 0.5 +Copyright (c) 2004-2009 Michael Roth +This program comes WITHOUT WARRANTY OF ANY KIND. + +Usage: lua test [OPTIONS] [--] scripts + +Options: + + -r, --runner RUNNER Testrunner to use, defaults to 'lunit-console'. + -t, --test PATTERN Which tests to run, may contain * or ? wildcards. + --loadonly Only load the tests. + -h, --help Print this help screen. + +Please report bugs to . 
+]] + return + elseif arg == "--" then + while i < #argv do + i = i + 1; arg = argv[i] + loadtestcase(arg) + end + else + loadtestcase(arg) + end + end + + if doloadonly then + return loadonly() + else + return run(testpatterns) + end +end + +clearstats() + +return lunit diff --git a/bindings/lua/lunitx/lunit/console.lua b/bindings/lua/lunitx/lunit/console.lua new file mode 100644 index 0000000..0ff22a4 --- /dev/null +++ b/bindings/lua/lunitx/lunit/console.lua @@ -0,0 +1,156 @@ + +--[[-------------------------------------------------------------------------- + + This file is part of lunit 0.5. + + For Details about lunit look at: http://www.mroth.net/lunit/ + + Author: Michael Roth + + Copyright (c) 2006-2008 Michael Roth + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +--]]-------------------------------------------------------------------------- + + + +--[[ + + begin() + run(testcasename, testname) + err(fullname, message, traceback) + fail(fullname, where, message, usermessage) + pass(testcasename, testname) + done() + + Fullname: + testcase.testname + testcase.testname:setupname + testcase.testname:teardownname + +--]] + + +lunit = require "lunit" + +local lunit_console + +if _VERSION >= 'Lua 5.2' then + + lunit_console = setmetatable({},{__index = _ENV}) + _ENV = lunit_console + +else + + module( "lunit-console", package.seeall ) + lunit_console = _M + +end + + + +local function printformat(format, ...) + io.write( string.format(format, ...) ) +end + + +local columns_printed = 0 + +local function writestatus(char) + if columns_printed == 0 then + io.write(" ") + end + if columns_printed == 60 then + io.write("\n ") + columns_printed = 0 + end + io.write(char) + io.flush() + columns_printed = columns_printed + 1 +end + + +local msgs = {} + + +function begin() + local total_tc = 0 + local total_tests = 0 + + msgs = {} -- e + + for tcname in lunit.testcases() do + total_tc = total_tc + 1 + for testname, test in lunit.tests(tcname) do + total_tests = total_tests + 1 + end + end + + printformat("Loaded testsuite with %d tests in %d testcases.\n\n", total_tests, total_tc) +end + + +function run(testcasename, testname) + -- NOP +end + + +function err(fullname, message, traceback) + writestatus("E") + msgs[#msgs+1] = "Error! ("..fullname.."):\n"..message.."\n\t"..table.concat(traceback, "\n\t") .. "\n" +end + + +function fail(fullname, where, message, usermessage) + writestatus("F") + local text = "Failure ("..fullname.."):\n".. + where..": "..message.."\n" + + if usermessage then + text = text .. 
where..": "..usermessage.."\n" + end + + msgs[#msgs+1] = text +end + + +function pass(testcasename, testname) + writestatus(".") +end + + + +function done() + printformat("\n\n%d Assertions checked.\n", lunit.stats.assertions ) + print() + + for i, msg in ipairs(msgs) do + printformat( "%3d) %s\n", i, msg ) + end + + printformat("Testsuite finished (%d passed, %d failed, %d errors).\n", + lunit.stats.passed, lunit.stats.failed, lunit.stats.errors ) +end + + +return lunit_console + + diff --git a/bindings/lua/lunitx/lunitx.lua b/bindings/lua/lunitx/lunitx.lua new file mode 100644 index 0000000..7656e6a --- /dev/null +++ b/bindings/lua/lunitx/lunitx.lua @@ -0,0 +1,21 @@ +local atexit = require "atexit" +local lunit = require "lunit" + +--for k,v in pairs(debug.getinfo(1,"S")) do print(k,v) end +-- autonameing +-- module("bcrc-test", lunit.testcase, package.seeall) + +atexit(function() + local _, emsg = xpcall(function() + lunit.main(arg) + end, debug.traceback) + if emsg then + print(emsg) + os.exit(1) + end + if lunit.stats.failed > 0 or lunit.stats.errors > 0 then + os.exit(1) + end +end) + +return lunit diff --git a/bindings/lua/table.c b/bindings/lua/table.c new file mode 100644 index 0000000..31b92d2 --- /dev/null +++ b/bindings/lua/table.c @@ -0,0 +1,167 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * Lua extension that provides access to upb_table. This is an internal-only + * interface and exists for the sole purpose of writing a C code generator in + * Lua that can dump a upb_table as static C initializers. This lets us use + * Lua for convenient string manipulation while saving us from re-implementing + * the upb_table hash function and hash table layout / collision strategy in + * Lua. 
+ * + * Since this is used only as part of the toolchain (and not part of the + * runtime) we do not hold this module to the same stringent requirements as + * the main Lua modules (for example that misbehaving Lua programs cannot + * crash the interpreter). + */ + +#include +#include +#include +#include +#include "lauxlib.h" +#include "bindings/lua/upb.h" +#include "upb/def.h" + +static void lupbtable_setnum(lua_State *L, int tab, const char *key, + lua_Number val) { + lua_pushnumber(L, val); + lua_setfield(L, tab - 1, key); +} + +static void lupbtable_pushval(lua_State *L, upb_value val, upb_ctype_t type) { + switch (type) { + case UPB_CTYPE_INT32: + lua_pushnumber(L, upb_value_getint32(val)); + break; + case UPB_CTYPE_PTR: + lupb_def_pushwrapper(L, upb_value_getptr(val), NULL); + break; + case UPB_CTYPE_CSTR: + lua_pushstring(L, upb_value_getcstr(val)); + break; + default: + luaL_error(L, "Unexpected type: %d", type); + } +} + +// Sets a few fields common to both hash table entries and arrays. +static void lupbtable_setmetafields(lua_State *L, int type, const void *ptr) { + // We tack this onto every entry so we know it even if the entries + // don't stay with the table. + lua_pushnumber(L, type); + lua_setfield(L, -2, "valtype"); + + // Set this to facilitate linking. + lua_pushlightuserdata(L, (void*)ptr); + lua_setfield(L, -2, "ptr"); +} + +static void lupbtable_pushent(lua_State *L, const upb_tabent *e, + bool inttab, int type) { + lua_newtable(L); + if (!upb_tabent_isempty(e)) { + if (inttab) { + lua_pushnumber(L, e->key.num); + } else { + lua_pushstring(L, e->key.str); + } + lua_setfield(L, -2, "key"); + lupbtable_pushval(L, e->val, type); + lua_setfield(L, -2, "value"); + } + lua_pushlightuserdata(L, (void*)e->next); + lua_setfield(L, -2, "next"); + lupbtable_setmetafields(L, type, e); +} + +// Dumps the shared part of upb_table into a Lua table. 
+static void lupbtable_pushtable(lua_State *L, const upb_table *t, bool inttab) { + lua_newtable(L); + lupbtable_setnum(L, -1, "count", t->count); + lupbtable_setnum(L, -1, "mask", t->mask); + lupbtable_setnum(L, -1, "type", t->type); + lupbtable_setnum(L, -1, "size_lg2", t->size_lg2); + + lua_newtable(L); + for (int i = 0; i < upb_table_size(t); i++) { + lupbtable_pushent(L, &t->entries[i], inttab, t->type); + lua_rawseti(L, -2, i + 1); + } + lua_setfield(L, -2, "entries"); +} + +// Dumps a upb_inttable to a Lua table. +static void lupbtable_pushinttable(lua_State *L, const upb_inttable *t) { + lupbtable_pushtable(L, &t->t, true); + lupbtable_setnum(L, -1, "array_size", t->array_size); + lupbtable_setnum(L, -1, "array_count", t->array_count); + + lua_newtable(L); + for (int i = 0; i < t->array_size; i++) { + lua_newtable(L); + if (upb_arrhas(t->array[i])) { + lupbtable_pushval(L, t->array[i], t->t.type); + lua_setfield(L, -2, "val"); + } + lupbtable_setmetafields(L, t->t.type, &t->array[i]); + lua_rawseti(L, -2, i + 1); + } + lua_setfield(L, -2, "array"); +} + +static void lupbtable_pushstrtable(lua_State *L, const upb_strtable *t) { + lupbtable_pushtable(L, &t->t, false); +} + +static int lupbtable_msgdef_itof(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + lupbtable_pushinttable(L, &m->itof); + return 1; +} + +static int lupbtable_msgdef_ntof(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + lupbtable_pushstrtable(L, &m->ntof); + return 1; +} + +static int lupbtable_enumdef_iton(lua_State *L) { + const upb_enumdef *e = lupb_enumdef_check(L, 1); + lupbtable_pushinttable(L, &e->iton); + return 1; +} + +static int lupbtable_enumdef_ntoi(lua_State *L) { + const upb_enumdef *e = lupb_enumdef_check(L, 1); + lupbtable_pushstrtable(L, &e->ntoi); + return 1; +} + +static void lupbtable_setfieldi(lua_State *L, const char *field, int i) { + lua_pushnumber(L, i); + lua_setfield(L, -2, field); +} + +static const struct luaL_Reg 
lupbtable_toplevel_m[] = { + {"msgdef_itof", lupbtable_msgdef_itof}, + {"msgdef_ntof", lupbtable_msgdef_ntof}, + {"enumdef_iton", lupbtable_enumdef_iton}, + {"enumdef_ntoi", lupbtable_enumdef_ntoi}, + {NULL, NULL} +}; + +int luaopen_upbtable(lua_State *L) { + lupb_newlib(L, "upb.table", lupbtable_toplevel_m); + + // We define these here because they are not public (at least at the moment). + lupbtable_setfieldi(L, "CTYPE_PTR", UPB_CTYPE_PTR); + lupbtable_setfieldi(L, "CTYPE_INT32", UPB_CTYPE_INT32); + + lua_pushlightuserdata(L, NULL); + lua_setfield(L, -2, "NULL"); + + return 1; // Return a single Lua value, the package table created above. +} diff --git a/bindings/lua/test.lua b/bindings/lua/test.lua index 42bce25..6b162a9 100644 --- a/bindings/lua/test.lua +++ b/bindings/lua/test.lua @@ -1,109 +1,262 @@ -require "upb" +local upb = require "upb" +local lunit = require "lunitx" -symtab = upb.SymbolTable{ - upb.MessageDef{fqname="A", fields={ - upb.FieldDef{name="a", type=upb.TYPE_INT32, number=1}, - upb.FieldDef{name="b", type=upb.TYPE_DOUBLE, number=2}} +if _VERSION >= 'Lua 5.2' then + _ENV = lunit.module("testupb", "seeall") +else + module("testupb", lunit.testcase, package.seeall) +end + +function test_fielddef() + local f = upb.FieldDef() + assert_false(f:is_frozen()) + assert_nil(f:number()) + assert_nil(f:name()) + assert_equal(upb.LABEL_OPTIONAL, f:label()) + + f:set_name("foo_field") + f:set_number(3) + f:set_label(upb.LABEL_REPEATED) + f:set_type(upb.TYPE_FLOAT) + + assert_equal("foo_field", f:name()) + assert_equal(3, f:number()) + assert_equal(upb.LABEL_REPEATED, f:label()) + assert_equal(upb.TYPE_FLOAT, f:type()) + + local f2 = upb.FieldDef{ + name = "foo", number = 5, type = upb.TYPE_DOUBLE, label = upb.LABEL_REQUIRED } -} - -symtab = upb.SymbolTable{ - upb.MessageDef{fqname="A", fields={ - upb.FieldDef{name="a", type=upb.TYPE_INT32, number=1}, - upb.FieldDef{name="b", type=upb.TYPE_DOUBLE, number=2}} - }, - upb.MessageDef{fqname="B"} -} -A, B, C = 
symtab:lookup("A", "B") -print(A) -print(B) -print(C) - -a = A() -a2 = upb.Message(A) -print("YO! a.a=" .. tostring(a.a) .. ", a2.a=" .. tostring(a2.a)) -a.a = 2 -a2.a = 3 -print("YO! a.a=" .. tostring(a.a) .. ", a2.a=" .. tostring(a2.a)) - -A = symtab:lookup("A") -if not A then - error("Could not find A") + + assert_equal("foo", f2:name()) + assert_equal(5, f2:number()) + assert_equal(upb.TYPE_DOUBLE, f2:type()) + assert_equal(upb.LABEL_REQUIRED, f2:label()) end -f = io.open("../../upb/descriptor.pb") -if not f then - error("Couldn't open descriptor.pb, try running 'make descriptorgen'") +function test_enumdef() + local e = upb.EnumDef() + assert_equal(0, #e) + assert_nil(e:value(5)) + assert_nil(e:value("NONEXISTENT_NAME")) + + for name, value in e:values() do + fail() + end + + e:add("VAL1", 1) + e:add("VAL2", 2) + + local values = {} + for name, value in e:values() do + values[name] = value + end + + assert_equal(1, values["VAL1"]) + assert_equal(2, values["VAL2"]) + + local e2 = upb.EnumDef{ + values = { + {"FOO", 1}, + {"BAR", 77}, + } + } + + assert_equal(1, e2:value("FOO")) + assert_equal(77, e2:value("BAR")) + assert_equal("FOO", e2:value(1)) + assert_equal("BAR", e2:value(77)) end -symtab:parsedesc(f:read("*all")) -symtab:load_descriptor() -symtab:load_descriptor_file() -upb.pb.load_descriptor(f:read("*all")) +function test_empty_msgdef() + local md = upb.MessageDef() + assert_nil(md:full_name()) -- Def without name is anonymous. 
+ assert_false(md:is_frozen()) + assert_equal(0, #md) + assert_nil(md:field("nonexistent_field")) + assert_nil(md:field(3)) + for field in md:fields() do + fail() + end + + upb.freeze(md) + assert_true(md:is_frozen()) + assert_equal(0, #md) + assert_nil(md:field("nonexistent_field")) + assert_nil(md:field(3)) + for field in md:fields() do + fail() + end +end + +function test_msgdef_constructor() + local f1 = upb.FieldDef{name = "field1", number = 7, type = upb.TYPE_INT32} + local f2 = upb.FieldDef{name = "field2", number = 8, type = upb.TYPE_INT32} + local md = upb.MessageDef{ + full_name = "TestMessage", + fields = {f1, f2} + } + assert_equal("TestMessage", md:full_name()) + assert_false(md:is_frozen()) + assert_equal(2, #md) + assert_equal(f1, md:field("field1")) + assert_equal(f2, md:field("field2")) + assert_equal(f1, md:field(7)) + assert_equal(f2, md:field(8)) + local count = 0 + local found = {} + for field in md:fields() do + count = count + 1 + found[field] = true + end + assert_equal(2, count) + assert_true(found[f1]) + assert_true(found[f2]) -upb.pb.load_descriptor_file("../../src/descriptor.pb", symtab) + upb.freeze(md) +end + +function test_msgdef_setters() + local md = upb.MessageDef() + md:set_full_name("Message1") + assert_equal("Message1", md:full_name()) + local f = upb.FieldDef{name = "field1", number = 3, type = upb.TYPE_DOUBLE} + md:add{f} + assert_equal(1, #md) + assert_equal(f, md:field("field1")) +end + +function test_msgdef_errors() + assert_error(function() upb.MessageDef{bad_initializer_key = 5} end) + local md = upb.MessageDef() + assert_error(function() + -- Duplicate field number. + upb.MessageDef{ + fields = { + upb.FieldDef{name = "field1", number = 1, type = upb.TYPE_INT32}, + upb.FieldDef{name = "field2", number = 1, type = upb.TYPE_INT32} + } + } + end) + assert_error(function() + -- Duplicate field name. 
+ upb.MessageDef{ + fields = { + upb.FieldDef{name = "field1", number = 1, type = upb.TYPE_INT32}, + upb.FieldDef{name = "field1", number = 2, type = upb.TYPE_INT32} + } + } + end) + + -- attempt to set a name with embedded NULLs. + assert_error_match("names cannot have embedded NULLs", function() + md:set_full_name("abc\0def") + end) + + upb.freeze(md) + -- Attempt to mutate frozen MessageDef. + -- TODO(haberman): better error message and test for message. + assert_error(function() + md:add{upb.FieldDef{name = "field1", number = 1, type = upb.TYPE_INT32}} + end) + assert_error(function() + md:set_full_name("abc") + end) + + -- Attempt to freeze a msgdef without freezing its subdef. + assert_error_match("is not frozen or being frozen", function() + m1 = upb.MessageDef() + upb.freeze( + upb.MessageDef{ + fields = { + upb.FieldDef{name = "f1", number = 1, type = upb.TYPE_MESSAGE, + subdef = m1} + } + } + ) + end) +end + +function test_symtab() + local empty = upb.SymbolTable() + assert_equal(0, #empty:getdefs(upb.DEF_ANY)) + + local symtab = upb.SymbolTable{ + upb.MessageDef{full_name = "TestMessage"}, + upb.MessageDef{full_name = "ContainingMessage", fields = { + upb.FieldDef{name = "field1", number = 1, type = upb.TYPE_INT32}, + upb.FieldDef{name = "field2", number = 2, type = upb.TYPE_MESSAGE, + subdef_name = ".TestMessage"} + } + } + } + + local msgdef1 = symtab:lookup("TestMessage") + local msgdef2 = symtab:lookup("ContainingMessage") + assert_not_nil(msgdef1) + assert_not_nil(msgdef2) + assert_equal(msgdef1, msgdef2:field("field2"):subdef()) + assert_true(msgdef1:is_frozen()) + assert_true(msgdef2:is_frozen()) + + symtab:add{ + upb.MessageDef{full_name = "ContainingMessage2", fields = { + upb.FieldDef{name = "field5", number = 5, type = upb.TYPE_MESSAGE, + subdef = msgdef2} + } + } + } + + local msgdef3 = symtab:lookup("ContainingMessage2") + assert_not_nil(msgdef3) + assert_equal(msgdef3:field("field5"):subdef(), msgdef2) +end -f = 
io.open("../../benchmarks/google_messages.proto.pb") -if not f then - error("Couldn't open google_messages.proto.pb, try running 'make benchmarks'") +-- Lua 5.1 and 5.2 have slightly different semantics for how a finalizer +-- can be defined in Lua. +if _VERSION >= 'Lua 5.2' then + function defer(fn) + setmetatable({}, { __gc = fn }) + end +else + function defer(fn) + getmetatable(newproxy(true)).__gc = fn + end end -symtab:parsedesc(f:read("*all")) -for _, def in ipairs(symtab:getdefs(-1)) do - print(def:name()) +function test_finalizer() + -- Tests that we correctly handle a call into an already-finalized object. + -- Collectible objects are finalized in the opposite order of creation. + do + local t = {} + defer(function() + assert_error_match("called into dead def", function() + -- Generic def call. + t[1]:full_name() + end) + assert_error_match("called into dead msgdef", function() + -- Specific msgdef call. + t[1]:add() + end) + assert_error_match("called into dead enumdef", function() + t[2]:values() + end) + assert_error_match("called into dead fielddef", function() + t[3]:number() + end) + assert_error_match("called into dead symtab", + function() t[4]:lookup() + end) + end) + t = { + upb.MessageDef(), + upb.EnumDef(), + upb.FieldDef(), + upb.SymbolTable(), + } + end + collectgarbage() end -SpeedMessage1 = symtab:lookup("benchmarks.SpeedMessage1") -SpeedMessage2 = symtab:lookup("benchmarks.SpeedMessage2") -print(SpeedMessage1:name()) - -msg = MyType() -msg:Decode(str) - -msg:DecodeJSON(str) - -msg = upb.pb.decode(str, MyType) -str = upb.pb.encode(msg) - -msg = upb.pb.decode_text(str, MyType) -str = upb.pb.encode_text(msg) - -upb.clear(msg) -upb.msgdef(msg) -upb.has(msg, "foo_bar") - -msg = upb.json.decode(str, MyType) - -msg = upb.pb.DecodeText(str) -msg = upb.pb.EncodeText(msg) -upb. 
- -upb.pb.decode_into(msg, str) - -str = upb.json.Encode(msg) -upb.json.DecodeInto(msg, str) -f = assert(io.open("../../benchmarks/google_message1.dat")) -msg:Parse(f:read("*all")) -print(msg:ToText()) -print(upb.json.encode(msg)) - -msg = SpeedMessage2() -f = assert(io.open("../../benchmarks/google_message2.dat")) -msg:Parse(f:read("*all")) -print(msg:ToText()) ---msg:Serialize() ---msg:FromText(str) --- print(msg.field129) --- print(msg.field271) ---print(msg.field15.field15) ---msg.field15.field15 = "my override" ---print(msg.field15.field15) --- print(msg.field1) --- print(msg.field1) --- msg.field1 = "YEAH BABY!" --- print(msg.field1) --- print(msg.field129) --- msg.field129 = 5 --- print(msg.field129) ---]] +lunit.main() diff --git a/bindings/lua/upb.c b/bindings/lua/upb.c index 4cce4b6..1a1d7c0 100644 --- a/bindings/lua/upb.c +++ b/bindings/lua/upb.c @@ -4,96 +4,85 @@ * Copyright (c) 2009 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * A Lua extension for upb. + * A Lua extension for upb. Exposes only the core library + * (sub-libraries are exposed in other extensions). */ -#include -#include #include +#include +#include +#include #include "lauxlib.h" -#include "upb/def.h" -#include "upb/msg.h" +#include "bindings/lua/upb.h" +#include "upb/bytestream.h" #include "upb/pb/glue.h" -#if LUA_VERSION_NUM == 501 -#define lua_rawlen lua_objlen -#endif +// Lua metatable types. +#define LUPB_MSGDEF "lupb.msgdef" +#define LUPB_ENUMDEF "lupb.enumdef" +#define LUPB_FIELDDEF "lupb.fielddef" +#define LUPB_SYMTAB "lupb.symtab" -static bool streql(const char *a, const char *b) { return strcmp(a, b) == 0; } +// Other table constants. 
+#define LUPB_OBJCACHE "lupb.objcache" -static bool lupb_isint(double n) { return (double)(int)n == n; } +#if LUA_VERSION_NUM == 501 -static uint8_t lupb_touint8(lua_State *L, int narg, const char *name) { - lua_Number n = lua_tonumber(L, narg); - if (n > UINT8_MAX || n < 0 || rint(n) != n) - luaL_error(L, "Invalid %s", name); - return n; +// Taken from Lua 5.2's source. +void *luaL_testudata(lua_State *L, int ud, const char *tname) { + void *p = lua_touserdata(L, ud); + if (p != NULL) { /* value is a userdata? */ + if (lua_getmetatable(L, ud)) { /* does it have a metatable? */ + luaL_getmetatable(L, tname); /* get correct metatable */ + if (!lua_rawequal(L, -1, -2)) /* not the same? */ + p = NULL; /* value is a userdata with wrong metatable */ + lua_pop(L, 2); /* remove both metatables */ + return p; + } + } + return NULL; /* value is not a userdata with a metatable */ } -static uint32_t lupb_touint32(lua_State *L, int narg, const char *name) { - lua_Number n = lua_tonumber(L, narg); - if (n > UINT32_MAX || n < 0 || rint(n) != n) - luaL_error(L, "Invalid %s", name); - return n; +#elif LUA_VERSION_NUM == 502 + +int luaL_typerror(lua_State *L, int narg, const char *tname) { + const char *msg = lua_pushfstring(L, "%s expected, got %s", + tname, luaL_typename(L, narg)); + return luaL_argerror(L, narg, msg); } -static void lupb_pushstring(lua_State *L, const upb_strref *ref) { - if (ref->ptr) { - lua_pushlstring(L, ref->ptr, ref->len); - } else { - // Lua requires a continguous string; must copy+allocate. 
- char *str = upb_strref_dup(ref); - lua_pushlstring(L, str, ref->len); - free(str); - } +#else +#error Only Lua 5.1 and 5.2 are supported +#endif + +const char *lupb_checkname(lua_State *L, int narg) { + size_t len; + const char *name = luaL_checklstring(L, narg, &len); + if (strlen(name) != len) + luaL_error(L, "names cannot have embedded NULLs"); + return name; } -static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) { - switch (f->type) { - case UPB_TYPE(INT32): - case UPB_TYPE(SINT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(ENUM): - lua_pushnumber(L, upb_value_getint32(val)); break; - case UPB_TYPE(INT64): - case UPB_TYPE(SINT64): - case UPB_TYPE(SFIXED64): - lua_pushnumber(L, upb_value_getint64(val)); break; - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - lua_pushnumber(L, upb_value_getuint32(val)); break; - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - lua_pushnumber(L, upb_value_getuint64(val)); break; - case UPB_TYPE(DOUBLE): - lua_pushnumber(L, upb_value_getdouble(val)); break; - case UPB_TYPE(FLOAT): - lua_pushnumber(L, upb_value_getfloat(val)); break; - case UPB_TYPE(BOOL): - lua_pushboolean(L, upb_value_getbool(val)); break; - default: luaL_error(L, "internal error"); - } +static bool streql(const char *a, const char *b) { return strcmp(a, b) == 0; } + +// Returns the Lua value at index narg as an int32_t. Raises a Lua error +// ("Invalid <name>") if the value is outside the int32 range or not integral. +// Note that lua_tonumber() yields 0 for values that are not convertible to a +// number, so such values are returned as 0. +static int32_t lupb_checkint32(lua_State *L, int narg, const char *name) { + lua_Number n = lua_tonumber(L, narg); + if (n > INT32_MAX || n < INT32_MIN || rint(n) != n) + luaL_error(L, "Invalid %s", name); + // Convert to the signed type that was range-checked above; converting a + // negative double straight to uint32_t is undefined behavior in C. + return (int32_t)n; } -// Returns a scalar value (ie. not a submessage) as a upb_value. -static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f, - upb_strref *ref) { - assert(!upb_issubmsg(f)); +// Converts a number or bool from Lua -> upb_value.
+static upb_value lupb_getvalue(lua_State *L, int narg, upb_fieldtype_t type) { upb_value val; - if (upb_fielddef_type(f) == UPB_TYPE(BOOL)) { + if (type == UPB_TYPE(BOOL)) { if (!lua_isboolean(L, narg)) luaL_error(L, "Must explicitly pass true or false for boolean fields"); upb_value_setbool(&val, lua_toboolean(L, narg)); - } else if (upb_fielddef_type(f) == UPB_TYPE(STRING)) { - size_t len; - ref->ptr = luaL_checklstring(L, narg, &len); - ref->len = len; - upb_value_setstrref(&val, ref); } else { // Numeric type. - lua_Number num = 0; - num = luaL_checknumber(L, narg); - switch (upb_fielddef_type(f)) { + lua_Number num = luaL_checknumber(L, narg); + switch (type) { case UPB_TYPE(INT32): case UPB_TYPE(SINT32): case UPB_TYPE(SFIXED32): @@ -133,374 +122,743 @@ static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f, luaL_error(L, "Cannot convert %f to float", num); upb_value_setfloat(&val, num); break; + default: luaL_error(L, "invalid type"); } } return val; } -static void lupb_typecheck(lua_State *L, int narg, upb_fielddef *f) { - upb_strref ref; - lupb_getvalue(L, narg, f, &ref); +// Converts a upb_value -> Lua value. 
+static void lupb_pushvalue(lua_State *L, upb_value val, upb_fieldtype_t type) { + // Pushes exactly one Lua value for the handled types; any other type raises + // a Lua error ("internal error"). + switch (type) { + case UPB_TYPE(INT32): + case UPB_TYPE(SINT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(ENUM): + lua_pushnumber(L, upb_value_getint32(val)); break; + case UPB_TYPE(INT64): + case UPB_TYPE(SINT64): + case UPB_TYPE(SFIXED64): + lua_pushnumber(L, upb_value_getint64(val)); break; + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + lua_pushnumber(L, upb_value_getuint32(val)); break; + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + lua_pushnumber(L, upb_value_getuint64(val)); break; + case UPB_TYPE(DOUBLE): + lua_pushnumber(L, upb_value_getdouble(val)); break; + case UPB_TYPE(FLOAT): + lua_pushnumber(L, upb_value_getfloat(val)); break; + case UPB_TYPE(BOOL): + lua_pushboolean(L, upb_value_getbool(val)); break; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): { + // Pushes the bytes returned by upb_byteregion_getptr() as a Lua string. + const upb_byteregion *r = upb_value_getbyteregion(val); + size_t len; + const char *str = upb_byteregion_getptr(r, 0, &len); + lua_pushlstring(L, str, len); + // Without this break the case fell through to the error default below. + break; + } + default: luaL_error(L, "internal error"); + } } -//static void lupb_msg_getorcreate(lua_State *L, upb_msg *msg, upb_msgdef *md); -static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f); -static upb_msgdef *lupb_msgdef_check(lua_State *L, int narg); -static void lupb_msg_pushnew(lua_State *L, const void *md); - void lupb_checkstatus(lua_State *L, upb_status *s) { if (!upb_ok(s)) { - // Need to copy the string to the stack, so we can free it and not leak - // it (since luaL_error() does not return).
- const char *str = upb_status_getstr(s); - char buf[strlen(str)+1]; - strcpy(buf, str); + lua_pushstring(L, upb_status_getstr(s)); + upb_status_uninit(s); + lua_error(L); + } else { upb_status_uninit(s); - luaL_error(L, "%s", buf); } - upb_status_uninit(s); } -/* object cache ***************************************************************/ +/* refcounted *****************************************************************/ + +// All upb objects that use upb_refcounted share a common Lua userdata +// representation and a common scheme for caching Lua wrapper object. They do +// however have different metatables. Objects are cached in a weak table +// indexed by the C pointer of the object they are caching. + +typedef union { + const upb_refcounted *refcounted; + const upb_def *def; + upb_symtab *symtab; +} lupb_refcounted; -// We cache all the lua objects (userdata) we vend in a weak table, indexed by -// the C pointer of the object they are caching. +static bool lupb_refcounted_pushwrapper(lua_State *L, const upb_refcounted *obj, + const char *type, const void *owner) { + if (obj == NULL) { + lua_pushnil(L); + return false; + } -static void *lupb_cache_getorcreate_size( - lua_State *L, void *cobj, const char *type, size_t size) { // Lookup our cache in the registry (we don't put our objects in the registry // directly because we need our cache to be a weak table). - void **obj = NULL; - lua_getfield(L, LUA_REGISTRYINDEX, "upb.objcache"); + lupb_refcounted *ud = NULL; + lua_getfield(L, LUA_REGISTRYINDEX, LUPB_OBJCACHE); assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb. - lua_pushlightuserdata(L, cobj); + lua_pushlightuserdata(L, (void*)obj); lua_rawget(L, -2); // Stack: objcache, cached value. - if (lua_isnil(L, -1)) { + bool create = lua_isnil(L, -1) || + // A corner case: it is possible for the value to be GC'd + // already, in which case we should evict this entry and create + // a new one. 
+ ((lupb_refcounted*)lua_touserdata(L, -1))->refcounted == NULL; + if (create) { // Remove bad cached value and push new value. lua_pop(L, 1); + // We take advantage of the fact that all of our objects are currently a // single pointer, and thus have the same layout. - obj = lua_newuserdata(L, size); - *obj = cobj; + // TODO: this probably violates aliasing. + ud = lua_newuserdata(L, sizeof(lupb_refcounted)); + ud->refcounted = obj; + upb_refcounted_donateref(obj, owner, ud); + luaL_getmetatable(L, type); assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb. lua_setmetatable(L, -2); // Set it in the cache. - lua_pushlightuserdata(L, cobj); + lua_pushlightuserdata(L, (void*)obj); lua_pushvalue(L, -2); lua_rawset(L, -4); + } else { + // Existing wrapper obj already has a ref. + ud = lua_touserdata(L, -1); + upb_refcounted_checkref(obj, ud); + if (owner) + upb_refcounted_unref(obj, owner); } lua_insert(L, -2); lua_pop(L, 1); - return obj; -} - -// Most types are just 1 pointer and can use this helper. -static bool lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type) { - return lupb_cache_getorcreate_size(L, cobj, type, sizeof(void*)) != NULL; + return create; } -static void lupb_cache_create(lua_State *L, void *cobj, const char *type) { - bool created = - lupb_cache_getorcreate_size(L, cobj, type, sizeof(void*)) != NULL; - (void)created; // For NDEBUG - assert(created); +static void lupb_refcounted_pushnewrapper(lua_State *L, upb_refcounted *obj, + const char *type, const void *owner) { + bool created = lupb_refcounted_pushwrapper(L, obj, type, owner); + UPB_ASSERT_VAR(created, created == true); } /* lupb_def *******************************************************************/ -// All the def types share the same C layout, even though they are different Lua -// types with different metatables. 
-typedef struct { - upb_def *def; -} lupb_def; - -static lupb_def *lupb_def_check(lua_State *L, int narg) { - void *ldef = luaL_checkudata(L, narg, "upb.msgdef"); - if (!ldef) ldef = luaL_checkudata(L, narg, "upb.enumdef"); - if (!ldef) luaL_typerror(L, narg, "upb def"); - return ldef; -} - -static void lupb_def_getorcreate(lua_State *L, const upb_def *def, int owned) { - bool created = false; - switch(def->type) { - case UPB_DEF_MSG: - created = lupb_cache_getorcreate(L, (void*)def, "upb.msgdef"); - break; - case UPB_DEF_ENUM: - created = lupb_cache_getorcreate(L, (void*)def, "upb.enumdef"); - break; - default: - luaL_error(L, "unknown deftype %d", def->type); +static const upb_def *lupb_def_check(lua_State *L, int narg) { + lupb_refcounted *r = luaL_testudata(L, narg, LUPB_MSGDEF); + if (!r) r = luaL_testudata(L, narg, LUPB_ENUMDEF); + if (!r) r = luaL_testudata(L, narg, LUPB_FIELDDEF); + if (!r) luaL_typerror(L, narg, "upb def"); + if (!r->refcounted) luaL_error(L, "called into dead def"); + return r->def; +} + +static upb_def *lupb_def_checkmutable(lua_State *L, int narg) { + const upb_def *def = lupb_def_check(L, narg); + if (upb_def_isfrozen(def)) + luaL_typerror(L, narg, "not allowed on frozen value"); + return (upb_def*)def; +} + +bool lupb_def_pushwrapper(lua_State *L, const upb_def *def, const void *owner) { + if (def == NULL) { + lua_pushnil(L); + return false; } - if (!owned && created) { - upb_def_ref(def); - } else if (owned && !created) { - upb_def_unref(def); + + const char *type = NULL; + switch (def->type) { + case UPB_DEF_MSG: type = LUPB_MSGDEF; break; + case UPB_DEF_ENUM: type = LUPB_ENUMDEF; break; + case UPB_DEF_FIELD: type = LUPB_FIELDDEF; break; + default: luaL_error(L, "unknown deftype %d", def->type); } + return lupb_refcounted_pushwrapper(L, upb_upcast(def), type, owner); } +void lupb_def_pushnewrapper(lua_State *L, const upb_def *def, + const void *owner) { + bool created = lupb_def_pushwrapper(L, def, owner); + UPB_ASSERT_VAR(created, 
created == true); +} -/* lupb_fielddef **************************************************************/ +static int lupb_def_type(lua_State *L) { + const upb_def *def = lupb_def_check(L, 1); + lua_pushnumber(L, upb_def_type(def)); + return 1; +} -typedef struct { - upb_fielddef *field; -} lupb_fielddef; - -static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) { - lupb_fielddef *f = luaL_checkudata(L, narg, "upb.fielddef"); - if (!f) luaL_typerror(L, narg, "upb fielddef"); - return f; -} - -static int lupb_fielddef_index(lua_State *L) { - lupb_fielddef *f = lupb_fielddef_check(L, 1); - const char *str = luaL_checkstring(L, 2); - if (streql(str, "name")) { - lua_pushstring(L, upb_fielddef_name(f->field)); - } else if (streql(str, "number")) { - lua_pushinteger(L, upb_fielddef_number(f->field)); - } else if (streql(str, "type")) { - lua_pushinteger(L, upb_fielddef_type(f->field)); - } else if (streql(str, "label")) { - lua_pushinteger(L, upb_fielddef_label(f->field)); - } else if (streql(str, "subdef")) { - lupb_def_getorcreate(L, upb_fielddef_subdef(f->field), false); - } else if (streql(str, "msgdef")) { - lupb_def_getorcreate(L, UPB_UPCAST(upb_fielddef_msgdef(f->field)), false); - } else { - luaL_error(L, "Invalid fielddef member '%s'", str); - } +static int lupb_def_isfrozen(lua_State *L) { + const upb_def *def = lupb_def_check(L, 1); + lua_pushboolean(L, upb_def_isfrozen(def)); return 1; } -static void lupb_fielddef_set(lua_State *L, upb_fielddef *f, - const char *field, int narg) { - if (!upb_fielddef_ismutable(f)) luaL_error(L, "fielddef is not mutable."); - if (streql(field, "name")) { - const char *name = lua_tostring(L, narg); - if (!name || !upb_fielddef_setname(f, name)) - luaL_error(L, "Invalid name"); - } else if (streql(field, "number")) { - if (!upb_fielddef_setnumber(f, lupb_touint32(L, narg, "number"))) - luaL_error(L, "Invalid number"); - } else if (streql(field, "type")) { - if (!upb_fielddef_settype(f, lupb_touint8(L, narg, "type"))) - 
luaL_error(L, "Invalid type"); - } else if (streql(field, "label")) { - if (!upb_fielddef_setlabel(f, lupb_touint8(L, narg, "label"))) - luaL_error(L, "Invalid label"); - } else if (streql(field, "type_name")) { - const char *name = lua_tostring(L, narg); - if (!name || !upb_fielddef_settypename(f, name)) - luaL_error(L, "Invalid type_name"); - } else if (streql(field, "default_value")) { - if (!upb_fielddef_type(f)) - luaL_error(L, "Must set type before setting default_value"); - upb_strref ref; - upb_fielddef_setdefault(f, lupb_getvalue(L, narg, f, &ref)); +static int lupb_def_fullname(lua_State *L) { + const upb_def *def = lupb_def_check(L, 1); + lua_pushstring(L, upb_def_fullname(def)); + return 1; +} + +static int lupb_def_setfullname(lua_State *L) { + upb_def *def = lupb_def_checkmutable(L, 1); + const char *name = lupb_checkname(L, 2); + upb_def_setfullname(def, name); + return 0; +} + +#define LUPB_COMMON_DEF_METHODS \ + {"def_type", lupb_def_type}, \ + {"full_name", lupb_def_fullname}, \ + {"is_frozen", lupb_def_isfrozen}, \ + {"set_full_name", lupb_def_setfullname}, \ + + +/* lupb_fielddef **************************************************************/ + +static const upb_fielddef *lupb_fielddef_check(lua_State *L, int narg) { + lupb_refcounted *r = luaL_checkudata(L, narg, LUPB_FIELDDEF); + if (!r) luaL_typerror(L, narg, "upb fielddef"); + if (!r->refcounted) luaL_error(L, "called into dead fielddef"); + return upb_downcast_fielddef(r->def); +} + +static upb_fielddef *lupb_fielddef_checkmutable(lua_State *L, int narg) { + const upb_fielddef *f = lupb_fielddef_check(L, narg); + if (upb_fielddef_isfrozen(f)) + luaL_typerror(L, narg, "not allowed on frozen value"); + return (upb_fielddef*)f; +} + +// Setter functions; these are called by both the constructor and the individual +// setter API calls like field:set_type(). 
+ +static void lupb_fielddef_dosetdefault(lua_State *L, upb_fielddef *f, + int narg) { + int type = lua_type(L, narg); + upb_fieldtype_t upbtype = upb_fielddef_type(f); + if (type == LUA_TSTRING) { + if (!upb_fielddef_isstring(f) && upbtype != UPB_TYPE(ENUM)) + luaL_argerror(L, narg, "field does not expect a string default"); + size_t len; + const char *str = lua_tolstring(L, narg, &len); + if (!upb_fielddef_setdefaultstr(f, str, len)) + luaL_argerror(L, narg, "invalid default string for enum"); } else { - luaL_error(L, "Cannot set fielddef member '%s'", field); + upb_fielddef_setdefault(f, lupb_getvalue(L, narg, upbtype)); } } +static void lupb_fielddef_dosetlabel(lua_State *L, upb_fielddef *f, int narg) { + upb_label_t label = luaL_checknumber(L, narg); + if (!upb_fielddef_setlabel(f, label)) + luaL_argerror(L, narg, "invalid field label"); +} + +static void lupb_fielddef_dosetnumber(lua_State *L, upb_fielddef *f, int narg) { + int32_t n = luaL_checknumber(L, narg); + if (!upb_fielddef_setnumber(f, n)) + luaL_argerror(L, narg, "invalid field number"); +} + +static void lupb_fielddef_dosetsubdef(lua_State *L, upb_fielddef *f, int narg) { + const upb_def *def = NULL; + if (!lua_isnil(L, narg)) + def = lupb_def_check(L, narg); + if (!upb_fielddef_setsubdef(f, def)) + luaL_argerror(L, narg, "invalid subdef for this field"); +} + +static void lupb_fielddef_dosetsubdefname(lua_State *L, upb_fielddef *f, + int narg) { + const char *name = NULL; + if (!lua_isnil(L, narg)) + name = lupb_checkname(L, narg); + if (!upb_fielddef_setsubdefname(f, name)) + luaL_argerror(L, narg, "field type does not expect a subdef"); +} + +static void lupb_fielddef_dosettype(lua_State *L, upb_fielddef *f, int narg) { + int32_t type = luaL_checknumber(L, narg); + if (!upb_fielddef_settype(f, type)) + luaL_argerror(L, narg, "invalid field type"); +} + +// Setter API calls. These use the setter functions above. 
+ +static int lupb_fielddef_setdefault(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetdefault(L, f, 2); + return 0; +} + +static int lupb_fielddef_setlabel(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetlabel(L, f, 2); + return 0; +} + +static int lupb_fielddef_setnumber(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetnumber(L, f, 2); + return 0; +} + +static int lupb_fielddef_setsubdef(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetsubdef(L, f, 2); + return 0; +} + +static int lupb_fielddef_setsubdefname(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetsubdefname(L, f, 2); + return 0; +} + +static int lupb_fielddef_settype(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosettype(L, f, 2); + return 0; +} + +// Constructor and other methods. + static int lupb_fielddef_new(lua_State *L) { - upb_fielddef *f = upb_fielddef_new(); - lupb_cache_create(L, f, "upb.fielddef"); + upb_fielddef *f = upb_fielddef_new(&f); + int narg = lua_gettop(L); + + lupb_def_pushnewrapper(L, upb_upcast(f), &f); - if (lua_gettop(L) == 0) return 1; + if (narg == 0) return 1; // User can specify initialization values like so: // upb.FieldDef{label=upb.LABEL_REQUIRED, name="my_field", number=5, // type=upb.TYPE_INT32, default_value=12, type_name="Foo"} luaL_checktype(L, 1, LUA_TTABLE); - // Iterate over table. 
- lua_pushnil(L); // first key - while (lua_next(L, 1)) { + for (lua_pushnil(L); lua_next(L, 1); lua_pop(L, 1)) { luaL_checktype(L, -2, LUA_TSTRING); const char *key = lua_tostring(L, -2); - lupb_fielddef_set(L, f, key, -1); - lua_pop(L, 1); + int v = -1; + if (streql(key, "name")) upb_fielddef_setname(f, lupb_checkname(L, v)); + else if (streql(key, "number")) lupb_fielddef_dosetnumber(L, f, v); + else if (streql(key, "type")) lupb_fielddef_dosettype(L, f, v); + else if (streql(key, "label")) lupb_fielddef_dosetlabel(L, f, v); + else if (streql(key, "default_value")) ; // Defer to second pass. + else if (streql(key, "subdef")) ; // Defer to second pass. + else if (streql(key, "subdef_name")) ; // Defer to second pass. + else luaL_error(L, "Cannot set fielddef member '%s'", key); } + + // Have to do these in a second pass because these depend on the type, so we + // have to make sure the type is set if the user specified one. + for (lua_pushnil(L); lua_next(L, 1); lua_pop(L, 1)) { + const char *key = lua_tostring(L, -2); + int v = -1; + if (streql(key, "default_value")) lupb_fielddef_dosetdefault(L, f, v); + else if (streql(key, "subdef")) lupb_fielddef_dosetsubdef(L, f, v); + else if (streql(key, "subdef_name")) lupb_fielddef_dosetsubdefname(L, f, v); + } + return 1; } -static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f) { - bool created = lupb_cache_getorcreate(L, f, "upb.fielddef"); - if (created) upb_fielddef_ref(f); +static int lupb_fielddef_default(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + upb_fieldtype_t type = upb_fielddef_type(f); + if (upb_fielddef_default_is_symbolic(f)) + type = UPB_TYPE(STRING); + lupb_pushvalue(L, upb_fielddef_default(f), type); + return 1; } -static int lupb_fielddef_newindex(lua_State *L) { - lupb_fielddef *f = lupb_fielddef_check(L, 1); - lupb_fielddef_set(L, f->field, luaL_checkstring(L, 2), 3); - return 0; +static int lupb_fielddef_label(lua_State *L) { + const upb_fielddef *f = 
lupb_fielddef_check(L, 1); + lua_pushnumber(L, upb_fielddef_label(f)); + return 1; +} + +static int lupb_fielddef_number(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + int32_t num = upb_fielddef_number(f); + if (num) + lua_pushnumber(L, num); + else + lua_pushnil(L); + return 1; +} + +static int lupb_fielddef_selectorbase(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + if (!upb_fielddef_isfrozen(f)) + luaL_error(L, "_selectorbase is only defined for frozen fielddefs"); + lua_pushnumber(L, f->selector_base); + return 1; +} + +static int lupb_fielddef_hassubdef(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushboolean(L, upb_fielddef_hassubdef(f)); + return 1; +} + +static int lupb_fielddef_msgdef(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lupb_def_pushwrapper(L, upb_upcast(upb_fielddef_msgdef(f)), NULL); + return 1; +} + +static int lupb_fielddef_subdef(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + if (!upb_fielddef_hassubdef(f)) + luaL_error(L, "Tried to get subdef of non-message field"); + const upb_def *def = upb_fielddef_subdef(f); + lupb_def_pushwrapper(L, def, NULL); + return 1; +} + +static int lupb_fielddef_subdefname(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + if (!upb_fielddef_hassubdef(f)) + luaL_error(L, "Tried to get subdef name of non-message field"); + lua_pushstring(L, upb_fielddef_subdefname(f)); + return 1; +} + +static int lupb_fielddef_type(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushnumber(L, upb_fielddef_type(f)); + return 1; } static int lupb_fielddef_gc(lua_State *L) { - lupb_fielddef *lfielddef = lupb_fielddef_check(L, 1); - upb_fielddef_unref(lfielddef->field); + lupb_refcounted *r = luaL_checkudata(L, 1, LUPB_FIELDDEF); + upb_def_unref(r->def, r); + r->refcounted = NULL; return 0; } +static const struct luaL_Reg lupb_fielddef_m[] = { + 
LUPB_COMMON_DEF_METHODS + + {"default", lupb_fielddef_default}, + {"has_subdef", lupb_fielddef_hassubdef}, + {"label", lupb_fielddef_label}, + {"msgdef", lupb_fielddef_msgdef}, + {"name", lupb_def_fullname}, // name() is just an alias for fullname() + {"number", lupb_fielddef_number}, + {"subdef", lupb_fielddef_subdef}, + {"subdef_name", lupb_fielddef_subdefname}, + {"type", lupb_fielddef_type}, + + {"set_default", lupb_fielddef_setdefault}, + {"set_label", lupb_fielddef_setlabel}, + {"set_name", lupb_def_setfullname}, // name() is just an alias for fullname() + {"set_number", lupb_fielddef_setnumber}, + {"set_subdef", lupb_fielddef_setsubdef}, + {"set_subdef_name", lupb_fielddef_setsubdefname}, + {"set_type", lupb_fielddef_settype}, + + // Internal-only. + {"_selector_base", lupb_fielddef_selectorbase}, + + {NULL, NULL} +}; + static const struct luaL_Reg lupb_fielddef_mm[] = { {"__gc", lupb_fielddef_gc}, - {"__index", lupb_fielddef_index}, - {"__newindex", lupb_fielddef_newindex}, {NULL, NULL} }; /* lupb_msgdef ****************************************************************/ -static upb_msgdef *lupb_msgdef_check(lua_State *L, int narg) { - lupb_def *ldef = luaL_checkudata(L, narg, "upb.msgdef"); - luaL_argcheck(L, ldef != NULL, narg, "upb msgdef expected"); - return upb_downcast_msgdef(ldef->def); +const upb_msgdef *lupb_msgdef_check(lua_State *L, int narg) { + lupb_refcounted *r = luaL_checkudata(L, narg, LUPB_MSGDEF); + if (!r) luaL_typerror(L, narg, LUPB_MSGDEF); + if (!r->refcounted) luaL_error(L, "called into dead msgdef"); + return upb_downcast_msgdef(r->def); } -static int lupb_msgdef_gc(lua_State *L) { - lupb_def *ldef = luaL_checkudata(L, 1, "upb.msgdef"); - upb_def_unref(ldef->def); - return 0; +static upb_msgdef *lupb_msgdef_checkmutable(lua_State *L, int narg) { + const upb_msgdef *m = lupb_msgdef_check(L, narg); + if (upb_msgdef_isfrozen(m)) + luaL_typerror(L, narg, "not allowed on frozen value"); + return (upb_msgdef*)m; } -static int 
lupb_msgdef_call(lua_State *L) { - upb_msgdef *md = lupb_msgdef_check(L, 1); - lupb_msg_pushnew(L, md); - return 1; +static int lupb_msgdef_gc(lua_State *L) { + lupb_refcounted *r = luaL_checkudata(L, 1, LUPB_MSGDEF); + upb_def_unref(r->def, r); + r->refcounted = NULL; + return 0; } static int lupb_msgdef_new(lua_State *L) { - upb_msgdef *md = upb_msgdef_new(); - lupb_cache_create(L, md, "upb.msgdef"); + int narg = lua_gettop(L); + upb_msgdef *md = upb_msgdef_new(&md); + lupb_def_pushnewrapper(L, upb_upcast(md), &md); - if (lua_gettop(L) == 0) return 1; + if (narg == 0) return 1; // User can specify initialization values like so: - // upb.MessageDef{fqname="MyMessage", extstart=8000, fields={...}} + // upb.MessageDef{full_name="MyMessage", extstart=8000, fields={...}} luaL_checktype(L, 1, LUA_TTABLE); - // Iterate over table. - lua_pushnil(L); // first key - while (lua_next(L, 1)) { + for (lua_pushnil(L); lua_next(L, 1); lua_pop(L, 1)) { luaL_checktype(L, -2, LUA_TSTRING); const char *key = lua_tostring(L, -2); - if (streql(key, "fqname")) { // fqname="MyMessage" + if (streql(key, "full_name")) { // full_name="MyMessage" const char *fqname = lua_tostring(L, -1); - if (!fqname || !upb_def_setfqname(UPB_UPCAST(md), fqname)) - luaL_error(L, "Invalid fqname"); + if (!fqname || !upb_def_setfullname(upb_upcast(md), fqname)) + luaL_error(L, "Invalid full_name"); } else if (streql(key, "fields")) { // fields={...} // Iterate over the list of fields. - lua_pushnil(L); - luaL_checktype(L, -2, LUA_TTABLE); - while (lua_next(L, -2)) { - lupb_fielddef *f = lupb_fielddef_check(L, -1); - if (!upb_msgdef_addfield(md, f->field)) { + luaL_checktype(L, -1, LUA_TTABLE); + for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, -1); + if (!upb_msgdef_addfield(md, f, NULL)) { // TODO: more specific error. 
luaL_error(L, "Could not add field."); } - lua_pop(L, 1); } } else { // TODO: extrange= luaL_error(L, "Unknown initializer key '%s'", key); } + } + return 1; +} + +static int lupb_msgdef_add(lua_State *L) { + upb_msgdef *m = lupb_msgdef_checkmutable(L, 1); + luaL_checktype(L, 2, LUA_TTABLE); + int n = lua_rawlen(L, 2); + // TODO: add upb interface that lets us avoid this malloc/free. + upb_fielddef **fields = malloc(n * sizeof(upb_fielddef*)); + for (int i = 0; i < n; i++) { + lua_rawgeti(L, -1, i + 1); + fields[i] = lupb_fielddef_checkmutable(L, -1); lua_pop(L, 1); } + + bool success = upb_msgdef_addfields(m, fields, n, NULL); + free(fields); + if (!success) luaL_error(L, "fields could not be added"); + return 0; +} + +static int lupb_msgdef_len(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + lua_pushinteger(L, upb_msgdef_numfields(m)); return 1; } -static int lupb_msgdef_fqname(lua_State *L) { - upb_msgdef *m = lupb_msgdef_check(L, 1); - lua_pushstring(L, m->base.fqname); +static int lupb_msgdef_selectorcount(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + lua_pushinteger(L, m->selector_count); return 1; } -static int lupb_msgdef_fieldbyname(lua_State *L) { - upb_msgdef *m = lupb_msgdef_check(L, 1); - upb_fielddef *f = upb_msgdef_ntof(m, luaL_checkstring(L, 2)); - if (f) { - lupb_fielddef_getorcreate(L, f); +static int lupb_msgdef_field(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + int type = lua_type(L, 2); + const upb_fielddef *f; + if (type == LUA_TNUMBER) { + f = upb_msgdef_itof(m, lua_tointeger(L, 2)); + } else if (type == LUA_TSTRING) { + f = upb_msgdef_ntof(m, lua_tostring(L, 2)); } else { - lua_pushnil(L); + const char *msg = lua_pushfstring(L, "number or string expected, got %s", + luaL_typename(L, 2)); + return luaL_argerror(L, 2, msg); } + + lupb_def_pushwrapper(L, upb_upcast(f), NULL); return 1; } -static int lupb_msgdef_fieldbynum(lua_State *L) { - upb_msgdef *m = lupb_msgdef_check(L, 1); - 
int num = luaL_checkint(L, 2); - upb_fielddef *f = upb_msgdef_itof(m, num); - if (f) { - lupb_fielddef_getorcreate(L, f); - } else { - lua_pushnil(L); - } +static int lupb_msgiter_next(lua_State *L) { + upb_msg_iter *i = lua_touserdata(L, lua_upvalueindex(1)); + if (upb_msg_done(i)) return 0; + lupb_def_pushwrapper(L, upb_upcast(upb_msg_iter_field(i)), NULL); + upb_msg_next(i); + return 1; +} + +static int lupb_msgdef_fields(lua_State *L) { + const upb_msgdef *m = lupb_msgdef_check(L, 1); + upb_msg_iter *i = lua_newuserdata(L, sizeof(upb_msg_iter)); + upb_msg_begin(i, m); + lua_pushcclosure(L, &lupb_msgiter_next, 1); return 1; } static const struct luaL_Reg lupb_msgdef_mm[] = { - {"__call", lupb_msgdef_call}, {"__gc", lupb_msgdef_gc}, + {"__len", lupb_msgdef_len}, {NULL, NULL} }; static const struct luaL_Reg lupb_msgdef_m[] = { - {"fieldbyname", lupb_msgdef_fieldbyname}, - {"fieldbynum", lupb_msgdef_fieldbynum}, - {"fqname", lupb_msgdef_fqname}, + LUPB_COMMON_DEF_METHODS + {"add", lupb_msgdef_add}, + {"field", lupb_msgdef_field}, + {"fields", lupb_msgdef_fields}, + + // Internal-only. 
+ {"_selector_count", lupb_msgdef_selectorcount}, + {NULL, NULL} }; /* lupb_enumdef ***************************************************************/ -static upb_enumdef *lupb_enumdef_check(lua_State *L, int narg) { - lupb_def *ldef = luaL_checkudata(L, narg, "upb.enumdef"); - return upb_downcast_enumdef(ldef->def); +const upb_enumdef *lupb_enumdef_check(lua_State *L, int narg) { + lupb_refcounted *r = luaL_checkudata(L, narg, LUPB_ENUMDEF); + if (!r) luaL_typerror(L, narg, LUPB_ENUMDEF); + if (!r->refcounted) luaL_error(L, "called into dead enumdef"); + return upb_downcast_enumdef(r->def); +} + +static upb_enumdef *lupb_enumdef_checkmutable(lua_State *L, int narg) { + const upb_enumdef *f = lupb_enumdef_check(L, narg); + if (upb_enumdef_isfrozen(f)) + luaL_typerror(L, narg, "not allowed on frozen value"); + return (upb_enumdef*)f; } static int lupb_enumdef_gc(lua_State *L) { - upb_enumdef *e = lupb_enumdef_check(L, 1); - upb_def_unref(UPB_UPCAST(e)); + lupb_refcounted *r = luaL_checkudata(L, 1, LUPB_ENUMDEF); + upb_def_unref(r->def, r); + r->refcounted = NULL; return 0; } -static int lupb_enumdef_name(lua_State *L) { - upb_enumdef *e = lupb_enumdef_check(L, 1); - lua_pushstring(L, e->base.fqname); +static int lupb_enumdef_new(lua_State *L) { + int narg = lua_gettop(L); + upb_enumdef *e = upb_enumdef_new(&e); + lupb_def_pushnewrapper(L, upb_upcast(e), &e); + + if (narg == 0) return 1; + + // User can specify initialization values like so: + // upb.EnumDef{full_name="MyEnum", + // values={ + // {"FOO_VALUE_1", 1}, + // {"FOO_VALUE_2", 2} + // } + // } + luaL_checktype(L, 1, LUA_TTABLE); + for (lua_pushnil(L); lua_next(L, 1); lua_pop(L, 1)) { + luaL_checktype(L, -2, LUA_TSTRING); + const char *key = lua_tostring(L, -2); + if (streql(key, "values")) { + for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) { + lua_rawgeti(L, -1, 1); + luaL_checktype(L, -1, LUA_TSTRING); + const char *name = lua_tostring(L, -1); + lua_rawgeti(L, -2, 2); + int32_t num = lupb_checkint32(L, 
-1, "value"); + upb_status status = UPB_STATUS_INIT; + upb_enumdef_addval(e, name, num, &status); + lupb_checkstatus(L, &status); + lua_pop(L, 2); // The key/val we got from lua_rawgeti() + } + } else if (streql(key, "full_name")) { + const char *fullname = lua_tostring(L, -1); + if (!fullname || !upb_def_setfullname(upb_upcast(e), fullname)) + luaL_error(L, "Invalid full_name"); + } else { + luaL_error(L, "Unknown initializer key '%s'", key); + } + } + return 1; +} + +static int lupb_enumdef_add(lua_State *L) { + upb_enumdef *e = lupb_enumdef_checkmutable(L, 1); + const char *name = lupb_checkname(L, 2); + int32_t num = lupb_checkint32(L, 3, "value"); + upb_status status = UPB_STATUS_INIT; + upb_enumdef_addval(e, name, num, &status); + lupb_checkstatus(L, &status); + return 0; +} + +static int lupb_enumdef_len(lua_State *L) { + const upb_enumdef *e = lupb_enumdef_check(L, 1); + lua_pushinteger(L, upb_enumdef_numvals(e)); + return 1; +} + +static int lupb_enumdef_value(lua_State *L) { + const upb_enumdef *e = lupb_enumdef_check(L, 1); + int type = lua_type(L, 2); + if (type == LUA_TNUMBER) { + // Pushes "nil" for a NULL pointer. 
+ lua_pushstring(L, upb_enumdef_iton(e, lupb_checkint32(L, 2, "value"))); + } else if (type == LUA_TSTRING) { + int32_t num; + if (upb_enumdef_ntoi(e, lua_tostring(L, 2), &num)) { + lua_pushnumber(L, num); + } else { + lua_pushnil(L); + } + } else { + const char *msg = lua_pushfstring(L, "number or string expected, got %s", + luaL_typename(L, 2)); + return luaL_argerror(L, 2, msg); + } + return 1; +} + +static int lupb_enumiter_next(lua_State *L) { + upb_enum_iter *i = lua_touserdata(L, lua_upvalueindex(1)); + if (upb_enum_done(i)) return 0; + lua_pushstring(L, upb_enum_iter_name(i)); + lua_pushnumber(L, upb_enum_iter_number(i)); + upb_enum_next(i); + return 2; +} + +static int lupb_enumdef_values(lua_State *L) { + const upb_enumdef *e = lupb_enumdef_check(L, 1); + upb_enum_iter *i = lua_newuserdata(L, sizeof(upb_enum_iter)); + upb_enum_begin(i, e); + lua_pushcclosure(L, &lupb_enumiter_next, 1); return 1; } static const struct luaL_Reg lupb_enumdef_mm[] = { {"__gc", lupb_enumdef_gc}, + {"__len", lupb_enumdef_len}, {NULL, NULL} }; static const struct luaL_Reg lupb_enumdef_m[] = { - {"name", lupb_enumdef_name}, + LUPB_COMMON_DEF_METHODS + {"add", lupb_enumdef_add}, + {"value", lupb_enumdef_value}, + {"values", lupb_enumdef_values}, {NULL, NULL} }; /* lupb_symtab ****************************************************************/ -typedef struct { - upb_symtab *symtab; -} lupb_symtab; - -static upb_accessor_vtbl *lupb_accessor(upb_fielddef *f); - // Inherits a ref on the symtab. // Checks that narg is a proper lupb_symtab object. If it is, leaves its // metatable on the stack for cache lookups/updates. 
-lupb_symtab *lupb_symtab_check(lua_State *L, int narg) { - return luaL_checkudata(L, narg, "upb.symtab"); +upb_symtab *lupb_symtab_check(lua_State *L, int narg) { + lupb_refcounted *r = luaL_checkudata(L, narg, LUPB_SYMTAB); + if (!r) luaL_typerror(L, narg, LUPB_SYMTAB); + if (!r->refcounted) luaL_error(L, "called into dead symtab"); + return r->symtab; } // narg is a lua table containing a list of defs to add. @@ -509,93 +867,91 @@ void lupb_symtab_doadd(lua_State *L, upb_symtab *s, int narg) { // Iterate over table twice. First iteration to count entries and // check constraints. int n = 0; - lua_pushnil(L); // first key - while (lua_next(L, narg)) { + for (lua_pushnil(L); lua_next(L, narg); lua_pop(L, 1)) { lupb_def_check(L, -1); ++n; - lua_pop(L, 1); } // Second iteration to build deflist and layout. upb_def **defs = malloc(n * sizeof(*defs)); n = 0; - lua_pushnil(L); // first key - while (lua_next(L, 1)) { - upb_def *def = lupb_def_check(L, -1)->def; + for (lua_pushnil(L); lua_next(L, narg); lua_pop(L, 1)) { + upb_def *def = lupb_def_checkmutable(L, -1); defs[n++] = def; - upb_msgdef *md = upb_dyncast_msgdef(def); - if (md) { - upb_msg_iter i; - for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - upb_fielddef_setaccessor(f, lupb_accessor(f)); - } - upb_msgdef_layout(md); - } - lua_pop(L, 1); } upb_status status = UPB_STATUS_INIT; - upb_symtab_add(s, defs, n, &status); + upb_symtab_add(s, defs, n, NULL, &status); free(defs); lupb_checkstatus(L, &status); } static int lupb_symtab_new(lua_State *L) { - upb_symtab *s = upb_symtab_new(); - lupb_cache_create(L, s, "upb.symtab"); - if (lua_gettop(L) == 0) return 1; - lupb_symtab_doadd(L, s, 1); + int narg = lua_gettop(L); + upb_symtab *s = upb_symtab_new(&s); + lupb_refcounted_pushnewrapper(L, upb_upcast(s), LUPB_SYMTAB, &s); + if (narg > 0) lupb_symtab_doadd(L, s, 1); return 1; } static int lupb_symtab_add(lua_State *L) { - lupb_symtab *s = 
lupb_symtab_check(L, 1); - lupb_symtab_doadd(L, s->symtab, 2); + lupb_symtab_doadd(L, lupb_symtab_check(L, 1), 2); return 0; } static int lupb_symtab_gc(lua_State *L) { - lupb_symtab *s = lupb_symtab_check(L, 1); - upb_symtab_unref(s->symtab); + lupb_refcounted *r = luaL_checkudata(L, 1, LUPB_SYMTAB); + upb_symtab_unref(r->symtab, r); + r->refcounted = NULL; return 0; } static int lupb_symtab_lookup(lua_State *L) { - lupb_symtab *s = lupb_symtab_check(L, 1); + upb_symtab *s = lupb_symtab_check(L, 1); for (int i = 2; i <= lua_gettop(L); i++) { - const upb_def *def = upb_symtab_lookup(s->symtab, luaL_checkstring(L, i)); - if (def) { - lupb_def_getorcreate(L, def, true); - } else { - lua_pushnil(L); - } + const upb_def *def = + upb_symtab_lookup(s, luaL_checkstring(L, i), &def); + lupb_def_pushwrapper(L, def, &def); lua_replace(L, i); } return lua_gettop(L) - 1; } static int lupb_symtab_getdefs(lua_State *L) { - lupb_symtab *s = lupb_symtab_check(L, 1); + upb_symtab *s = lupb_symtab_check(L, 1); upb_deftype_t type = luaL_checkint(L, 2); int count; - const upb_def **defs = upb_symtab_getdefs(s->symtab, &count, type); + const upb_def **defs = upb_symtab_getdefs(s, type, &defs, &count); // Create the table in which we will return the defs. lua_createtable(L, count, 0); for (int i = 0; i < count; i++) { const upb_def *def = defs[i]; - lupb_def_getorcreate(L, def, true); + lupb_def_pushwrapper(L, def, &defs); lua_rawseti(L, -2, i + 1); } free(defs); return 1; } +// This is a *temporary* API that will be removed once pending refactorings are +// complete (it does not belong here in core because it depends on both +// the descriptor.proto schema and the protobuf binary format. 
+static int lupb_symtab_load_descriptor(lua_State *L) { + size_t len; + upb_symtab *s = lupb_symtab_check(L, 1); + const char *str = luaL_checklstring(L, 2, &len); + upb_status status = UPB_STATUS_INIT; + upb_load_descriptor_into_symtab(s, str, len, &status); + lupb_checkstatus(L, &status); + return 0; +} + static const struct luaL_Reg lupb_symtab_m[] = { {"add", lupb_symtab_add}, {"getdefs", lupb_symtab_getdefs}, {"lookup", lupb_symtab_lookup}, + {"load_descriptor", lupb_symtab_load_descriptor}, {NULL, NULL} }; @@ -605,343 +961,28 @@ static const struct luaL_Reg lupb_symtab_mm[] = { }; -/* lupb_msg********************************************************************/ - -// Messages are userdata. Primitive values (numbers and bools, and their -// hasbits) are stored right in the userdata. Other values are stored using -// integer entries in the environment table and no hasbits are used (since -// "nil" in the environment table can indicate "not present"). -// -// The environment table looks like: -// {msgdef, } - -// Must pass a upb_fielddef as the pointer. -static void lupb_array_pushnew(lua_State *L, const void *f); - -static void *lupb_msg_check(lua_State *L, int narg, upb_msgdef **md) { - void *msg = luaL_checkudata(L, narg, "upb.msg"); - luaL_argcheck(L, msg != NULL, narg, "msg expected"); - // If going all the way to the environment table for the msgdef is an - // efficiency issue, we could put the pointer right in the userdata. - lua_getfenv(L, narg); - lua_rawgeti(L, -1, 1); - // Shouldn't have to check msgdef userdata validity, environment table can't - // be accessed from Lua. - lupb_def *lmd = lua_touserdata(L, -1); - *md = upb_downcast_msgdef(lmd->def); - return msg; -} - -static void lupb_msg_pushnew(lua_State *L, const void *md) { - void *msg = lua_newuserdata(L, upb_msgdef_size(md)); - luaL_getmetatable(L, "upb.msg"); - assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb. 
- lua_setmetatable(L, -2); - upb_msg_clear(msg, md); - lua_getfenv(L, -1); - lupb_cache_getorcreate(L, (void*)md, "upb.msgdef"); - lua_rawseti(L, -2, 1); - lua_pop(L, 1); // Pop the fenv. -} - -static int lupb_msg_new(lua_State *L) { - upb_msgdef *md = lupb_msgdef_check(L, 1); - lupb_msg_pushnew(L, md); - return 1; -} - -static int lupb_msg_index(lua_State *L) { - upb_msgdef *md; - void *m = lupb_msg_check(L, 1, &md); - upb_fielddef *f = upb_msgdef_ntof(md, luaL_checkstring(L, 2)); - if (!f) luaL_argerror(L, 2, "not a field name"); - if (upb_isprimitivetype(upb_fielddef_type(f))) { - upb_value v = upb_msg_has(m, f) ? upb_msg_get(m, f) : upb_fielddef_default(f); - lupb_pushvalue(L, v, f); - } else { - lua_getfenv(L, 1); - lua_rawgeti(L, -1, f->offset); - if (lua_isnil(L, -1)) { - // Need to lazily create array, string, or submessage. - if (upb_isseq(f)) { - lupb_array_pushnew(L, f); - } else if (upb_isstring(f)) { - // TODO: (need to figure out default string ownership). - } else if (upb_issubmsg(f)) { - lupb_msg_pushnew(L, upb_downcast_msgdef(upb_fielddef_subdef(f))); - } else { - luaL_error(L, "internal error"); - } - lua_rawseti(L, -2, f->offset); - } - } - return 1; -} - -static int lupb_msg_newindex(lua_State *L) { - upb_msgdef *md; - void *m = lupb_msg_check(L, 1, &md); - upb_fielddef *f = upb_msgdef_ntof(md, luaL_checkstring(L, 2)); - if (!f) luaL_error(L, "not a field name"); - if (upb_isprimitivetype(upb_fielddef_type(f))) { - if (lua_isnil(L, 3)) - upb_msg_clearbit(m, f); - else - upb_msg_set(m, f, lupb_getvalue(L, 3, f, NULL)); - } else { - if (!lua_isnil(L, 3)) lupb_typecheck(L, 3, f); - lua_getfenv(L, 1); - lua_pushvalue(L, 3); - lua_rawseti(L, -1, f->offset); - } - return 0; -} - -static const struct luaL_Reg lupb_msg_mm[] = { - {"__index", lupb_msg_index}, - {"__newindex", lupb_msg_newindex}, - {NULL, NULL} -}; - -// Functions that operate on msgdefs but do not live in the msgdef namespace. 
-static int lupb_clear(lua_State *L) { - upb_msgdef *md; - void *m = lupb_msg_check(L, 1, &md); - upb_msg_clear(m, md); - return 0; -} - -static int lupb_has(lua_State *L) { - upb_msgdef *md; - void *m = lupb_msg_check(L, 1, &md); - upb_fielddef *f = upb_msgdef_ntof(md, luaL_checkstring(L, 2)); - if (!f) luaL_argerror(L, 2, "not a field name"); - lua_pushboolean(L, upb_msg_has(m, f)); - return 1; -} - -static int lupb_msgdef(lua_State *L) { - upb_msgdef *md; - lupb_msg_check(L, 1, &md); - lupb_def_getorcreate(L, UPB_UPCAST(md), false); - return 1; -} +/* lupb toplevel **************************************************************/ -// Accessors for arrays, strings, and submessages need access to the current -// userdata's environment table, which can only be stored in Lua space. -// Options for storing it are: -// -// - put the env tables for all messages and arrays in the registry, keyed by -// userdata pointer (light userdata), or by a reference using luaL_ref(). -// Then we can just let upb's parse stack track the stack of env tables. -// Easy but requires all messages and arrays to be in the registry, which -// seems too heavyweight. -// -// - store the stack of env tables in the Lua stack. Convenient, but requires -// special code to handle resumable decoders. -// -// There is also the question of how to obtain the lua_State* pointer. -// The main options for this are: -// -// - make our closure point to a struct: -// struct { void *msg; lua_State *L; } -// But then we can't use standard accessors, which expect the closure -// to point to the data itself. Using the standard accessors for -// primitive values is both a simplicity and a performance win. -// -// - store a lua_State* pointer inside each userdata. Convenient and -// efficient, but makes every message sizeof(void*) larger. -// Currently we take this route. -// -// - use thread-local storage. Convenient and efficient, but not portable. 
- -typedef void createfunc_t(lua_State *L, const void *param); - -static upb_sflow_t lupb_msg_start(void *m, const upb_fielddef *f, bool array, - createfunc_t *pushnew, const void *param) { - lua_State *L = *(lua_State**)m; - int offset = array ? lua_rawlen(L, -1) : f->offset; - if (!lua_checkstack(L, 3)) luaL_error(L, "stack full"); - lua_rawgeti(L, -1, offset); - if (lua_isnil(L, -1)) { - lua_pop(L, 1); - pushnew(L, param); - lua_pushvalue(L, -1); - lua_rawseti(L, -3, offset); +static int lupb_def_freeze(lua_State *L) { + int n = lua_gettop(L); + upb_def **defs = malloc(n * sizeof(upb_def*)); + for (int i = 0; i < n; i++) { + // Could allow an array of defs here also. + defs[i] = lupb_def_checkmutable(L, i + 1); } - void *subval = lua_touserdata(L, -1); - lua_getfenv(L, -1); - lua_replace(L, -2); // Replace subval userdata with fenv. - return UPB_CONTINUE_WITH(subval); -} - -static upb_flow_t lupb_msg_string(void *m, upb_value fval, upb_value val, - bool array) { - // Could add lazy materialization of strings here. - const upb_fielddef *f = upb_value_getfielddef(fval); - lua_State *L = *(lua_State**)m; - int offset = array ? 
lua_rawlen(L, -1) : f->offset; - if (!lua_checkstack(L, 1)) luaL_error(L, "stack full"); - lupb_pushstring(L, upb_value_getstrref(val)); - lua_rawseti(L, -2, offset); - return UPB_CONTINUE; -} - -static upb_sflow_t lupb_msg_startseq(void *m, upb_value fval) { - const upb_fielddef *f = upb_value_getfielddef(fval); - return lupb_msg_start(m, f, false, lupb_array_pushnew, f); -} - -static upb_sflow_t lupb_msg_startsubmsg(void *m, upb_value fval) { - const upb_fielddef *f = upb_value_getfielddef(fval); - return lupb_msg_start(m, f, false, lupb_msg_pushnew, upb_fielddef_subdef(f)); -} - -static upb_sflow_t lupb_msg_startsubmsg_r(void *a, upb_value fval) { - const upb_fielddef *f = upb_value_getfielddef(fval); - return lupb_msg_start(a, f, true, lupb_msg_pushnew, upb_fielddef_subdef(f)); -} - -static upb_flow_t lupb_msg_stringval(void *m, upb_value fval, upb_value val) { - return lupb_msg_string(m, fval, val, false); -} - -static upb_flow_t lupb_msg_stringval_r(void *a, upb_value fval, upb_value val) { - return lupb_msg_string(a, fval, val, true); -} - -#define STDMSG(type, size) static upb_accessor_vtbl vtbl = { \ - &lupb_msg_startsubmsg, \ - &upb_stdmsg_set ## type, \ - &lupb_msg_startseq, \ - &lupb_msg_startsubmsg_r, \ - &upb_stdmsg_set ## type ## _r, \ - &upb_stdmsg_has, \ - &upb_stdmsg_getptr, \ - &upb_stdmsg_get ## type, \ - &upb_stdmsg_seqbegin, \ - &upb_stdmsg_ ## size ## byte_seqnext, \ - &upb_stdmsg_seqget ## type}; - -#define RETURN_STDMSG(type, size) { STDMSG(type, size); return &vtbl; } - -static upb_accessor_vtbl *lupb_accessor(upb_fielddef *f) { - switch (f->type) { - case UPB_TYPE(DOUBLE): RETURN_STDMSG(double, 8) - case UPB_TYPE(FLOAT): RETURN_STDMSG(float, 4) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): RETURN_STDMSG(uint64, 8) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): RETURN_STDMSG(int64, 8) - case UPB_TYPE(INT32): - case UPB_TYPE(SINT32): - case UPB_TYPE(ENUM): - case UPB_TYPE(SFIXED32): RETURN_STDMSG(int32, 4) 
- case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): RETURN_STDMSG(uint32, 4) - case UPB_TYPE(BOOL): { STDMSG(bool, 1); return &vtbl; } - case UPB_TYPE(GROUP): - case UPB_TYPE(MESSAGE): RETURN_STDMSG(ptr, 8) // TODO: 32-bit - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): { - STDMSG(ptr, 8); - vtbl.set = &lupb_msg_stringval; - vtbl.append = &lupb_msg_stringval_r; - return &vtbl; - } - } - return NULL; -} - - -/* lupb_array ****************************************************************/ - -// Array: we store all elements in the environment table. Could optimize by -// storing primitive arrays in our own memory; this would be significantly more -// space efficient. Lua array elements are 16 bytes each; our own array would -// be 1/4 the space for 32-bit integers, or 1/16 the space for booleans. -// -// The first element of the environment table stores our type (which will be -// either an integer from upb.TYPE_* or a upb.msgdef), the remaining elements -// store the elements. We always keep all elements contiguous so we can use -// lua_objlen()/lua_rawlen() (for Lua 5.1/5.2 respectively) to report its len). - -// narg is offset of environment table. -static size_t lupb_array_getlen(lua_State *L, int narg) { - return lua_rawlen(L, narg) - 1; -} - -static void lupb_array_check(lua_State *L, int narg) { - if (!luaL_checkudata(L, narg, "upb.array")) - luaL_typerror(L, narg, "upb array"); -} - -static void lupb_array_pushnew(lua_State *L, const void *f) { - (void)L; - (void)f; -} - -static int lupb_array_new(lua_State *L) { - (void)L; - return 0; -} - -static int lupb_array_len(lua_State *L) { - lupb_array_check(L, 1); - lua_getfenv(L, 1); - lua_pushnumber(L, lupb_array_getlen(L, -1)); - return 1; -} - -static int lupb_array_index(lua_State *L) { - assert(lua_gettop(L) == 2); // __index should always be called with 2 args. 
- lupb_array_check(L, 1); - lua_Number num = luaL_checknumber(L, 2); - if (!lupb_isint(num)) luaL_typerror(L, 2, "integer"); - - lua_getfenv(L, 1); - size_t len = lupb_array_getlen(L, -1); - if (num < 1 || num > len) luaL_error(L, "array bounds check failed"); - lua_rawgeti(L, -1, num + 1); - return 1; -} - -static int lupb_array_newindex(lua_State *L) { - assert(lua_gettop(L) == 3); // __newindex should always be called with 3 args. - lupb_array_check(L, 1); - lua_Number num = luaL_checknumber(L, 2); - if (rint(num) != num) luaL_typerror(L, 2, "integer"); - - lua_getfenv(L, 1); - size_t len = lupb_array_getlen(L, -1); - // We only allow extending the index one beyond the end. - if (num < 1 || num > len + 1) luaL_error(L, "array bounds check failed"); - lua_pushvalue(L, 3); - lua_rawseti(L, -2, num); + upb_status s = UPB_STATUS_INIT; + upb_def_freeze(defs, n, &s); + free(defs); + lupb_checkstatus(L, &s); return 0; } -static const struct luaL_Reg lupb_array_mm[] = { - {"__len", lupb_array_len}, - {"__index", lupb_array_index}, - {"__newindex", lupb_array_newindex}, - {NULL, NULL} -}; - -/* lupb toplevel **************************************************************/ - static const struct luaL_Reg lupb_toplevel_m[] = { - {"SymbolTable", lupb_symtab_new}, - {"MessageDef", lupb_msgdef_new}, + {"EnumDef", lupb_enumdef_new}, {"FieldDef", lupb_fielddef_new}, - - {"Message", lupb_msg_new}, - {"Array", lupb_array_new}, - - {"clear", lupb_clear}, - {"msgdef", lupb_msgdef}, - {"has", lupb_has}, + {"MessageDef", lupb_msgdef_new}, + {"SymbolTable", lupb_symtab_new}, + {"freeze", lupb_def_freeze}, {NULL, NULL} }; @@ -950,14 +991,12 @@ static const struct luaL_Reg lupb_toplevel_m[] = { static void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m, const luaL_Reg *mm) { luaL_newmetatable(L, name); - luaL_register(L, NULL, mm); // Register all mm in the metatable. + lupb_setfuncs(L, mm); // Register all mm in the metatable. 
lua_createtable(L, 0, 0); - if (m) { - // Methods go in the mt's __index method. This implies that you can't - // implement __index and also set methods yourself. - luaL_register(L, NULL, m); - lua_setfield(L, -2, "__index"); - } + // Methods go in the mt's __index method. This implies that you can't + // implement __index. + lupb_setfuncs(L, m); + lua_setfield(L, -2, "__index"); lua_pop(L, 1); // The mt. } @@ -967,46 +1006,71 @@ static void lupb_setfieldi(lua_State *L, const char *field, int i) { } int luaopen_upb(lua_State *L) { - lupb_register_type(L, "upb.msgdef", lupb_msgdef_m, lupb_msgdef_mm); - lupb_register_type(L, "upb.enumdef", lupb_enumdef_m, lupb_enumdef_mm); - lupb_register_type(L, "upb.fielddef", NULL, lupb_fielddef_mm); - lupb_register_type(L, "upb.symtab", lupb_symtab_m, lupb_symtab_mm); - - lupb_register_type(L, "upb.msg", NULL, lupb_msg_mm); - lupb_register_type(L, "upb.array", NULL, lupb_msg_mm); + lupb_register_type(L, LUPB_MSGDEF, lupb_msgdef_m, lupb_msgdef_mm); + lupb_register_type(L, LUPB_ENUMDEF, lupb_enumdef_m, lupb_enumdef_mm); + lupb_register_type(L, LUPB_FIELDDEF, lupb_fielddef_m, lupb_fielddef_mm); + lupb_register_type(L, LUPB_SYMTAB, lupb_symtab_m, lupb_symtab_mm); // Create our object cache. - lua_createtable(L, 0, 0); + lua_newtable(L); lua_createtable(L, 0, 1); // Cache metatable. lua_pushstring(L, "v"); // Values are weak. 
lua_setfield(L, -2, "__mode"); - lua_setfield(L, LUA_REGISTRYINDEX, "upb.objcache"); + lua_setmetatable(L, -2); + lua_setfield(L, LUA_REGISTRYINDEX, LUPB_OBJCACHE); + + lupb_newlib(L, "upb", lupb_toplevel_m); + + // Define a couple functions as Lua source (kept here instead of a separate + // Lua file so that upb.so is self-contained) + const char *lua_source = + "return function(upb)\n" + " upb.build_defs = function(defs)\n" + " local symtab = upb.SymbolTable(defs)\n" + " return symtab:getdefs(upb.DEF_ANY)\n" + " end\n" + "end"; - luaL_register(L, "upb", lupb_toplevel_m); + if (luaL_dostring(L, lua_source) != 0) + lua_error(L); + + // Call the chunk that will define the extra functions on upb, passing our + // package dictionary as the argument. + lua_pushvalue(L, -2); + lua_call(L, 1, 0); // Register constants. lupb_setfieldi(L, "LABEL_OPTIONAL", UPB_LABEL(OPTIONAL)); lupb_setfieldi(L, "LABEL_REQUIRED", UPB_LABEL(REQUIRED)); lupb_setfieldi(L, "LABEL_REPEATED", UPB_LABEL(REPEATED)); - lupb_setfieldi(L, "TYPE_DOUBLE", UPB_TYPE(DOUBLE)); - lupb_setfieldi(L, "TYPE_FLOAT", UPB_TYPE(FLOAT)); - lupb_setfieldi(L, "TYPE_INT64", UPB_TYPE(INT64)); - lupb_setfieldi(L, "TYPE_UINT64", UPB_TYPE(UINT64)); - lupb_setfieldi(L, "TYPE_INT32", UPB_TYPE(INT32)); - lupb_setfieldi(L, "TYPE_FIXED64", UPB_TYPE(FIXED64)); - lupb_setfieldi(L, "TYPE_FIXED32", UPB_TYPE(FIXED32)); - lupb_setfieldi(L, "TYPE_BOOL", UPB_TYPE(BOOL)); - lupb_setfieldi(L, "TYPE_STRING", UPB_TYPE(STRING)); - lupb_setfieldi(L, "TYPE_GROUP", UPB_TYPE(GROUP)); - lupb_setfieldi(L, "TYPE_MESSAGE", UPB_TYPE(MESSAGE)); - lupb_setfieldi(L, "TYPE_BYTES", UPB_TYPE(BYTES)); - lupb_setfieldi(L, "TYPE_UINT32", UPB_TYPE(UINT32)); - lupb_setfieldi(L, "TYPE_ENUM", UPB_TYPE(ENUM)); - lupb_setfieldi(L, "TYPE_SFIXED32", UPB_TYPE(SFIXED32)); - lupb_setfieldi(L, "TYPE_SFIXED64", UPB_TYPE(SFIXED64)); - lupb_setfieldi(L, "TYPE_SINT32", UPB_TYPE(SINT32)); - lupb_setfieldi(L, "TYPE_SINT64", UPB_TYPE(SINT64)); + lupb_setfieldi(L, 
"TYPE_DOUBLE", UPB_TYPE(DOUBLE)); + lupb_setfieldi(L, "TYPE_FLOAT", UPB_TYPE(FLOAT)); + lupb_setfieldi(L, "TYPE_INT64", UPB_TYPE(INT64)); + lupb_setfieldi(L, "TYPE_UINT64", UPB_TYPE(UINT64)); + lupb_setfieldi(L, "TYPE_INT32", UPB_TYPE(INT32)); + lupb_setfieldi(L, "TYPE_FIXED64", UPB_TYPE(FIXED64)); + lupb_setfieldi(L, "TYPE_FIXED32", UPB_TYPE(FIXED32)); + lupb_setfieldi(L, "TYPE_BOOL", UPB_TYPE(BOOL)); + lupb_setfieldi(L, "TYPE_STRING", UPB_TYPE(STRING)); + lupb_setfieldi(L, "TYPE_GROUP", UPB_TYPE(GROUP)); + lupb_setfieldi(L, "TYPE_MESSAGE", UPB_TYPE(MESSAGE)); + lupb_setfieldi(L, "TYPE_BYTES", UPB_TYPE(BYTES)); + lupb_setfieldi(L, "TYPE_UINT32", UPB_TYPE(UINT32)); + lupb_setfieldi(L, "TYPE_ENUM", UPB_TYPE(ENUM)); + lupb_setfieldi(L, "TYPE_SFIXED32", UPB_TYPE(SFIXED32)); + lupb_setfieldi(L, "TYPE_SFIXED64", UPB_TYPE(SFIXED64)); + lupb_setfieldi(L, "TYPE_SINT32", UPB_TYPE(SINT32)); + lupb_setfieldi(L, "TYPE_SINT64", UPB_TYPE(SINT64)); + + lupb_setfieldi(L, "DEF_MSG", UPB_DEF_MSG); + lupb_setfieldi(L, "DEF_FIELD", UPB_DEF_FIELD); + lupb_setfieldi(L, "DEF_ENUM", UPB_DEF_ENUM); + lupb_setfieldi(L, "DEF_SERVICE", UPB_DEF_SERVICE); + lupb_setfieldi(L, "DEF_ANY", UPB_DEF_ANY); return 1; // Return package table. } + +// Alternate names so that the library can be loaded as upb5_1 etc. +int LUPB_OPENFUNC(upb)(lua_State *L) { return luaopen_upb(L); } diff --git a/bindings/lua/upb.h b/bindings/lua/upb.h new file mode 100644 index 0000000..e6b4f2f --- /dev/null +++ b/bindings/lua/upb.h @@ -0,0 +1,45 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * Shared definitions for upb Lua modules. + */ + +#ifndef UPB_LUA_UPB_H_ +#define UPB_LUA_UPB_H_ + +#include "upb/def.h" + +// Lua 5.1/5.2 compatibility code. 
+#if LUA_VERSION_NUM == 501 + +#define lua_rawlen lua_objlen +#define lupb_newlib(L, name, l) luaL_register(L, name, l) +#define lupb_setfuncs(L, l) luaL_register(L, NULL, l) +#define LUPB_OPENFUNC(mod) luaopen_ ## mod ## upb5_1 + +void *luaL_testudata(lua_State *L, int ud, const char *tname); + +#elif LUA_VERSION_NUM == 502 + +// Lua 5.2 modules are not expected to set a global variable, so "name" is +// unused. +#define lupb_newlib(L, name, l) luaL_newlib(L, l) +#define lupb_setfuncs(L, l) luaL_setfuncs(L, l, 0) +int luaL_typerror(lua_State *L, int narg, const char *tname); +#define LUPB_OPENFUNC(mod) luaopen_ ## mod ## upb5_2 + +#else +#error Only Lua 5.1 and 5.2 are supported +#endif + +const upb_msgdef *lupb_msgdef_check(lua_State *L, int narg); +const upb_enumdef *lupb_enumdef_check(lua_State *L, int narg); +const char *lupb_checkname(lua_State *L, int narg); +bool lupb_def_pushwrapper(lua_State *L, const upb_def *def, const void *owner); +void lupb_def_pushnewrapper(lua_State *L, const upb_def *def, + const void *owner); + +#endif // UPB_LUA_UPB_H_ diff --git a/dynasm/COPYRIGHT b/dynasm/COPYRIGHT index 032f2de..6f2a45b 100644 --- a/dynasm/COPYRIGHT +++ b/dynasm/COPYRIGHT @@ -1,5 +1,3 @@ -DynASM is taken from LuaJIT 2, which carries the following license statement: - =============================================================================== LuaJIT -- a Just-In-Time Compiler for Lua. 
http://luajit.org/ diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc index fb0916d..59603d9 100644 --- a/tests/test_cpp.cc +++ b/tests/test_cpp.cc @@ -8,20 +8,20 @@ */ #include +#include #include -#include "upb/bytestream.hpp" -#include "upb/def.hpp" -#include "upb/handlers.hpp" -#include "upb/upb.hpp" -#include "upb/pb/decoder.hpp" -#include "upb/pb/glue.hpp" +#include "upb/bytestream.h" +#include "upb/def.h" +#include "upb/handlers.h" +#include "upb/pb/glue.h" #include "upb_test.h" +#include "upb/upb.h" static void TestSymbolTable(const char *descriptor_file) { upb::SymbolTable *s = upb::SymbolTable::New(&s); upb::Status status; if (!upb::LoadDescriptorFileIntoSymtab(s, descriptor_file, &status)) { - std::cerr << "Couldn't load descriptor: " << status; + std::cerr << "Couldn't load descriptor: " << status.GetString(); exit(1); } const upb::MessageDef *md = s->LookupMessage("A", &md); @@ -41,7 +41,9 @@ static void TestByteStream() { free(str); } -int main(int argc, char *argv[]) { +extern "C" { + +int run_tests(int argc, char *argv[]) { if (argc < 2) { fprintf(stderr, "Usage: test_cpp \n"); return 1; @@ -50,3 +52,5 @@ int main(int argc, char *argv[]) { TestByteStream(); return 0; } + +} diff --git a/tests/test_decoder.cc b/tests/test_decoder.cc index 13403bb..d42c0fe 100644 --- a/tests/test_decoder.cc +++ b/tests/test_decoder.cc @@ -7,6 +7,7 @@ * input, with buffer breaks in arbitrary places. * * Tests to add: + * - string/bytes * - unknown field handler called appropriately * - unknown fields can be inserted in random places * - fuzzing of valid input @@ -35,6 +36,9 @@ #include "upb/pb/varint.h" #include "upb/upb.h" #include "upb_test.h" +#include "third_party/upb/tests/test_decoder_schema.upb.h" + +uint32_t filter_hash = 0; // Copied from decoder.c, since this is not a public interface. 
typedef struct { @@ -186,66 +190,78 @@ void indent(void *depth) { indentbuf(&output, *(int*)depth); } -#define VALUE_HANDLER(member, fmt) \ - upb_flow_t value_ ## member(void *closure, upb_value fval, upb_value val) { \ - indent(closure); \ - output.appendf("%" PRIu32 ":%" fmt "\n", \ - upb_value_getuint32(fval), upb_value_get ## member(val)); \ - return UPB_CONTINUE; \ +#define NUMERIC_VALUE_HANDLER(member, ctype, fmt) \ + bool value_ ## member(void *closure, void *fval, ctype val) { \ + indent(closure); \ + uint32_t *num = static_cast(fval); \ + output.appendf("%" PRIu32 ":%" fmt "\n", *num, val); \ + return true; \ } -VALUE_HANDLER(uint32, PRIu32) -VALUE_HANDLER(uint64, PRIu64) -VALUE_HANDLER(int32, PRId32) -VALUE_HANDLER(int64, PRId64) -VALUE_HANDLER(float, "g") -VALUE_HANDLER(double, "g") +NUMERIC_VALUE_HANDLER(uint32, uint32_t, PRIu32) +NUMERIC_VALUE_HANDLER(uint64, uint64_t, PRIu64) +NUMERIC_VALUE_HANDLER(int32, int32_t, PRId32) +NUMERIC_VALUE_HANDLER(int64, int64_t, PRId64) +NUMERIC_VALUE_HANDLER(float, float, "g") +NUMERIC_VALUE_HANDLER(double, double, "g") -upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) { +bool value_bool(void *closure, void *fval, bool val) { indent(closure); - output.appendf("%" PRIu32 ":%s\n", - upb_value_getuint32(fval), - upb_value_getbool(val) ? "true" : "false"); - return UPB_CONTINUE; + uint32_t *num = static_cast(fval); + output.appendf("%" PRIu32 ":%s\n", *num, val ? "true" : "false"); + return true; } -upb_flow_t value_string(void *closure, upb_value fval, upb_value val) { - // Note: won't work with strings that contain NULL. 
+void* startstr(void *closure, void *fval, size_t size_hint) { indent(closure); - char *str = upb_byteregion_strdup(upb_value_getbyteregion(val)); - output.appendf("%" PRIu32 ":%s\n", upb_value_getuint32(fval), str); - free(str); - return UPB_CONTINUE; + uint32_t *num = static_cast(fval); + output.appendf("%" PRIu32 ":(%zu)\"", *num, size_hint); + return ((int*)closure) + 1; +} + +size_t value_string(void *closure, void *fval, const char *buf, size_t n) { + output.append(buf, n); + return n; +} + +bool endstr(void *closure, void *fval) { + UPB_UNUSED(fval); + output.append("\"\n"); + return true; } -upb_sflow_t startsubmsg(void *closure, upb_value fval) { +void* startsubmsg(void *closure, void *fval) { indent(closure); - output.appendf("%" PRIu32 ":{\n", upb_value_getuint32(fval)); - return UPB_CONTINUE_WITH(((int*)closure) + 1); + uint32_t *num = static_cast(fval); + output.appendf("%" PRIu32 ":{\n", *num); + return ((int*)closure) + 1; } -upb_flow_t endsubmsg(void *closure, upb_value fval) { +bool endsubmsg(void *closure, void *fval) { + UPB_UNUSED(fval); indent(closure); output.append("}\n"); - return UPB_CONTINUE; + return true; } -upb_sflow_t startseq(void *closure, upb_value fval) { +void* startseq(void *closure, void *fval) { indent(closure); - output.appendf("%" PRIu32 ":[\n", upb_value_getuint32(fval)); - return UPB_CONTINUE_WITH(((int*)closure) + 1); + uint32_t *num = static_cast(fval); + output.appendf("%" PRIu32 ":[\n", *num); + return ((int*)closure) + 1; } -upb_flow_t endseq(void *closure, upb_value fval) { +bool endseq(void *closure, void *fval) { + UPB_UNUSED(fval); indent(closure); output.append("]\n"); - return UPB_CONTINUE; + return true; } -upb_flow_t startmsg(void *closure) { +bool startmsg(void *closure) { indent(closure); output.append("<\n"); - return UPB_CONTINUE; + return true; } void endmsg(void *closure, upb_status *status) { @@ -254,14 +270,23 @@ void endmsg(void *closure, upb_status *status) { output.append(">\n"); } -void 
doreg(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type, bool repeated, - upb_value_handler *handler) { - upb_fhandlers *f = upb_mhandlers_newfhandlers(m, num, type, repeated); +void free_uint32(void *val) { + uint32_t *u32 = static_cast(val); + delete u32; +} + +template +void doreg(upb_handlers *h, uint32_t num, + typename upb::Handlers::Value::Handler *handler) { + const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), num); ASSERT(f); - upb_fhandlers_setvalue(f, handler); - upb_fhandlers_setstartseq(f, &startseq); - upb_fhandlers_setendseq(f, &endseq); - upb_fhandlers_setfval(f, upb_value_uint32(num)); + ASSERT(h->SetValueHandler(f, handler, new uint32_t(num), free_uint32)); + if (f->IsSequence()) { + ASSERT(h->SetStartSequenceHandler( + f, &startseq, new uint32_t(num), free_uint32)); + ASSERT(h->SetEndSequenceHandler( + f, &endseq, new uint32_t(num), free_uint32)); + } } // The repeated field number to correspond to the given non-repeated field @@ -273,57 +298,81 @@ uint32_t rep_fn(uint32_t fn) { #define NOP_FIELD 40 #define UNKNOWN_FIELD 666 -void reg(upb_mhandlers *m, upb_fieldtype_t type, upb_value_handler *handler) { +template +void reg(upb_handlers *h, upb_fieldtype_t type, + typename upb::Handlers::Value::Handler *handler) { // We register both a repeated and a non-repeated field for every type. // For the non-repeated field we make the field number the same as the // type. For the repeated field we make it a function of the type. 
- doreg(m, type, type, false, handler); - doreg(m, rep_fn(type), type, true, handler); + doreg(h, type, handler); + doreg(h, rep_fn(type), handler); } -void reg_subm(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type, - bool repeated) { - upb_fhandlers *f = - upb_mhandlers_newfhandlers_subm(m, num, type, repeated, m); +void reg_subm(upb_handlers *h, uint32_t num) { + const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), num); ASSERT(f); - upb_fhandlers_setstartseq(f, &startseq); - upb_fhandlers_setendseq(f, &endseq); - upb_fhandlers_setstartsubmsg(f, &startsubmsg); - upb_fhandlers_setendsubmsg(f, &endsubmsg); - upb_fhandlers_setfval(f, upb_value_uint32(num)); + if (f->IsSequence()) { + ASSERT(h->SetStartSequenceHandler( + f, &startseq, new uint32_t(num), free_uint32)); + ASSERT(h->SetEndSequenceHandler( + f, &endseq, new uint32_t(num), free_uint32)); + } + ASSERT(h->SetStartSubMessageHandler( + f, &startsubmsg, new uint32_t(num), free_uint32)); + ASSERT(h->SetEndSubMessageHandler( + f, &endsubmsg, new uint32_t(num), free_uint32)); + ASSERT(upb_handlers_setsubhandlers(h, f, h)); } -void reghandlers(upb_mhandlers *m) { - upb_mhandlers_setstartmsg(m, &startmsg); - upb_mhandlers_setendmsg(m, &endmsg); +void reg_str(upb_handlers *h, uint32_t num) { + const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), num); + ASSERT(f); + if (f->IsSequence()) { + ASSERT(h->SetStartSequenceHandler( + f, &startseq, new uint32_t(num), free_uint32)); + ASSERT(h->SetEndSequenceHandler( + f, &endseq, new uint32_t(num), free_uint32)); + } + ASSERT(h->SetStartStringHandler( + f, &startstr, new uint32_t(num), free_uint32)); + ASSERT(h->SetEndStringHandler( + f, &endstr, new uint32_t(num), free_uint32)); + ASSERT(h->SetStringHandler( + f, &value_string, new uint32_t(num), free_uint32)); +} + +void reghandlers(upb_handlers *h) { + upb_handlers_setstartmsg(h, &startmsg); + upb_handlers_setendmsg(h, &endmsg); // Register handlers for each type. 
- reg(m, UPB_TYPE(DOUBLE), &value_double); - reg(m, UPB_TYPE(FLOAT), &value_float); - reg(m, UPB_TYPE(INT64), &value_int64); - reg(m, UPB_TYPE(UINT64), &value_uint64); - reg(m, UPB_TYPE(INT32) , &value_int32); - reg(m, UPB_TYPE(FIXED64), &value_uint64); - reg(m, UPB_TYPE(FIXED32), &value_uint32); - reg(m, UPB_TYPE(BOOL), &value_bool); - reg(m, UPB_TYPE(STRING), &value_string); - reg(m, UPB_TYPE(BYTES), &value_string); - reg(m, UPB_TYPE(UINT32), &value_uint32); - reg(m, UPB_TYPE(ENUM), &value_int32); - reg(m, UPB_TYPE(SFIXED32), &value_int32); - reg(m, UPB_TYPE(SFIXED64), &value_int64); - reg(m, UPB_TYPE(SINT32), &value_int32); - reg(m, UPB_TYPE(SINT64), &value_int64); + reg (h, UPB_TYPE(DOUBLE), &value_double); + reg (h, UPB_TYPE(FLOAT), &value_float); + reg (h, UPB_TYPE(INT64), &value_int64); + reg(h, UPB_TYPE(UINT64), &value_uint64); + reg (h, UPB_TYPE(INT32) , &value_int32); + reg(h, UPB_TYPE(FIXED64), &value_uint64); + reg(h, UPB_TYPE(FIXED32), &value_uint32); + reg (h, UPB_TYPE(BOOL), &value_bool); + reg(h, UPB_TYPE(UINT32), &value_uint32); + reg (h, UPB_TYPE(ENUM), &value_int32); + reg (h, UPB_TYPE(SFIXED32), &value_int32); + reg (h, UPB_TYPE(SFIXED64), &value_int64); + reg (h, UPB_TYPE(SINT32), &value_int32); + reg (h, UPB_TYPE(SINT64), &value_int64); + + reg_str(h, UPB_TYPE(STRING)); + reg_str(h, UPB_TYPE(BYTES)); + reg_str(h, rep_fn(UPB_TYPE(STRING))); + reg_str(h, rep_fn(UPB_TYPE(BYTES))); // Register submessage/group handlers that are self-recursive // to this type, eg: message M { optional M m = 1; } - reg_subm(m, UPB_TYPE(MESSAGE), UPB_TYPE(MESSAGE), false); - reg_subm(m, UPB_TYPE(GROUP), UPB_TYPE(GROUP), false); - reg_subm(m, rep_fn(UPB_TYPE(MESSAGE)), UPB_TYPE(MESSAGE), true); - reg_subm(m, rep_fn(UPB_TYPE(GROUP)), UPB_TYPE(GROUP), true); + reg_subm(h, UPB_TYPE(MESSAGE)); + reg_subm(h, rep_fn(UPB_TYPE(MESSAGE))); - // Register a no-op string field so we can pad the proto wherever we want. 
- upb_mhandlers_newfhandlers(m, NOP_FIELD, UPB_TYPE(STRING), false); + // For NOP_FIELD we register no handlers, so we can pad a proto freely without + // changing the output. } @@ -413,22 +462,32 @@ upb_byteregion *upb_seamsrc_allbytes(upb_seamsrc *s) { /* Running of test cases ******************************************************/ upb_decoderplan *plan; + +uint32_t Hash(const buffer& proto, const buffer* expected_output) { + uint32_t hash = MurmurHash2(proto.buf(), proto.len(), 0); + if (expected_output) + hash = MurmurHash2(expected_output->buf(), expected_output->len(), hash); + bool hasjit = upb_decoderplan_hasjitcode(plan); + hash = MurmurHash2(&hasjit, 1, hash); + return hash; +} + #define LINE(x) x "\n" void run_decoder(const buffer& proto, const buffer* expected_output) { + testhash = Hash(proto, expected_output); + if (filter_hash && testhash != filter_hash) return; upb_seamsrc src; upb_seamsrc_init(&src, proto.buf(), proto.len()); upb_decoder d; upb_decoder_init(&d); - upb_decoder_resetplan(&d, plan, 0); + upb_decoder_resetplan(&d, plan); for (size_t i = 0; i < proto.len(); i++) { for (size_t j = i; j < UPB_MIN(proto.len(), i + 5); j++) { upb_seamsrc_resetseams(&src, i, j); upb_byteregion *input = upb_seamsrc_allbytes(&src); output.clear(); upb_decoder_resetinput(&d, input, &closures[0]); - upb_success_t success = UPB_SUSPENDED; - while (success == UPB_SUSPENDED) - success = upb_decoder_decode(&d); + upb_success_t success = upb_decoder_decode(&d); ASSERT(upb_ok(upb_decoder_status(&d)) == (success == UPB_OK)); if (expected_output) { ASSERT_STATUS(success == UPB_OK, upb_decoder_status(&d)); @@ -448,6 +507,7 @@ void run_decoder(const buffer& proto, const buffer* expected_output) { } upb_decoder_uninit(&d); upb_seamsrc_uninit(&src); + testhash = 0; } const static buffer thirty_byte_nop = buffer(cat( @@ -777,35 +837,47 @@ void run_tests() { test_valid(); } -int main() { +extern "C" { + +int run_tests(int argc, char *argv[]) { + if (argc > 1) + filter_hash = 
strtol(argv[1], NULL, 16); for (int i = 0; i < UPB_MAX_NESTING; i++) { closures[i] = i; } - // Construct decoder plan. - upb_handlers *h = upb_handlers_new(); - reghandlers(upb_handlers_newmhandlers(h)); // Create an empty handlers to make sure that the decoder can handle empty // messages. - upb_handlers_newmhandlers(h); + upb_handlers *h = upb_handlers_new(UPB_TEST_DECODER_EMPTYMESSAGE, &h); + bool ok = upb_handlers_freeze(&h, 1, NULL); + ASSERT(ok); + plan = upb_decoderplan_new(h, true); + upb_handlers_unref(h, &h); + upb_decoderplan_unref(plan); + + // Construct decoder plan. + h = upb_handlers_new(UPB_TEST_DECODER_DECODERTEST, &h); + reghandlers(h); + ok = upb_handlers_freeze(&h, 1, NULL); // Test without JIT. plan = upb_decoderplan_new(h, false); + ASSERT(!upb_decoderplan_hasjitcode(plan)); run_tests(); upb_decoderplan_unref(plan); +#ifdef UPB_USE_JIT_X64 // Test JIT. plan = upb_decoderplan_new(h, true); -#ifdef UPB_USE_JIT_X64 ASSERT(upb_decoderplan_hasjitcode(plan)); -#else - ASSERT(!upb_decoderplan_hasjitcode(plan)); -#endif run_tests(); upb_decoderplan_unref(plan); +#endif plan = NULL; printf("All tests passed, %d assertions.\n", num_assertions); - upb_handlers_unref(h); + upb_handlers_unref(h, &h); return 0; } + +} diff --git a/tests/test_decoder_schema.proto b/tests/test_decoder_schema.proto new file mode 100644 index 0000000..50bfca9 --- /dev/null +++ b/tests/test_decoder_schema.proto @@ -0,0 +1,64 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// Schema used in test_decoder.cc. It contains two fields (one optional +// and one repeated) for each type. 
+ +package upb.test_decoder; + +message M { + optional M m = 1; +} + +enum E { + FOO = 1; +} + +message EmptyMessage {} + +message DecoderTest { + optional double f_double = 1; + optional float f_float = 2; + optional int64 f_int64 = 3; + optional uint64 f_uint64 = 4; + optional int32 f_int32 = 5; + optional fixed64 f_fixed64 = 6; + optional fixed32 f_fixed32 = 7; + optional bool f_bool = 8; + optional string f_string = 9; + optional bytes f_bytes = 12; + optional uint32 f_uint32 = 13; + optional sfixed32 f_sfixed32 = 15; + optional sfixed64 f_sfixed64 = 16; + optional sint32 f_sint32 = 17; + optional sint64 f_sint64 = 18; + + optional DecoderTest f_message = 11; + optional E f_enum = 14; + + + repeated double r_double = 536869912; + repeated float r_float = 536869913; + repeated int64 r_int64 = 536869914; + repeated uint64 r_uint64 = 536869915; + repeated int32 r_int32 = 536869916; + repeated fixed64 r_fixed64 = 536869917; + repeated fixed32 r_fixed32 = 536869918; + repeated bool r_bool = 536869919; + repeated string r_string = 536869920; + repeated bytes r_bytes = 536869923; + repeated uint32 r_uint32 = 536869924; + repeated sfixed32 r_sfixed32 = 536869926; + repeated sfixed64 r_sfixed64 = 536869927; + repeated sint32 r_sint32 = 536869928; + repeated sint64 r_sint64 = 536869929; + + repeated DecoderTest r_message = 536869922; + repeated E r_enum = 536869925; + + // To allow arbitrary padding. 
+ optional string nop_field = 40; +} diff --git a/tests/test_def.c b/tests/test_def.c index f60d556..7f089d7 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -18,7 +18,7 @@ const char *descriptor_file; static void test_empty_symtab() { upb_symtab *s = upb_symtab_new(&s); int count; - const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY, NULL); + const upb_def **defs = upb_symtab_getdefs(s, UPB_DEF_ANY, NULL, &count); ASSERT(count == 0); free(defs); upb_symtab_unref(s, &s); @@ -31,7 +31,7 @@ static upb_symtab *load_test_proto(void *owner) { if (!upb_load_descriptor_file_into_symtab(s, descriptor_file, &status)) { fprintf(stderr, "Error loading descriptor file: %s\n", upb_status_getstr(&status)); - exit(1); + ASSERT(false); } upb_status_uninit(&status); return s; @@ -44,27 +44,30 @@ static void test_cycles() { // and then be incremented to one again. const upb_def *def = upb_symtab_lookup(s, "A", &def); ASSERT(def); - ASSERT(upb_def_isfinalized(def)); + ASSERT(upb_def_isfrozen(def)); upb_symtab_unref(s, &s); // Message A has only one subfield: "optional B b = 1". - const upb_msgdef *m = upb_downcast_msgdef_const(def); - upb_fielddef *f = upb_msgdef_itof(m, 1); + const upb_msgdef *m = upb_downcast_msgdef(def); + const upb_fielddef *f = upb_msgdef_itof(m, 1); ASSERT(f); - ASSERT(upb_hassubdef(f)); + ASSERT(upb_fielddef_hassubdef(f)); const upb_def *def2 = upb_fielddef_subdef(f); - ASSERT(upb_downcast_msgdef_const(def2)); + ASSERT(upb_downcast_msgdef(def2)); ASSERT(strcmp(upb_def_fullname(def2), "B") == 0); upb_def_ref(def2, &def2); upb_def_unref(def, &def); + + // We know "def" is still alive because it's reachable from def2. 
+ ASSERT(strcmp(upb_def_fullname(def), "A") == 0); upb_def_unref(def2, &def2); } static void test_fielddef_unref() { upb_symtab *s = load_test_proto(&s); const upb_msgdef *md = upb_symtab_lookupmsg(s, "A", &md); - upb_fielddef *f = upb_msgdef_itof(md, 1); + const upb_fielddef *f = upb_msgdef_itof(md, 1); upb_fielddef_ref(f, &f); // Unref symtab and msgdef; now fielddef is the only thing keeping the msgdef @@ -72,7 +75,7 @@ static void test_fielddef_unref() { upb_symtab_unref(s, &s); upb_msgdef_unref(md, &md); // Check that md is still alive. - ASSERT(strcmp(upb_def_fullname(UPB_UPCAST(md)), "A") == 0); + ASSERT(strcmp(upb_def_fullname(upb_upcast(md)), "A") == 0); // Check that unref of fielddef frees the whole remaining graph. upb_fielddef_unref(f, &f); @@ -82,14 +85,14 @@ static void test_fielddef_accessors() { upb_fielddef *f1 = upb_fielddef_new(&f1); upb_fielddef *f2 = upb_fielddef_new(&f2); - ASSERT(upb_fielddef_ismutable(f1)); + ASSERT(!upb_fielddef_isfrozen(f1)); upb_fielddef_setname(f1, "f1"); upb_fielddef_setnumber(f1, 1937); upb_fielddef_settype(f1, UPB_TYPE(FIXED64)); upb_fielddef_setlabel(f1, UPB_LABEL(REPEATED)); ASSERT(upb_fielddef_number(f1) == 1937); - ASSERT(upb_fielddef_ismutable(f2)); + ASSERT(!upb_fielddef_isfrozen(f2)); upb_fielddef_setname(f2, "f2"); upb_fielddef_setnumber(f2, 1572); upb_fielddef_settype(f2, UPB_TYPE(BYTES)); @@ -98,6 +101,12 @@ static void test_fielddef_accessors() { upb_fielddef_unref(f1, &f1); upb_fielddef_unref(f2, &f2); + + // Test that we don't leak an unresolved subdef name. 
+ f1 = upb_fielddef_new(&f1); + upb_fielddef_settype(f1, UPB_TYPE(MESSAGE)); + upb_fielddef_setsubdefname(f1, "YO"); + upb_fielddef_unref(f1, &f1); } static upb_fielddef *newfield( @@ -108,23 +117,23 @@ static upb_fielddef *newfield( upb_fielddef_setnumber(f, num); upb_fielddef_settype(f, type); upb_fielddef_setlabel(f, label); - upb_fielddef_setsubtypename(f, type_name); + upb_fielddef_setsubdefname(f, type_name); return f; } static upb_msgdef *upb_msgdef_newnamed(const char *name, void *owner) { upb_msgdef *m = upb_msgdef_new(owner); - upb_def_setfullname(UPB_UPCAST(m), name); + upb_def_setfullname(upb_upcast(m), name); return m; } INLINE upb_enumdef *upb_enumdef_newnamed(const char *name, void *owner) { upb_enumdef *e = upb_enumdef_new(owner); - upb_def_setfullname(UPB_UPCAST(e), name); + upb_def_setfullname(upb_upcast(e), name); return e; } -void test_replacement() { +static void test_replacement() { upb_symtab *s = upb_symtab_new(&s); upb_msgdef *m = upb_msgdef_newnamed("MyMessage", &s); @@ -133,15 +142,15 @@ void test_replacement() { upb_msgdef *m2 = upb_msgdef_newnamed("MyMessage2", &s); upb_enumdef *e = upb_enumdef_newnamed("MyEnum", &s); - upb_def *newdefs[] = {UPB_UPCAST(m), UPB_UPCAST(m2), UPB_UPCAST(e)}; + upb_def *newdefs[] = {upb_upcast(m), upb_upcast(m2), upb_upcast(e)}; upb_status status = UPB_STATUS_INIT; ASSERT_STATUS(upb_symtab_add(s, newdefs, 3, &s, &status), &status); // Try adding a new definition of MyEnum, MyMessage should get replaced with // a new version. 
upb_enumdef *e2 = upb_enumdef_new(&s); - upb_def_setfullname(UPB_UPCAST(e2), "MyEnum"); - upb_def *newdefs2[] = {UPB_UPCAST(e2)}; + upb_def_setfullname(upb_upcast(e2), "MyEnum"); + upb_def *newdefs2[] = {upb_upcast(e2)}; ASSERT_STATUS(upb_symtab_add(s, newdefs2, 1, &s, &status), &status); const upb_msgdef *m3 = upb_symtab_lookupmsg(s, "MyMessage", &m3); @@ -159,7 +168,95 @@ void test_replacement() { upb_symtab_unref(s, &s); } -int main(int argc, char *argv[]) { +static void test_freeze_free() { + // Test that freeze frees defs that were only being kept alive by virtue of + // sharing a group with other defs that are being frozen. + upb_msgdef *m1 = upb_msgdef_newnamed("M1", &m1); + upb_msgdef *m2 = upb_msgdef_newnamed("M2", &m2); + upb_msgdef *m3 = upb_msgdef_newnamed("M3", &m3); + upb_msgdef *m4 = upb_msgdef_newnamed("M4", &m4); + + // Freeze M4 and make M1 point to it. + upb_def_freeze((upb_def*const*)&m4, 1, NULL); + + upb_fielddef *f = upb_fielddef_new(&f); + upb_fielddef_settype(f, UPB_TYPE_MESSAGE); + ASSERT(upb_fielddef_setnumber(f, 1)); + ASSERT(upb_fielddef_setname(f, "foo")); + ASSERT(upb_fielddef_setsubdef(f, upb_upcast(m4))); + + ASSERT(upb_msgdef_addfield(m1, f, &f)); + + // After this unref, M1 is the only thing keeping M4 alive. + upb_msgdef_unref(m4, &m4); + + // Force M1/M2/M3 into a single mutable refcounting group. + f = upb_fielddef_new(&f); + upb_fielddef_settype(f, UPB_TYPE_MESSAGE); + ASSERT(upb_fielddef_setnumber(f, 1)); + ASSERT(upb_fielddef_setname(f, "foo")); + + ASSERT(upb_fielddef_setsubdef(f, upb_upcast(m1))); + ASSERT(upb_fielddef_setsubdef(f, upb_upcast(m2))); + ASSERT(upb_fielddef_setsubdef(f, upb_upcast(m3))); + + // Make M3 cyclic with itself. + ASSERT(upb_msgdef_addfield(m3, f, &f)); + + // These will be kept alive since they are in the same refcounting group as + // M3, which still has a ref. Note: this behavior is not guaranteed by the + // API, but true in practice with its current implementation. 
+ upb_msgdef_unref(m1, &m1); + upb_msgdef_unref(m2, &m2); + + // Test that they are still alive (NOT allowed by the API). + ASSERT(strcmp("M1", upb_def_fullname(upb_upcast(m1))) == 0); + ASSERT(strcmp("M2", upb_def_fullname(upb_upcast(m2))) == 0); + + // Freeze M3. If the test leaked no memory, then freeing m1 and m2 was + // successful. + ASSERT(upb_def_freeze((upb_def*const*)&m3, 1, NULL)); + + upb_msgdef_unref(m3, &m3); +} + +static void test_partial_freeze() { + // Test that freeze of only part of the graph correctly adjusts objects that + // point to the newly-frozen objects. + upb_msgdef *m1 = upb_msgdef_newnamed("M1", &m1); + upb_msgdef *m2 = upb_msgdef_newnamed("M2", &m2); + upb_msgdef *m3 = upb_msgdef_newnamed("M3", &m3); + + upb_fielddef *f1 = upb_fielddef_new(&f1); + upb_fielddef_settype(f1, UPB_TYPE_MESSAGE); + ASSERT(upb_fielddef_setnumber(f1, 1)); + ASSERT(upb_fielddef_setname(f1, "f1")); + ASSERT(upb_fielddef_setsubdef(f1, upb_upcast(m1))); + + upb_fielddef *f2 = upb_fielddef_new(&f2); + upb_fielddef_settype(f2, UPB_TYPE_MESSAGE); + ASSERT(upb_fielddef_setnumber(f2, 2)); + ASSERT(upb_fielddef_setname(f2, "f2")); + ASSERT(upb_fielddef_setsubdef(f2, upb_upcast(m2))); + + ASSERT(upb_msgdef_addfield(m3, f1, &f1)); + ASSERT(upb_msgdef_addfield(m3, f2, &f2)); + + // Freeze M1 and M2, which should cause the group to be split + // and m3 (left mutable) to take references on m1 and m2. 
+ upb_def *defs[] = {upb_upcast(m1), upb_upcast(m2)}; + ASSERT(upb_def_freeze(defs, 2, NULL)); + + ASSERT(upb_msgdef_isfrozen(m1)); + ASSERT(upb_msgdef_isfrozen(m2)); + ASSERT(!upb_msgdef_isfrozen(m3)); + + upb_msgdef_unref(m1, &m1); + upb_msgdef_unref(m2, &m2); + upb_msgdef_unref(m3, &m3); +} + +int run_tests(int argc, char *argv[]) { if (argc < 2) { fprintf(stderr, "Usage: test_def \n"); return 1; @@ -170,5 +267,7 @@ int main(int argc, char *argv[]) { test_fielddef_accessors(); test_fielddef_unref(); test_replacement(); + test_freeze_free(); + test_partial_freeze(); return 0; } diff --git a/tests/test_table.cc b/tests/test_table.cc index 2538e35..bb75fc4 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -34,7 +34,7 @@ void test_strtable(const vector& keys, uint32_t num_to_insert) { /* Initialize structures. */ upb_strtable table; std::map m; - upb_strtable_init(&table); + upb_strtable_init(&table, UPB_CTYPE_INT32); std::set all; for(size_t i = 0; i < num_to_insert; i++) { const std::string& key = keys[i]; @@ -77,7 +77,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) { uint32_t largest_key = 0; std::map m; __gnu_cxx::hash_map hm; - upb_inttable_init(&table); + upb_inttable_init(&table, UPB_CTYPE_UINT32); for(size_t i = 0; i < num_entries; i++) { int32_t key = keys[i]; largest_key = UPB_MAX((int32_t)largest_key, key); @@ -103,7 +103,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) { upb_value val; bool ret = upb_inttable_remove(&table, keys[i], &val); ASSERT(ret == (m.erase(keys[i]) == 1)); - if (ret) ASSERT(upb_value_getuint32(val) == keys[i] * 2); + if (ret) ASSERT(upb_value_getuint32(val) == (uint32_t)keys[i] * 2); hm.erase(keys[i]); m.erase(keys[i]); } @@ -244,7 +244,9 @@ int32_t *get_contiguous_keys(int32_t num) { return buf; } -int main(int argc, char *argv[]) { +extern "C" { + +int run_tests(int argc, char *argv[]) { for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "--benchmark") == 
0) benchmark = true; } @@ -292,4 +294,7 @@ int main(int argc, char *argv[]) { } test_inttable(keys4, 64, "Table size: 64, keys: 1-32 and 10133-10164 ====\n"); delete[] keys4; + return 0; +} + } diff --git a/tests/test_varint.c b/tests/test_varint.c index 0fc93f0..bdbc573 100644 --- a/tests/test_varint.c +++ b/tests/test_varint.c @@ -86,16 +86,19 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) { printf("ok.\n"); \ } \ -TEST_VARINT_DECODER(branch32); -TEST_VARINT_DECODER(branch64); +TEST_VARINT_DECODER(check2_branch32); +TEST_VARINT_DECODER(check2_branch64); TEST_VARINT_DECODER(check2_wright); TEST_VARINT_DECODER(check2_massimino); -int main() { - test_branch32(); - test_branch64(); +int run_tests(int argc, char *argv[]) { + UPB_UNUSED(argc); + UPB_UNUSED(argv); + test_check2_branch32(); + test_check2_branch64(); test_check2_wright(); test_check2_massimino(); + return 0; } #if 0 diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index 020dca5..5eca399 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -10,19 +10,19 @@ #define __STDC_LIMIT_MACROS // So we get UINT32_MAX #include #include +#include #include #include #include #include #include #include "benchmarks/google_messages.pb.h" -#include "upb/def.hpp" -#include "upb/handlers.hpp" -#include "upb/msg.hpp" -#include "upb/pb/decoder.hpp" +#include "bindings/cpp/upb/pb/decoder.hpp" +#include "upb/def.h" +#include "upb/google/bridge.h" +#include "upb/handlers.h" #include "upb/pb/glue.h" #include "upb/pb/varint.h" -#include "upb/proto2_bridge.hpp" #include "upb_test.h" void compare_metadata(const google::protobuf::Descriptor* d, @@ -36,28 +36,25 @@ void compare_metadata(const google::protobuf::Descriptor* d, ASSERT(proto2_f); ASSERT(upb_f->number() == proto2_f->number()); ASSERT(std::string(upb_f->name()) == proto2_f->name()); - ASSERT(upb_f->type() == static_cast(proto2_f->type())); + ASSERT(upb_f->type() == static_cast(proto2_f->type())); 
ASSERT(upb_f->IsSequence() == proto2_f->is_repeated()); } } void parse_and_compare(MESSAGE_CIDENT *msg1, MESSAGE_CIDENT *msg2, - const upb::MessageDef *upb_md, + const upb::Handlers *handlers, const char *str, size_t len, bool allow_jit) { // Parse to both proto2 and upb. ASSERT(msg1->ParseFromArray(str, len)); - upb::Handlers* handlers = upb::Handlers::New(); - upb::RegisterWriteHandlers(handlers, upb_md); upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers, allow_jit); upb::StringSource src(str, len); upb::Decoder decoder; - decoder.ResetPlan(plan, 0); + decoder.ResetPlan(plan); decoder.ResetInput(src.AllBytes(), msg2); msg2->Clear(); ASSERT(decoder.Decode() == UPB_OK); plan->Unref(); - handlers->Unref(); // Would like to just compare the message objects themselves, but // unfortunately MessageDifferencer is not part of the open-source release of @@ -83,7 +80,9 @@ void test_zig_zag() { } -int main(int argc, char *argv[]) +extern "C" { + +int run_tests(int argc, char *argv[]) { if (argc < 2) { fprintf(stderr, "Usage: test_vs_proto2 \n"); @@ -102,18 +101,18 @@ int main(int argc, char *argv[]) MESSAGE_CIDENT msg1; MESSAGE_CIDENT msg2; - const upb::MessageDef* m = upb::proto2_bridge::NewFinalMessageDef(msg1, &m); + const upb::Handlers* h = upb::google::NewWriteHandlers(msg1, &h); - compare_metadata(msg1.GetDescriptor(), m); + compare_metadata(msg1.GetDescriptor(), h->message_def()); // Run twice to test proper object reuse. 
- parse_and_compare(&msg1, &msg2, m, str, len, true); - parse_and_compare(&msg1, &msg2, m, str, len, false); - parse_and_compare(&msg1, &msg2, m, str, len, true); - parse_and_compare(&msg1, &msg2, m, str, len, false); + parse_and_compare(&msg1, &msg2, h, str, len, false); + parse_and_compare(&msg1, &msg2, h, str, len, true); + parse_and_compare(&msg1, &msg2, h, str, len, false); + parse_and_compare(&msg1, &msg2, h, str, len, true); printf("All tests passed, %d assertions.\n", num_assertions); - m->Unref(&m); + h->Unref(&h); free((void*)str); test_zig_zag(); @@ -121,3 +120,5 @@ int main(int argc, char *argv[]) google::protobuf::ShutdownProtobufLibrary(); return 0; } + +} diff --git a/tests/testmain.cc b/tests/testmain.cc new file mode 100644 index 0000000..ac0b313 --- /dev/null +++ b/tests/testmain.cc @@ -0,0 +1,18 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// Author: haberman@google.com (Josh Haberman) + +#include +#ifdef USE_GOOGLE +#include "base/init_google.h" +#endif + +extern "C" { +int run_tests(int argc, char *argv[]); +} + +int main(int argc, char *argv[]) { +#ifdef USE_GOOGLE + InitGoogle(NULL, &argc, &argv, true); +#endif + return run_tests(argc, argv);  // Propagate the suite's result as the process exit status. +} diff --git a/tests/upb_test.h b/tests/upb_test.h index 652977b..60714ba 100644 --- a/tests/upb_test.h +++ b/tests/upb_test.h @@ -9,25 +9,35 @@ #include #include +#include #ifdef __cplusplus extern "C" { #endif int num_assertions = 0; +uint32_t testhash = 0; + +#define PRINT_FAILURE(expr) \ + fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \ + fprintf(stderr, "expr: %s\n", #expr); \ + if (testhash) { \ + fprintf(stderr, "assertion failed running test %x. 
" \ + "Run with the arg %x to run only this test.\n", \ + testhash, testhash); \ + } + #define ASSERT(expr) do { \ ++num_assertions; \ if (!(expr)) { \ - fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \ - fprintf(stderr, "expr: %s\n", #expr); \ + PRINT_FAILURE(expr) \ abort(); \ } \ } while (0) #define ASSERT_NOCOUNT(expr) do { \ if (!(expr)) { \ - fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \ - fprintf(stderr, "expr: %s\n", #expr); \ + PRINT_FAILURE(expr) \ abort(); \ } \ } while (0) @@ -35,8 +45,7 @@ int num_assertions = 0; #define ASSERT_STATUS(expr, status) do { \ ++num_assertions; \ if (!(expr)) { \ - fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \ - fprintf(stderr, "expr: %s\n", #expr); \ + PRINT_FAILURE(expr) \ fprintf(stderr, "failed status: %s\n", upb_status_getstr(status)); \ abort(); \ } \ diff --git a/tools/dump_cinit.lua b/tools/dump_cinit.lua new file mode 100644 index 0000000..1447d58 --- /dev/null +++ b/tools/dump_cinit.lua @@ -0,0 +1,414 @@ +--[[ + + upb - a minimalist implementation of protocol buffers. + + Copyright (c) 2012 Google Inc. See LICENSE for details. + Author: Josh Haberman + + Routines for dumping internal data structures into C initializers + that can be compiled into a .o file. + +--]] + +local upbtable = require "upbtable" +local upb = require "upb" +local export = {} + +-- A tiny little abstraction that decouples the dump_* functions from +-- what they're writing to (appending to a string, writing to file I/O, etc). +-- This could possibly matter since naive string building is O(n^2) in the +-- number of appends. +function export.str_appender() + local str = "" + local function append(fmt, ...) + str = str .. string.format(fmt, ...) + end + local function get() + return str + end + return append, get +end + +function export.file_appender(file) + local f = file + local function append(fmt, ...) 
+ f:write(string.format(fmt, ...)) + end + return append +end + +-- const(f, label) -> UPB_LABEL_REPEATED, where f:label() == upb.LABEL_REPEATED +function const(obj, name) + local val = obj[name](obj) + for k, v in pairs(upb) do + if v == val and string.find(k, "^" .. string.upper(name)) then + return "UPB_" .. k + end + end + assert(false, "Couldn't find constant") +end + +--[[ + + LinkTable: an object that tracks all linkable objects and their offsets to + facilitate linking. + +--]] + +local LinkTable = {} +function LinkTable:new(basename, types) + local linktab = { + basename = basename, + types = types, + table = {}, -- ptr -> {type, 0-based offset} + obj_arrays = {} -- Establishes the ordering for each object type + } + for type, _ in pairs(types) do + linktab.obj_arrays[type] = {} + end + setmetatable(linktab, {__index = LinkTable}) -- Inheritance + return linktab +end + +-- Adds a new object to the sequence of objects of this type. +function LinkTable:add(objtype, ptr, obj) + obj = obj or ptr + assert(self.table[ptr] == nil, "object already added") -- self.table is keyed by ptr, not by obj + assert(self.types[objtype]) + local arr = self.obj_arrays[objtype] + self.table[ptr] = {objtype, #arr} + arr[#arr + 1] = obj +end + +-- Returns a C symbol name for the given objtype and offset. +function LinkTable:csym(objtype, offset) + local typestr = assert(self.types[objtype]) + return string.format("%s_%s[%d]", self.basename, typestr, offset) +end + +-- Returns the address of the given C object. +function LinkTable:addr(obj) + if obj == upbtable.NULL then + return "NULL" + else + local tabent = assert(self.table[obj], "unknown object") + return "&" .. self:csym(tabent[1], tabent[2]) + end +end + +-- Returns an array declarator indicating how many objects have been added. +function LinkTable:cdecl(objtype) + return self:csym(objtype, #self.obj_arrays[objtype]) +end + +function LinkTable:objs(objtype) + -- Return iterator function, allowing use as: + -- for obj in linktable:objs(type) do + -- -- ... 
+ -- done + local array = self.obj_arrays[objtype] + local i = 0 + return function() + i = i + 1 + if array[i] then return array[i] end + end +end + +--[[ + + Dumper: an object that can dump C initializers for several constructs. + Uses a LinkTable to resolve references when necessary. + +--]] + +local Dumper = {} +function Dumper:new(linktab) + local obj = {linktab = linktab} + setmetatable(obj, {__index = Dumper}) -- Inheritance + return obj +end + +-- Dumps a upb_value, eg: +-- UPB_VALUE_INIT_INT32(5) +function Dumper:value(val, upbtype) + if type(val) == "nil" then + return "UPB_VALUE_INIT_NONE" + elseif type(val) == "number" then + -- Use upbtype to disambiguate what kind of number it is. + if upbtype == upbtable.CTYPE_INT32 then + return string.format("UPB_VALUE_INIT_INT32(%d)", val) + else + -- TODO(haberman): add support for these so we can properly support + -- default values. + error("Unsupported number type " .. upbtype) + end + elseif type(val) == "string" then + return string.format('UPB_VALUE_INIT_CONSTPTR("%s")', val) + else + -- We take this as an object reference that has an entry in the link table. + return string.format("UPB_VALUE_INIT_CONSTPTR(%s)", self.linktab:addr(val)) + end +end + +-- Dumps a table key. +function Dumper:tabkey(key) + if type(key) == "nil" then + return "UPB_TABKEY_NONE" + elseif type(key) == "string" then + return string.format('UPB_TABKEY_STR("%s")', key) + else + return string.format("UPB_TABKEY_NUM(%d)", key) + end +end + +-- Dumps a table entry. +function Dumper:tabent(ent) + local key = self:tabkey(ent.key) + local val = self:value(ent.value, ent.valtype) + local next = self.linktab:addr(ent.next) + return string.format(' {%s, %s, %s},\n', key, val, next) +end + +-- Dumps an inttable array entry. This is almost the same as value() above, +-- except that nil values have a special value to indicate "empty". 
+function Dumper:arrayval(val) + if val.val then + return string.format(" %s,\n", self:value(val.val, val.valtype)) + else + return " UPB_ARRAY_EMPTYENT,\n" + end +end + +-- Dumps an initializer for the given strtable/inttable (respectively). Its +-- entries must have previously been added to the linktable. +function Dumper:strtable(t) + -- UPB_STRTABLE_INIT(count, mask, type, size_lg2, entries) + return string.format( + "UPB_STRTABLE_INIT(%d, %d, %d, %d, %s)", + t.count, t.mask, t.type, t.size_lg2, self.linktab:addr(t.entries[1].ptr)) +end + +function Dumper:inttable(t) + local lt = assert(self.linktab) + -- UPB_INTTABLE_INIT(count, mask, type, size_lg2, ent, a, asize, acount) + local entries = "NULL" + if #t.entries > 0 then + entries = lt:addr(t.entries[1].ptr) + end + return string.format( + "UPB_INTTABLE_INIT(%d, %d, %d, %d, %s, %s, %d, %d)", + t.count, t.mask, t.type, t.size_lg2, entries, + lt:addr(t.array[1].ptr), t.array_size, t.array_count) +end + +-- A visitor for visiting all tables of a def. Used first to count entries +-- and later to dump them. +local function gettables(def) + if def:def_type() == upb.DEF_MSG then + return {int = upbtable.msgdef_itof(def), str = upbtable.msgdef_ntof(def)} + elseif def:def_type() == upb.DEF_ENUM then + return {int = upbtable.enumdef_iton(def), str = upbtable.enumdef_ntoi(def)} + end +end + +local function emit_file_warning(append) + append('// This file was generated by upbc (the upb compiler).\n') + append('// Do not edit -- your changes will be discarded when the file is\n') + append('// regenerated.\n\n') +end + +--[[ + + Top-level, exported dumper functions + +--]] + +local function dump_defs_c(symtab, basename, append) + -- Add fielddefs for any msgdefs passed in. + local fielddefs = {} + for _, def in ipairs(symtab:getdefs(upb.DEF_MSG)) do + for field in def:fields() do + fielddefs[#fielddefs + 1] = field + end + end + + -- Get a list of all defs and add fielddefs to it. 
+ local defs = symtab:getdefs(upb.DEF_ANY) + for _, fielddef in ipairs(fielddefs) do + defs[#defs + 1] = fielddef + end + + -- Sort all defs by (type, name). + -- This gives us a linear ordering that we can use to create offsets into + -- shared arrays like REFTABLES, hash table entries, and arrays. + table.sort(defs, function(a, b) + if a:def_type() ~= b:def_type() then + return a:def_type() < b:def_type() + else + return a:full_name() < b:full_name() end + end + ) + + -- Perform pre-pass to build the link table. + local linktab = LinkTable:new(basename, { + [upb.DEF_MSG] = "msgs", + [upb.DEF_FIELD] = "fields", + [upb.DEF_ENUM] = "enums", + intentries = "intentries", + strentries = "strentries", + arrays = "arrays", + }) + for _, def in ipairs(defs) do + assert(def:is_frozen(), "can only dump frozen defs.") + linktab:add(def:def_type(), def) + local tables = gettables(def) + if tables then + for _, e in ipairs(tables.str.entries) do + linktab:add("strentries", e.ptr, e) + end + for _, e in ipairs(tables.int.entries) do + linktab:add("intentries", e.ptr, e) + end + for _, e in ipairs(tables.int.array) do + linktab:add("arrays", e.ptr, e) + end + end + end + + -- Emit forward declarations. + emit_file_warning(append) + append('#include "upb/def.h"\n\n') + append("const upb_msgdef %s;\n", linktab:cdecl(upb.DEF_MSG)) + append("const upb_fielddef %s;\n", linktab:cdecl(upb.DEF_FIELD)) + append("const upb_enumdef %s;\n", linktab:cdecl(upb.DEF_ENUM)) + append("const upb_tabent %s;\n", linktab:cdecl("strentries")) + append("const upb_tabent %s;\n", linktab:cdecl("intentries")) + append("const upb_value %s;\n", linktab:cdecl("arrays")) + append("\n") + + -- Emit defs. 
+ local dumper = Dumper:new(linktab) + + append("const upb_msgdef %s = {\n", linktab:cdecl(upb.DEF_MSG)) + for m in linktab:objs(upb.DEF_MSG) do + local tables = gettables(m) + -- UPB_MSGDEF_INIT(name, itof, ntof) + append(' UPB_MSGDEF_INIT("%s", %s, %s, %s),\n', + m:full_name(), + dumper:inttable(tables.int), + dumper:strtable(tables.str), + m:_selector_count()) + end + append("};\n\n") + + append("const upb_fielddef %s = {\n", linktab:cdecl(upb.DEF_FIELD)) + for f in linktab:objs(upb.DEF_FIELD) do + local subdef = "NULL" + if f:has_subdef() then + subdef = string.format("upb_upcast(%s)", linktab:addr(f:subdef())) + end + -- UPB_FIELDDEF_INIT(label, type, name, num, msgdef, subdef, + -- selector_base, default_value) + append(' UPB_FIELDDEF_INIT(%s, %s, "%s", %d, %s, %s, %d, %s),\n', + const(f, "label"), const(f, "type"), f:name(), + f:number(), linktab:addr(f:msgdef()), subdef, + f:_selector_base(), + dumper:value(nil) -- TODO + ) + end + append("};\n\n") + + append("const upb_enumdef %s = {\n", linktab:cdecl(upb.DEF_ENUM)) + for e in linktab:objs(upb.DEF_ENUM) do + local tables = gettables(e) + -- UPB_ENUMDEF_INIT(name, ntoi, iton, defaultval) + append(' UPB_ENUMDEF_INIT("%s", %s, %s, %d),\n', + e:full_name(), + dumper:strtable(tables.str), + dumper:inttable(tables.int), + --e:default()) + 0) + end + append("};\n\n") + + append("const upb_tabent %s = {\n", linktab:cdecl("strentries")) + for ent in linktab:objs("strentries") do + append(dumper:tabent(ent)) + end + append("};\n\n"); + + append("const upb_tabent %s = {\n", linktab:cdecl("intentries")) + for ent in linktab:objs("intentries") do + append(dumper:tabent(ent)) + end + append("};\n\n"); + + append("const upb_value %s = {\n", linktab:cdecl("arrays")) + for ent in linktab:objs("arrays") do + append(dumper:arrayval(ent)) + end + append("};\n\n"); + + return linktab +end + +local function join(...) + return table.concat({...}, ".") +end + +local function to_cident(...) 
+ return string.gsub(join(...), "%.", "_") +end + +local function to_preproc(...) + return string.upper(to_cident(...)) +end + +local function dump_defs_h(symtab, basename, append, linktab) + local ucase_basename = string.upper(basename) + emit_file_warning(append) + append('#ifndef %s_UPB_H_\n', ucase_basename) + append('#define %s_UPB_H_\n\n', ucase_basename) + append('#include "upb/def.h"\n\n') + append('#ifdef __cplusplus\n') + append('extern "C" {\n') + append('#endif\n\n') + + -- Dump C enums for proto enums. + append("// Enums\n\n") + for _, def in ipairs(symtab:getdefs(upb.DEF_ENUM)) do + local cident = to_cident(def:full_name()) + append('typedef enum {\n') + for k, v in def:values() do + append(' %s = %d,\n', to_preproc(cident, k), v) + end + append('} %s;\n\n', cident) + end + + -- Dump macros for referring to specific defs. + append("// Do not refer to these forward declarations; use the constants\n") + append("// below.\n") + append("extern const upb_msgdef %s;\n", linktab:cdecl(upb.DEF_MSG)) + append("extern const upb_fielddef %s;\n", linktab:cdecl(upb.DEF_FIELD)) + append("extern const upb_enumdef %s;\n\n", linktab:cdecl(upb.DEF_ENUM)) + append("// Constants for references to defs.\n") + append("// We hide these behind macros to decouple users from the\n") + append("// details of how we have statically defined them (ie. 
whether\n") + append("// each def has its own symbol or lives in an array of defs).\n") + for def in linktab:objs(upb.DEF_MSG) do + append("#define %s %s\n", to_preproc(def:full_name()), linktab:addr(def)) + end + append("\n") + + append('#ifdef __cplusplus\n') + append('}; // extern "C"\n') + append('#endif\n\n') + append('#endif // %s_UPB_H_\n', ucase_basename) +end + +function export.dump_defs(symtab, basename, append_h, append_c) + local linktab = dump_defs_c(symtab, basename, append_c) + dump_defs_h(symtab, basename, append_h, linktab) +end + +return export diff --git a/tools/test_cinit.lua b/tools/test_cinit.lua new file mode 100644 index 0000000..bb7977f --- /dev/null +++ b/tools/test_cinit.lua @@ -0,0 +1,78 @@ +--[[ + + upb - a minimalist implementation of protocol buffers. + + Copyright (c) 2012 Google Inc. See LICENSE for details. + Author: Josh Haberman + + Tests for dump_cinit.lua. Runs first in a mode that generates + some C code for an extension. The C code is compiled and then + loaded by a second invocation of the test which checks that the + generated defs are as expected. + +--]] + +local dump_cinit = require "dump_cinit" +local upb = require "upb" + +-- Once APIs for loading descriptors are fleshed out, we should replace this +-- with a descriptor for a meaty protobuf like descriptor.proto. +local symtab = upb.SymbolTable{ + upb.EnumDef{full_name = "MyEnum", + values = { + {"FOO", 1}, + {"BAR", 77} + } + }, + upb.MessageDef{full_name = "MyMessage", + fields = { + upb.FieldDef{label = upb.LABEL_REQUIRED, name = "field1", number = 1, + type = upb.TYPE_INT32}, + upb.FieldDef{label = upb.LABEL_REPEATED, name = "field2", number = 2, + type = upb.TYPE_ENUM, subdef_name = ".MyEnum"}, + upb.FieldDef{name = "field3", number = 3, type = upb.TYPE_MESSAGE, + subdef_name = ".MyMessage"} + } + } +} + +if arg[1] == "generate" then + local f = assert(io.open(arg[2], "w")) + local f_h = assert(io.open(arg[2] .. 
".h", "w")) + local appendc = dump_cinit.file_appender(f) + local appendh = dump_cinit.file_appender(f_h) + f:write('#include "lua.h"\n') + f:write('#define ELEMENTS(array) (sizeof(array)/sizeof(*array))\n') + f:write('#include "bindings/lua/upb.h"\n') + dump_cinit.dump_defs(symtab, "test", appendh, appendc) + f:write([[int luaopen_staticdefs(lua_State *L) { + lua_newtable(L); + for (int i = 0; i < ELEMENTS(test_msgs); i++) { + lupb_def_pushnewrapper(L, upb_upcast(&test_msgs[i]), NULL); + lua_rawseti(L, -2, i + 1); + } + for (int i = 0; i < ELEMENTS(test_enums); i++) { + lupb_def_pushnewrapper(L, upb_upcast(&test_enums[i]), NULL); + lua_rawseti(L, -2, ELEMENTS(test_msgs) + i + 1); + } + return 1; + }]]) + f_h:close() + f:close() +elseif arg[1] == "test" then + local staticdefs = require "staticdefs" + + local msg = assert(staticdefs[1]) + local enum = assert(staticdefs[2]) + local f2 = assert(msg:field("field2")) + assert(msg:def_type() == upb.DEF_MSG) + assert(msg:full_name() == "MyMessage") + assert(enum:def_type() == upb.DEF_ENUM) + assert(enum:full_name() == "MyEnum") + assert(enum:value("FOO") == 1) + assert(f2:name() == "field2") + assert(f2:msgdef() == msg) + assert(f2:subdef() == enum) +else + error("Unknown operation " .. arg[1]) +end diff --git a/tools/upbc.c b/tools/upbc.c deleted file mode 100644 index 4b25f3e..0000000 --- a/tools/upbc.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Google Inc. See LICENSE for details. - * Author: Josh Haberman - * - * upbc is the upb compiler, which at the moment simply takes a - * protocol descriptor and outputs a header file containing the - * names and types of the fields. 
- */ - -#include -#include -#include -#include -#include "upb/bytestream.h" -#include "upb/def.h" -#include "upb/msg.h" -#include "upb/pb/glue.h" - -/* These are in-place string transformations that do not change the length of - * the string (and thus never need to re-allocate). */ - -// Convert to C identifier: foo.bar.Baz -> foo_bar_Baz. -static void to_cident(char *str) { - for (; *str; ++str) { - if(*str == '.' || *str == '/') *str = '_'; - } -} - -// Convert to C proprocessor identifier: foo.bar.Baz -> FOO_BAR_BAZ. -static void to_preproc(char *str) { - to_cident(str); - for (; *str; ++str) *str = toupper(*str); -} - -/* The _const.h file defines the constants (enums) defined in the .proto - * file. */ -static void write_const_h(const upb_def *defs[], int num_entries, - char *outfile_name, FILE *stream) { - /* Header file prologue. */ - char *include_guard_name = strdup(outfile_name); - to_preproc(include_guard_name); - - fputs("/* This file was generated by upbc (the upb compiler). " - "Do not edit. */\n\n", stream), - fprintf(stream, "#ifndef %s\n", include_guard_name); - fprintf(stream, "#define %s\n\n", include_guard_name); - fputs("#ifdef __cplusplus\n", stream); - fputs("extern \"C\" {\n", stream); - fputs("#endif\n\n", stream); - - /* Enums. */ - fprintf(stream, "/* Enums. */\n\n"); - for(int i = 0; i < num_entries; i++) { /* Foreach enum */ - if(defs[i]->type != UPB_DEF_ENUM) continue; - const upb_enumdef *enumdef = upb_downcast_enumdef_const(defs[i]); - char *enum_name = strdup(upb_def_fullname(UPB_UPCAST(enumdef))); - char *enum_val_prefix = strdup(enum_name); - to_cident(enum_name); - to_preproc(enum_val_prefix); - - fprintf(stream, "typedef enum %s {\n", enum_name); - bool first = true; - /* Foreach enum value. 
*/ - upb_enum_iter iter; - for (upb_enum_begin(&iter, enumdef); - !upb_enum_done(&iter); - upb_enum_next(&iter)) { - char *value_name = strdup(upb_enum_iter_name(&iter)); - uint32_t value = upb_enum_iter_number(&iter); - to_preproc(value_name); - /* " GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13," */ - if (!first) fputs(",\n", stream); - first = false; - fprintf(stream, " %s_%s = %" PRIu32, enum_val_prefix, value_name, value); - free(value_name); - } - fprintf(stream, "\n} %s;\n\n", enum_name); - free(enum_name); - free(enum_val_prefix); - } - - /* Constants for field names and numbers. */ - fprintf(stream, "/* Constants for field names and numbers. */\n\n"); - for(int i = 0; i < num_entries; i++) { /* Foreach enum */ - const upb_msgdef *m = upb_dyncast_msgdef_const(defs[i]); - if(!m) continue; - char *msg_name = strdup(upb_def_fullname(UPB_UPCAST(m))); - char *msg_val_prefix = strdup(msg_name); - to_preproc(msg_val_prefix); - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - char *preproc_field_name = strdup(upb_fielddef_name(f)); - to_preproc(preproc_field_name); - fprintf(stream, "#define %s_%s__FIELDNUM %d\n", - msg_val_prefix, preproc_field_name, upb_fielddef_number(f)); - fprintf(stream, "#define %s_%s__FIELDNAME \"%s\"\n", - msg_val_prefix, preproc_field_name, upb_fielddef_name(f)); - fprintf(stream, "#define %s_%s__FIELDTYPE %d\n\n", - msg_val_prefix, preproc_field_name, upb_fielddef_type(f)); - free(preproc_field_name); - } - free(msg_val_prefix); - free(msg_name); - } - - /* Epilogue. 
*/ - fputs("#ifdef __cplusplus\n", stream); - fputs("} /* extern \"C\" */\n", stream); - fputs("#endif\n\n", stream); - fprintf(stream, "#endif /* %s */\n", include_guard_name); - free(include_guard_name); -} - -const char usage[] = - "upbc -- upb compiler.\n" - "upb v0.1 http://blog.reverberate.org/upb/\n" - "\n" - "Usage: upbc [options] descriptor-file\n" - "\n" - " -o OUTFILE-BASE Write to OUTFILE-BASE.h and OUTFILE-BASE.c instead\n" - " of using the input file as a basename.\n" -; - -void usage_err(const char *err) { - fprintf(stderr, "upbc: %s\n\n", err); - fputs(usage, stderr); - exit(1); -} - -void error(const char *err, ...) { - va_list args; - va_start(args, err); - fprintf(stderr, "upbc: "); - vfprintf(stderr, err, args); - va_end(args); - exit(1); -} - -int main(int argc, char *argv[]) { - /* Parse arguments. */ - char *outfile_base = NULL, *input_file = NULL; - for(int i = 1; i < argc; i++) { - if(strcmp(argv[i], "-o") == 0) { - if(++i == argc) - usage_err("-o must be followed by a FILE-BASE."); - else if(outfile_base) - usage_err("-o was specified multiple times."); - outfile_base = argv[i]; - } else { - if(input_file) - usage_err("You can only specify one input file."); - input_file = argv[i]; - } - } - if(!input_file) usage_err("You must specify an input file."); - if(!outfile_base) outfile_base = input_file; - - // Read and parse input file. - size_t len; - char *descriptor = upb_readfile(input_file, &len); - if(!descriptor) - error("Couldn't read input file."); - - // TODO: make upb_parsedesc use a separate symtab, so we can use it here when - // importing descriptor.proto. - upb_symtab *s = upb_symtab_new(); - upb_status status = UPB_STATUS_INIT; - upb_load_descriptor_into_symtab(s, descriptor, len, &status); - if(!upb_ok(&status)) { - error("Failed to parse input file descriptor: %s\n", - upb_status_getstr(&status)); - } - upb_status_uninit(&status); - - /* Emit output files. 
*/ - char h_const_filename[256]; - const int maxsize = sizeof(h_const_filename); - if(snprintf(h_const_filename, maxsize, "%s_const.h", outfile_base) >= maxsize) - error("File base too long.\n"); - - FILE *h_const_file = fopen(h_const_filename, "w"); - if(!h_const_file) error("Failed to open _const.h output file\n"); - - int symcount; - const upb_def **defs = upb_symtab_getdefs(s, &symcount, UPB_DEF_ANY, &defs); - write_const_h(defs, symcount, h_const_filename, h_const_file); - for (int i = 0; i < symcount; i++) upb_def_unref(defs[i], &defs); - free(defs); - free(descriptor); - upb_symtab_unref(s); - fclose(h_const_file); - - return 0; -} diff --git a/tools/upbc.lua b/tools/upbc.lua new file mode 100644 index 0000000..f68d25f --- /dev/null +++ b/tools/upbc.lua @@ -0,0 +1,50 @@ +--[[ + + upb - a minimalist implementation of protocol buffers. + + Copyright (c) 2012 Google Inc. See LICENSE for details. + Author: Josh Haberman + + The upb compiler. Unlike the proto2 compiler, this does + not output any parsing code or generated classes or anything + specific to the protobuf binary format at all. At the moment + it only dumps C initializers for upb_defs, so that a .proto + file can be represented in a .o file. + +--]] + +local dump_cinit = require "dump_cinit" +local upb = require "upb" + +local src = arg[1] +local outbase = arg[2] +local basename = arg[3] +local hfilename = outbase .. ".upb.h" +local cfilename = outbase .. ".upb.c" + +if os.getenv("UPBC_VERBOSE") then + print("upbc:") + print(string.format(" source file=%s", src)) + print(string.format(" output file base=%s", outbase)) + print(string.format(" hfilename=%s", hfilename)) + print(string.format(" cfilename=%s", cfilename)) +end + +-- Open input/output files. +local f = assert(io.open(src, "r"), "couldn't open input file " .. 
src) +local descriptor = f:read("*all") +local symtab = upb.SymbolTable() +symtab:load_descriptor(descriptor) + +os.execute(string.format("mkdir -p `dirname %s`", outbase)) +local hfile = assert(io.open(hfilename, "w"), "couldn't open " .. hfilename) +local cfile = assert(io.open(cfilename, "w"), "couldn't open " .. cfilename) + +local happend = dump_cinit.file_appender(hfile) +local cappend = dump_cinit.file_appender(cfile) + +-- Dump defs +dump_cinit.dump_defs(symtab, basename, happend, cappend) + +hfile:close() +cfile:close() diff --git a/upb/bytestream.c b/upb/bytestream.c index a242df4..8473f33 100644 --- a/upb/bytestream.c +++ b/upb/bytestream.c @@ -7,11 +7,13 @@ #include "upb/bytestream.h" -#include #include #include -char *upb_byteregion_strdup(const struct _upb_byteregion *r) { + +/* upb_byteregion *************************************************************/ + +char *upb_byteregion_strdup(const upb_byteregion *r) { char *ret = malloc(upb_byteregion_len(r) + 1); upb_byteregion_copyall(r, ret); ret[upb_byteregion_len(r)] = '\0'; diff --git a/upb/bytestream.h b/upb/bytestream.h index bdfcd73..41f767a 100644 --- a/upb/bytestream.h +++ b/upb/bytestream.h @@ -73,16 +73,18 @@ #ifndef UPB_BYTESTREAM_H #define UPB_BYTESTREAM_H -#include -#include -#include -#include -#include -#include #include "upb.h" #ifdef __cplusplus +namespace upb { +class ByteRegion; +class StringSource; +} // namespace upb +typedef upb::StringSource upb_stringsrc; extern "C" { +#else +struct upb_stringsrc; +typedef struct upb_stringsrc upb_stringsrc; #endif typedef enum { @@ -185,22 +187,91 @@ INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs, #define UPB_NONDELIMITED (0xffffffffffffffffULL) -typedef struct _upb_byteregion { +#ifdef __cplusplus +} // extern "C" + +class upb::ByteRegion { + public: + static const uint64_t kNondelimited = UPB_NONDELIMITED; + typedef upb_bytesuccess_t ByteSuccess; + + // Accessors for the regions bounds -- the meaning of these is 
described in + // the diagram above. + uint64_t start_ofs() const; + uint64_t discard_ofs() const; + uint64_t fetch_ofs() const; + uint64_t end_ofs() const; + + // Returns how many bytes are fetched and available for reading starting from + // offset "offset". + uint64_t BytesAvailable(uint64_t offset) const; + + // Returns the total number of bytes remaining after offset "offset", or + // kNondelimited if the byteregion is non-delimited. + uint64_t BytesRemaining(uint64_t offset) const; + + uint64_t Length() const; + + // Sets the value of this byteregion to be a subset of the given byteregion's + // data. The caller is responsible for releasing this region before the src + // region is released (unless the region is first pinned, if pinning support + // is added. see below). + void Reset(const upb_byteregion *src, uint64_t ofs, uint64_t len); + void Release(); + + // Attempts to fetch more data, extending the fetched range of this + // byteregion. Returns true if the fetched region was extended by at least + // one byte, false on EOF or error (see *s for details). + ByteSuccess Fetch(); + + // Fetches all remaining data, returning false if the operation failed (see + // *s for details). May only be used on delimited byteregions. + ByteSuccess FetchAll(); + + // Discards bytes from the byteregion up until ofs (which must be greater or + // equal to discard_ofs()). It is valid to discard bytes that have not been + // fetched (such bytes will never be fetched) but it is an error to discard + // past the end of a delimited byteregion. + void Discard(uint64_t ofs); + + // Copies "len" bytes of data into "dst", starting at ofs. The specified + // region must be available. + void Copy(uint64_t ofs, size_t len, char *dst) const; + + // Copies all bytes from the byteregion into dst. Requires that the entire + // byteregion is fetched and that none has been discarded. 
+ void CopyAll(char *dst) const; + + // Returns a pointer to the internal buffer for the byteregion starting at + // offset "ofs." Stores the number of bytes available in this buffer in *len. + // The returned buffer is invalidated when the byteregion is reset or + // released, or when the bytes are discarded. If the byteregion is not + // currently pinned, the pointer is only valid for the lifetime of the parent + // byteregion. + const char *GetPtr(uint64_t ofs, size_t *len) const; + + // Copies the contents of the byteregion into a newly-allocated, + // NULL-terminated string. Requires that the byteregion is fully fetched. + char *StrDup() const; + + template void AssignToString(T* str); + +#else +struct upb_byteregion { +#endif uint64_t start; uint64_t discard; uint64_t fetch; uint64_t end; // UPB_NONDELIMITED if nondelimited. upb_bytesrc *bytesrc; bool toplevel; // If true, discards hit the underlying bytesrc. -} upb_byteregion; +}; -// Initializes a byteregion. Its initial value will be empty. No methods may -// be called on an empty byteregion except upb_byteregion_reset(). -void upb_byteregion_init(upb_byteregion *r); -void upb_byteregion_uninit(upb_byteregion *r); +#ifdef __cplusplus +extern "C" { +#endif -// Accessors for the regions bounds -- the meaning of these is described in the -// diagram above. +// Native C API. INLINE uint64_t upb_byteregion_startofs(const upb_byteregion *r) { return r->start; } @@ -213,17 +284,11 @@ INLINE uint64_t upb_byteregion_fetchofs(const upb_byteregion *r) { INLINE uint64_t upb_byteregion_endofs(const upb_byteregion *r) { return r->end; } - -// Returns how many bytes are fetched and available for reading starting -// from offset "o". INLINE uint64_t upb_byteregion_available(const upb_byteregion *r, uint64_t o) { assert(o >= upb_byteregion_discardofs(r)); assert(o <= r->fetch); // Could relax this. 
return r->fetch - o; } - -// Returns the total number of bytes remaining after offset "o", or -// UPB_NONDELIMITED if the byteregion is non-delimited. INLINE uint64_t upb_byteregion_remaining(const upb_byteregion *r, uint64_t o) { return r->end == UPB_NONDELIMITED ? UPB_NONDELIMITED : r->end - o; } @@ -231,22 +296,10 @@ INLINE uint64_t upb_byteregion_remaining(const upb_byteregion *r, uint64_t o) { INLINE uint64_t upb_byteregion_len(const upb_byteregion *r) { return upb_byteregion_remaining(r, r->start); } - -// Sets the value of this byteregion to be a subset of the given byteregion's -// data. The caller is responsible for releasing this region before the src -// region is released (unless the region is first pinned, if pinning support is -// added. see below). void upb_byteregion_reset(upb_byteregion *r, const upb_byteregion *src, uint64_t ofs, uint64_t len); void upb_byteregion_release(upb_byteregion *r); - -// Attempts to fetch more data, extending the fetched range of this byteregion. -// Returns true if the fetched region was extended by at least one byte, false -// on EOF or error (see *s for details). upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r); - -// Fetches all remaining data for "r", returning the success of the operation -// May only be used on delimited byteregions. INLINE upb_bytesuccess_t upb_byteregion_fetchall(upb_byteregion *r) { assert(upb_byteregion_len(r) != UPB_NONDELIMITED); upb_bytesuccess_t ret; @@ -255,11 +308,6 @@ INLINE upb_bytesuccess_t upb_byteregion_fetchall(upb_byteregion *r) { } while (ret == UPB_BYTE_OK); return ret == UPB_BYTE_EOF ? UPB_BYTE_OK : ret; } - -// Discards bytes from the byteregion up until ofs (which must be greater or -// equal to upb_byteregion_discardofs()). It is valid to discard bytes that -// have not been fetched (such bytes will never be fetched) but it is an error -// to discard past the end of a delimited byteregion. 
INLINE void upb_byteregion_discard(upb_byteregion *r, uint64_t ofs) { assert(ofs >= upb_byteregion_discardofs(r)); assert(ofs <= upb_byteregion_endofs(r)); @@ -267,28 +315,16 @@ INLINE void upb_byteregion_discard(upb_byteregion *r, uint64_t ofs) { if (ofs > r->fetch) r->fetch = ofs; if (r->toplevel) upb_bytesrc_discard(r->bytesrc, ofs); } - -// Copies "len" bytes of data into "dst", starting at ofs. The specified -// region must be available. INLINE void upb_byteregion_copy(const upb_byteregion *r, uint64_t ofs, size_t len, char *dst) { assert(ofs >= upb_byteregion_discardofs(r)); assert(len <= upb_byteregion_available(r, ofs)); upb_bytesrc_copy(r->bytesrc, ofs, len, dst); } - -// Copies all bytes from the byteregion into dst. Requires that the entire -// byteregion is fetched and that none has been discarded. INLINE void upb_byteregion_copyall(const upb_byteregion *r, char *dst) { assert(r->start == r->discard && r->end == r->fetch); upb_byteregion_copy(r, r->start, upb_byteregion_len(r), dst); } - -// Returns a pointer to the internal buffer for the byteregion starting at -// offset "ofs." Stores the number of bytes available in this buffer in *len. -// The returned buffer is invalidated when the byteregion is reset or released, -// or when the bytes are discarded. If the byteregion is not currently pinned, -// the pointer is only valid for the lifetime of the parent byteregion. INLINE const char *upb_byteregion_getptr(const upb_byteregion *r, uint64_t ofs, size_t *len) { assert(ofs >= upb_byteregion_discardofs(r)); @@ -354,9 +390,7 @@ INLINE int upb_bytesink_write(upb_bytesink *s, const void *buf, int len) { return s->vtbl->write(s, buf, len); } -INLINE int upb_bytesink_writestr(upb_bytesink *sink, const char *str) { - return upb_bytesink_write(sink, str, strlen(str)); -} +#define upb_bytesink_writestr(s, buf) upb_bytesink_write(s, buf, strlen(buf)) // Returns the number of bytes written or -1 on error. 
INLINE int upb_bytesink_printf(upb_bytesink *sink, const char *fmt, ...) { @@ -413,27 +447,47 @@ INLINE void upb_bytesink_rewind(upb_bytesink *sink, uint64_t offset) { // bytesrc/bytesink for a simple contiguous string. -typedef struct { +#ifdef __cplusplus +} // extern "C" + +class upb::StringSource { + public: + StringSource(); + template explicit StringSource(const T& str); + StringSource(const char *data, size_t len); + ~StringSource(); + + // Resets the stringsrc to a state where it will vend the given string. The + // string data must be valid until the stringsrc is reset again or destroyed. + void Reset(const char* data, size_t len); + template void Reset(const T& str); + + // Returns the top-level upb_byteregion* for this stringsrc. Invalidated + // when the stringsrc is reset. + ByteRegion* AllBytes(); + + upb_bytesrc* ByteSource(); + +#else +struct upb_stringsrc { +#endif upb_bytesrc bytesrc; const char *str; size_t len; upb_byteregion byteregion; -} upb_stringsrc; +}; -// Create/free a stringsrc. +#ifdef __cplusplus +extern "C" { +#endif + +// Native C API. void upb_stringsrc_init(upb_stringsrc *s); void upb_stringsrc_uninit(upb_stringsrc *s); - -// Resets the stringsrc to a state where it will vend the given string. The -// string data must be valid until the stringsrc is reset again or destroyed. void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len); - INLINE upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { return &s->bytesrc; } - -// Returns the top-level upb_byteregion* for this stringsrc. Invalidated when -// the stringsrc is reset. 
INLINE upb_byteregion *upb_stringsrc_allbytes(upb_stringsrc *s) { return &s->byteregion; } @@ -465,7 +519,111 @@ const char *upb_stringsink_release(upb_stringsink *s, size_t *len); upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s); #ifdef __cplusplus -} /* extern "C" */ +} // extern "C" + +namespace upb { + +inline uint64_t ByteRegion::start_ofs() const { + return upb_byteregion_startofs(this); +} +inline uint64_t ByteRegion::discard_ofs() const { + return upb_byteregion_discardofs(this); +} +inline uint64_t ByteRegion::fetch_ofs() const { + return upb_byteregion_fetchofs(this); +} +inline uint64_t ByteRegion::end_ofs() const { + return upb_byteregion_endofs(this); +} +inline uint64_t ByteRegion::BytesAvailable(uint64_t offset) const { + return upb_byteregion_available(this, offset); +} +inline uint64_t ByteRegion::BytesRemaining(uint64_t offset) const { + return upb_byteregion_remaining(this, offset); +} +inline uint64_t ByteRegion::Length() const { + return upb_byteregion_len(this); +} +inline void ByteRegion::Reset( + const upb_byteregion *src, uint64_t ofs, uint64_t len) { + upb_byteregion_reset(this, src, ofs, len); +} +inline void ByteRegion::Release() { + upb_byteregion_release(this); +} +inline ByteRegion::ByteSuccess ByteRegion::Fetch() { + return upb_byteregion_fetch(this); +} +inline ByteRegion::ByteSuccess ByteRegion::FetchAll() { + return upb_byteregion_fetchall(this); +} +inline void ByteRegion::Discard(uint64_t ofs) { + upb_byteregion_discard(this, ofs); +} +inline void ByteRegion::Copy(uint64_t ofs, size_t len, char *dst) const { + upb_byteregion_copy(this, ofs, len, dst); +} +inline void ByteRegion::CopyAll(char *dst) const { + upb_byteregion_copyall(this, dst); +} +inline const char *ByteRegion::GetPtr(uint64_t ofs, size_t *len) const { + return upb_byteregion_getptr(this, ofs, len); +} +inline char *ByteRegion::StrDup() const { + return upb_byteregion_strdup(this); +} +template void ByteRegion::AssignToString(T* str) { + uint64_t ofs = 
start_ofs(); + size_t len; + const char *ptr = GetPtr(ofs, &len); + // Empirically calling reserve() here is counterproductive and slows down + // benchmarks. If the parsing is happening in a tight loop that is reusing + // the string object, there is probably enough data reserved already and + // the reserve() call is extra overhead. + str->assign(ptr, len); + ofs += len; + while (ofs < end_ofs()) { + ptr = GetPtr(ofs, &len); + str->append(ptr, len); + ofs += len; + } +} + +template <> inline ByteRegion* GetValue(Value v) { + return static_cast(upb_value_getbyteregion(v)); +} + +template <> inline Value MakeValue(ByteRegion* v) { + return upb_value_byteregion(v); +} + +inline StringSource::StringSource() { upb_stringsrc_init(this); } +template StringSource::StringSource(const T& str) { + upb_stringsrc_init(this); + Reset(str); +} +inline StringSource::StringSource(const char *data, size_t len) { + upb_stringsrc_init(this); + Reset(data, len); +} +inline StringSource::~StringSource() { + upb_stringsrc_uninit(this); +} +inline void StringSource::Reset(const char* data, size_t len) { + upb_stringsrc_reset(this, data, len); +} +template void StringSource::Reset(const T& str) { + upb_stringsrc_reset(this, str.c_str(), str.size()); +} +inline ByteRegion* StringSource::AllBytes() { + return upb_stringsrc_allbytes(this); +} +inline upb_bytesrc* StringSource::ByteSource() { + return upb_stringsrc_bytesrc(this); +} + +} // namespace upb + #endif #endif diff --git a/upb/def.c b/upb/def.c index 4bcc0c6..d858c39 100644 --- a/upb/def.c +++ b/upb/def.c @@ -5,11 +5,12 @@ * Author: Josh Haberman */ +#include "upb/def.h" + #include -#include #include #include "upb/bytestream.h" -#include "upb/def.h" +#include "upb/handlers.h" // isalpha() etc. from <ctype.h> are locale-dependent, which we don't want.
static bool upb_isbetween(char c, char low, char high) { @@ -44,204 +45,175 @@ static bool upb_isident(const char *str, size_t len, bool full) { /* upb_def ********************************************************************/ -static void upb_msgdef_free(upb_msgdef *m); -static void upb_fielddef_free(upb_fielddef *f); -static void upb_enumdef_free(upb_enumdef *e); +upb_deftype_t upb_def_type(const upb_def *d) { return d->type; } -bool upb_def_ismutable(const upb_def *def) { return !def->is_finalized; } -bool upb_def_isfinalized(const upb_def *def) { return def->is_finalized; } +const char *upb_def_fullname(const upb_def *d) { return d->fullname; } bool upb_def_setfullname(upb_def *def, const char *fullname) { - assert(upb_def_ismutable(def)); + assert(!upb_def_isfrozen(def)); if (!upb_isident(fullname, strlen(fullname), true)) return false; - free(def->fullname); - def->fullname = strdup(fullname); + free((void*)def->fullname); + def->fullname = upb_strdup(fullname); return true; } -void upb_def_ref(const upb_def *_def, const void *owner) { - upb_def *def = (upb_def*)_def; - upb_refcount_ref(&def->refcount, owner); -} - -void upb_def_unref(const upb_def *_def, const void *owner) { - upb_def *def = (upb_def*)_def; - if (!def) return; - if (!upb_refcount_unref(&def->refcount, owner)) return; - upb_def *base = def; - // Free all defs in the SCC. 
- do { - upb_def *next = (upb_def*)def->refcount.next; - switch (def->type) { - case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break; - case UPB_DEF_FIELD: upb_fielddef_free(upb_downcast_fielddef(def)); break; - case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break; - default: - assert(false); - } - def = next; - } while(def != base); -} - -void upb_def_donateref(const upb_def *_def, const void *from, const void *to) { - upb_def *def = (upb_def*)_def; - upb_refcount_donateref(&def->refcount, from, to); -} - upb_def *upb_def_dup(const upb_def *def, const void *o) { switch (def->type) { case UPB_DEF_MSG: - return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def), o)); + return upb_upcast(upb_msgdef_dup(upb_downcast_msgdef(def), o)); case UPB_DEF_FIELD: - return UPB_UPCAST(upb_fielddef_dup(upb_downcast_fielddef_const(def), o)); + return upb_upcast(upb_fielddef_dup(upb_downcast_fielddef(def), o)); case UPB_DEF_ENUM: - return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def), o)); + return upb_upcast(upb_enumdef_dup(upb_downcast_enumdef(def), o)); default: assert(false); return NULL; } } -static bool upb_def_init(upb_def *def, upb_deftype_t type, const void *owner) { +bool upb_def_isfrozen(const upb_def *def) { + return upb_refcounted_isfrozen(upb_upcast(def)); +} + +void upb_def_ref(const upb_def *def, const void *owner) { + upb_refcounted_ref(upb_upcast(def), owner); +} + +void upb_def_unref(const upb_def *def, const void *owner) { + upb_refcounted_unref(upb_upcast(def), owner); +} + +void upb_def_donateref(const upb_def *def, const void *from, const void *to) { + upb_refcounted_donateref(upb_upcast(def), from, to); +} + +void upb_def_checkref(const upb_def *def, const void *owner) { + upb_refcounted_checkref(upb_upcast(def), owner); +} + +static bool upb_def_init(upb_def *def, upb_deftype_t type, + const struct upb_refcounted_vtbl *vtbl, + const void *owner) { + if (!upb_refcounted_init(upb_upcast(def), vtbl, owner)) 
return false; def->type = type; - def->is_finalized = false; def->fullname = NULL; - return upb_refcount_init(&def->refcount, owner); + def->came_from_user = false; + return true; } static void upb_def_uninit(upb_def *def) { - upb_refcount_uninit(&def->refcount); - free(def->fullname); + free((void*)def->fullname); } -static void upb_def_getsuccessors(upb_refcount *refcount, void *closure) { - upb_def *def = (upb_def*)refcount; - switch (def->type) { - case UPB_DEF_MSG: { - upb_msgdef *m = upb_downcast_msgdef(def); - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - upb_refcount_visit(refcount, &f->base.refcount, closure); - } - break; - } - case UPB_DEF_FIELD: { - upb_fielddef *f = upb_downcast_fielddef(def); - assert(f->msgdef); - upb_refcount_visit(refcount, &f->msgdef->base.refcount, closure); - upb_def *subdef = f->sub.def; - if (subdef) - upb_refcount_visit(refcount, &subdef->refcount, closure); - break; - } - case UPB_DEF_ENUM: - case UPB_DEF_SERVICE: - case UPB_DEF_ANY: - break; - } +static const char *msgdef_name(const upb_msgdef *m) { + const char *name = upb_def_fullname(upb_upcast(m)); + return name ? 
name : "(anonymous)"; } -static bool upb_validate_field(const upb_fielddef *f, upb_status *s) { - if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == -1) { +static bool upb_validate_field(upb_fielddef *f, upb_status *s) { + if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) { upb_status_seterrliteral(s, "fielddef must have name and number set"); return false; } - if (upb_hassubdef(f)) { + if (upb_fielddef_hassubdef(f)) { if (f->subdef_is_symbolic) { upb_status_seterrf(s, - "field %s has not been resolved", upb_fielddef_name(f)); + "field '%s' has not been resolved", upb_fielddef_name(f)); return false; - } else if (upb_fielddef_subdef(f) == NULL) { + } + + const upb_def *subdef = upb_fielddef_subdef(f); + if (subdef == NULL) { + upb_status_seterrf(s, + "field %s.%s is missing required subdef", + msgdef_name(f->msgdef), upb_fielddef_name(f)); + return false; + } else if (!upb_def_isfrozen(subdef) && !subdef->came_from_user) { upb_status_seterrf(s, - "field is %s missing required subdef", upb_fielddef_name(f)); + "subdef of field %s.%s is not frozen or being frozen", + msgdef_name(f->msgdef), upb_fielddef_name(f)); return false; - } else if (!upb_def_isfinalized(upb_fielddef_subdef(f))) { + } else if (upb_fielddef_default_is_symbolic(f)) { upb_status_seterrf(s, - "field %s subtype is not being finalized", upb_fielddef_name(f)); + "enum field %s.%s has not been resolved", + msgdef_name(f->msgdef), upb_fielddef_name(f)); return false; } } return true; } -bool upb_finalize(upb_def *const*defs, int n, upb_status *s) { - if (n >= UINT16_MAX - 1) { - upb_status_seterrliteral(s, "too many defs (max is 64k at a time)"); - return false; - } - +bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) { // First perform validation, in two passes so we can check that we have a // transitive closure without needing to search. 
for (int i = 0; i < n; i++) { upb_def *def = defs[i]; - if (upb_def_isfinalized(def)) { + if (upb_def_isfrozen(def)) { // Could relax this requirement if it's annoying. - upb_status_seterrliteral(s, "def is already finalized"); + upb_status_seterrliteral(s, "def is already frozen"); goto err; } else if (def->type == UPB_DEF_FIELD) { - upb_status_seterrliteral(s, "standalone fielddefs can not be finalized"); + upb_status_seterrliteral(s, "standalone fielddefs can not be frozen"); goto err; } else { // Set now to detect transitive closure in the second pass. - def->is_finalized = true; + def->came_from_user = true; } } for (int i = 0; i < n; i++) { - upb_msgdef *m = upb_dyncast_msgdef(defs[i]); - if (!m) continue; - upb_inttable_compact(&m->itof); - upb_msg_iter j; - for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { - upb_fielddef *f = upb_msg_iter_field(&j); - assert(f->msgdef == m); - if (!upb_validate_field(f, s)) goto err; - } - } - - // Validation all passed, now find strongly-connected components so that - // our refcounting works with cycles. - upb_refcount_findscc((upb_refcount**)defs, n, &upb_def_getsuccessors); - - // Now that ref cycles have been removed it is safe to have each fielddef - // take a ref on its subdef (if any), but only if it's a member of another - // SCC. - for (int i = 0; i < n; i++) { - upb_msgdef *m = upb_dyncast_msgdef(defs[i]); - if (!m) continue; - upb_msg_iter j; - for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { - upb_fielddef *f = upb_msg_iter_field(&j); - f->base.is_finalized = true; - // Release the ref taken in upb_msgdef_addfields(). - upb_fielddef_unref(f, m); - if (!upb_hassubdef(f)) continue; - assert(upb_fielddef_subdef(f)); - if (!upb_refcount_merged(&f->base.refcount, &f->sub.def->refcount)) { - // Subdef is part of a different strongly-connected component. 
- upb_def_ref(f->sub.def, &f->sub.def); - f->subdef_is_owned = true; + upb_msgdef *m = upb_dyncast_msgdef_mutable(defs[i]); + upb_enumdef *e = upb_dyncast_enumdef_mutable(defs[i]); + if (m) { + upb_inttable_compact(&m->itof); + upb_msg_iter j; + uint32_t selector = 0; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + assert(f->msgdef == m); + if (!upb_validate_field(f, s)) goto err; + f->selector_base = selector + upb_handlers_selectorbaseoffset(f); + selector += upb_handlers_selectorcount(f); } + m->selector_count = selector; + } else if (e) { + upb_inttable_compact(&e->iton); } } - return true; + // Validation all passed; freeze the defs. + return upb_refcounted_freeze((upb_refcounted*const*)defs, n, s); err: for (int i = 0; i < n; i++) { - defs[i]->is_finalized = false; + defs[i]->came_from_user = false; } + assert(!upb_ok(s)); return false; } /* upb_enumdef ****************************************************************/ +static void upb_enumdef_free(upb_refcounted *r) { + upb_enumdef *e = (upb_enumdef*)r; + upb_inttable_iter i; + upb_inttable_begin(&i, &e->iton); + for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) { + // To clean up the upb_strdup() from upb_enumdef_addval(). 
+ free(upb_value_getcstr(upb_inttable_iter_value(&i))); + } + upb_strtable_uninit(&e->ntoi); + upb_inttable_uninit(&e->iton); + upb_def_uninit(upb_upcast(e)); + free(e); +} + upb_enumdef *upb_enumdef_new(const void *owner) { + static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_enumdef_free}; upb_enumdef *e = malloc(sizeof(*e)); if (!e) return NULL; - if (!upb_def_init(&e->base, UPB_DEF_ENUM, owner)) goto err2; - if (!upb_strtable_init(&e->ntoi)) goto err2; - if (!upb_inttable_init(&e->iton)) goto err1; + if (!upb_def_init(upb_upcast(e), UPB_DEF_ENUM, &vtbl, owner)) goto err2; + if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2; + if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1; return e; err1: @@ -251,26 +223,13 @@ err2: return NULL; } -static void upb_enumdef_free(upb_enumdef *e) { - upb_inttable_iter i; - upb_inttable_begin(&i, &e->iton); - for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) { - // To clean up the strdup() from upb_enumdef_addval(). - free(upb_value_getptr(upb_inttable_iter_value(&i))); - } - upb_strtable_uninit(&e->ntoi); - upb_inttable_uninit(&e->iton); - upb_def_uninit(&e->base); - free(e); -} - upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) { upb_enumdef *new_e = upb_enumdef_new(owner); if (!new_e) return NULL; upb_enum_iter i; for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { bool success = upb_enumdef_addval( - new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i)); + new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL); if (!success) { upb_enumdef_unref(new_e, owner); return NULL; @@ -279,23 +238,69 @@ upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) { return new_e; } -bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num) { - if (!upb_isident(name, strlen(name), false)) return false; - if (upb_enumdef_ntoi(e, name, NULL)) +bool upb_enumdef_isfrozen(const upb_enumdef *e) { + return upb_def_isfrozen(upb_upcast(e)); +} + 
+void upb_enumdef_ref(const upb_enumdef *e, const void *owner) { + upb_def_ref(upb_upcast(e), owner); +} + +void upb_enumdef_unref(const upb_enumdef *e, const void *owner) { + upb_def_unref(upb_upcast(e), owner); +} + +void upb_enumdef_donateref( + const upb_enumdef *e, const void *from, const void *to) { + upb_def_donateref(upb_upcast(e), from, to); +} + +void upb_enumdef_checkref(const upb_enumdef *e, const void *owner) { + upb_def_checkref(upb_upcast(e), owner); +} + +const char *upb_enumdef_fullname(const upb_enumdef *e) { + return upb_def_fullname(upb_upcast(e)); +} + +bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname) { + return upb_def_setfullname(upb_upcast(e), fullname); +} + +bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num, + upb_status *status) { + if (!upb_isident(name, strlen(name), false)) { + upb_status_seterrf(status, "name '%s' is not a valid identifier", name); return false; - if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) + } + if (upb_enumdef_ntoi(e, name, NULL)) { + upb_status_seterrf(status, "name '%s' is already defined", name); return false; + } + if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) { + upb_status_seterrliteral(status, "out of memory"); + return false; + } if (!upb_inttable_lookup(&e->iton, num) && - !upb_inttable_insert(&e->iton, num, upb_value_ptr(strdup(name)))) + !upb_inttable_insert(&e->iton, num, upb_value_cstr(upb_strdup(name)))) { + upb_status_seterrliteral(status, "out of memory"); + upb_strtable_remove(&e->ntoi, name, NULL); return false; + } return true; } +int32_t upb_enumdef_default(const upb_enumdef *e) { return e->defaultval; } + void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) { - assert(upb_def_ismutable(UPB_UPCAST(e))); + assert(!upb_enumdef_isfrozen(e)); e->defaultval = val; } +int upb_enumdef_numvals(const upb_enumdef *e) { + return upb_strtable_count(&e->ntoi); +} + void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) { // 
We iterate over the ntoi table, to account for duplicate numbers. upb_strtable_begin(i, &e->ntoi); @@ -313,7 +318,15 @@ bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, int32_t *num) { const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { const upb_value *v = upb_inttable_lookup32(&def->iton, num); - return v ? upb_value_getptr(*v) : NULL; + return v ? upb_value_getcstr(*v) : NULL; +} + +const char *upb_enum_iter_name(upb_enum_iter *iter) { + return upb_strtable_iter_key(iter); +} + +int32_t upb_enum_iter_number(upb_enum_iter *iter) { + return upb_value_getint32(upb_strtable_iter_value(iter)); } @@ -324,9 +337,7 @@ const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { {alignof(ctype), sizeof(ctype), UPB_CTYPE_ ## inmemory_type} const upb_typeinfo upb_types[UPB_NUM_TYPES] = { - // END_GROUP is not real, but used to signify the pseudo-field that - // ends a group from within the group. - TYPE_INFO(void*, PTR), // ENDGROUP + TYPE_INFO(void*, PTR), // (unused) TYPE_INFO(double, DOUBLE), // DOUBLE TYPE_INFO(float, FLOAT), // FLOAT TYPE_INFO(int64_t, INT64), // INT64 @@ -340,7 +351,7 @@ const upb_typeinfo upb_types[UPB_NUM_TYPES] = { TYPE_INFO(void*, PTR), // MESSAGE TYPE_INFO(void*, BYTEREGION), // BYTES TYPE_INFO(uint32_t, UINT32), // UINT32 - TYPE_INFO(uint32_t, INT32), // ENUM + TYPE_INFO(int32_t, INT32), // ENUM TYPE_INFO(int32_t, INT32), // SFIXED32 TYPE_INFO(int64_t, INT64), // SFIXED64 TYPE_INFO(int32_t, INT32), // SINT32 @@ -349,10 +360,36 @@ const upb_typeinfo upb_types[UPB_NUM_TYPES] = { static void upb_fielddef_init_default(upb_fielddef *f); +static void upb_fielddef_uninit_default(upb_fielddef *f) { + if (f->default_is_string) + upb_byteregion_free(upb_value_getbyteregion(f->defaultval)); +} + +static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit, + void *closure) { + const upb_fielddef *f = (const upb_fielddef*)r; + if (f->msgdef) { + visit(r, upb_upcast2(f->msgdef), closure); + } + if 
(!f->subdef_is_symbolic && f->sub.def) { + visit(r, upb_upcast(f->sub.def), closure); + } +} + +static void freefield(upb_refcounted *r) { + upb_fielddef *f = (upb_fielddef*)r; + upb_fielddef_uninit_default(f); + if (f->subdef_is_symbolic) + free(f->sub.name); + upb_def_uninit(upb_upcast(f)); + free(f); +} + upb_fielddef *upb_fielddef_new(const void *owner) { + static const struct upb_refcounted_vtbl vtbl = {visitfield, freefield}; upb_fielddef *f = malloc(sizeof(*f)); if (!f) return NULL; - if (!upb_def_init(UPB_UPCAST(f), UPB_DEF_FIELD, owner)) { + if (!upb_def_init(upb_upcast(f), UPB_DEF_FIELD, &vtbl, owner)) { free(f); return NULL; } @@ -360,35 +397,18 @@ upb_fielddef *upb_fielddef_new(const void *owner) { f->sub.def = NULL; f->subdef_is_symbolic = false; f->subdef_is_owned = false; - f->label = UPB_LABEL(OPTIONAL); - f->hasbit = -1; - f->offset = 0; - f->accessor = NULL; - upb_value_setfielddef(&f->fval, f); + f->label_ = UPB_LABEL(OPTIONAL); // These are initialized to be invalid; the user must set them explicitly. // Could relax this later if it's convenient and non-confusing to have a // defaults for them. 
- f->type = UPB_TYPE_NONE; - f->number = 0; + f->type_ = UPB_TYPE_NONE; + f->number_ = 0; upb_fielddef_init_default(f); return f; } -static void upb_fielddef_uninit_default(upb_fielddef *f) { - if (f->default_is_string) - upb_byteregion_free(upb_value_getbyteregion(f->defaultval)); -} - -static void upb_fielddef_free(upb_fielddef *f) { - if (f->subdef_is_owned) - upb_def_unref(f->sub.def, &f->sub.def); - upb_fielddef_uninit_default(f); - upb_def_uninit(UPB_UPCAST(f)); - free(f); -} - upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) { upb_fielddef *newf = upb_fielddef_new(owner); if (!newf) return NULL; @@ -396,10 +416,6 @@ upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) { upb_fielddef_setlabel(newf, upb_fielddef_label(f)); upb_fielddef_setnumber(newf, upb_fielddef_number(f)); upb_fielddef_setname(newf, upb_fielddef_name(f)); - upb_fielddef_sethasbit(newf, upb_fielddef_hasbit(f)); - upb_fielddef_setoffset(newf, upb_fielddef_offset(f)); - upb_fielddef_setaccessor(newf, upb_fielddef_accessor(f)); - upb_fielddef_setfval(newf, upb_fielddef_fval(f)); if (f->default_is_string) { upb_byteregion *r = upb_value_getbyteregion(upb_fielddef_default(f)); size_t len; @@ -424,13 +440,64 @@ upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) { } strcpy(newname, "."); strcat(newname, f->sub.def->fullname); - upb_fielddef_setsubtypename(newf, newname); + upb_fielddef_setsubdefname(newf, newname); free(newname); } return newf; } +bool upb_fielddef_isfrozen(const upb_fielddef *f) { + return upb_def_isfrozen(upb_upcast(f)); +} + +void upb_fielddef_ref(const upb_fielddef *f, const void *owner) { + upb_def_ref(upb_upcast(f), owner); +} + +void upb_fielddef_unref(const upb_fielddef *f, const void *owner) { + upb_def_unref(upb_upcast(f), owner); +} + +void upb_fielddef_donateref( + const upb_fielddef *f, const void *from, const void *to) { + upb_def_donateref(upb_upcast(f), from, to); +} + +void 
upb_fielddef_checkref(const upb_fielddef *f, const void *owner) { + upb_def_checkref(upb_upcast(f), owner); +} + +upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) { + return f->type_; +} + +upb_label_t upb_fielddef_label(const upb_fielddef *f) { + return f->label_; +} + +uint32_t upb_fielddef_number(const upb_fielddef *f) { return f->number_; } + +const char *upb_fielddef_name(const upb_fielddef *f) { + return upb_def_fullname(upb_upcast(f)); +} + +const upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f) { + return f->msgdef; +} + +upb_msgdef *upb_fielddef_msgdef_mutable(upb_fielddef *f) { + return (upb_msgdef*)f->msgdef; +} + +bool upb_fielddef_setname(upb_fielddef *f, const char *name) { + return upb_def_setfullname(upb_upcast(f), name); +} + +upb_value upb_fielddef_default(const upb_fielddef *f) { + return f->defaultval; +} + static void upb_fielddef_init_default(upb_fielddef *f) { f->default_is_string = false; switch (upb_fielddef_type(f)) { @@ -455,13 +522,12 @@ static void upb_fielddef_init_default(upb_fielddef *f) { break; case UPB_TYPE(GROUP): case UPB_TYPE(MESSAGE): upb_value_setptr(&f->defaultval, NULL); break; - case UPB_TYPE_ENDGROUP: assert(false); case UPB_TYPE_NONE: break; } } const upb_def *upb_fielddef_subdef(const upb_fielddef *f) { - if (upb_hassubdef(f) && upb_fielddef_isfinalized(f)) { + if (upb_fielddef_hassubdef(f) && upb_fielddef_isfrozen(f)) { assert(f->sub.def); return f->sub.def; } else { @@ -473,65 +539,34 @@ upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) { return (upb_def*)upb_fielddef_subdef(f); } -const char *upb_fielddef_subtypename(upb_fielddef *f) { - assert(upb_fielddef_ismutable(f)); +const char *upb_fielddef_subdefname(const upb_fielddef *f) { + assert(!upb_fielddef_isfrozen(f)); return f->subdef_is_symbolic ? f->sub.name : NULL; } -// Could expose this to clients if a client wants to call it independently -// of upb_resolve() for whatever reason. 
-static bool upb_fielddef_resolvedefault(upb_fielddef *f, upb_status *s) { - if (!f->default_is_string) return true; - // Resolve the enum's default from a string to an integer. - upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); - assert(bytes); // Points to either a real default or the empty string. - upb_enumdef *e = upb_downcast_enumdef(upb_fielddef_subdef_mutable(f)); - int32_t val = 0; - if (upb_byteregion_len(bytes) == 0) { - upb_value_setint32(&f->defaultval, e->defaultval); - } else { - size_t len; - // ptr is guaranteed to be NULL-terminated because the byteregion was - // created with upb_byteregion_newl(). - const char *ptr = upb_byteregion_getptr( - bytes, upb_byteregion_startofs(bytes), &len); - assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk. - bool success = upb_enumdef_ntoi(e, ptr, &val); - if (!success) { - upb_status_seterrf( - s, "Default enum value (%s) is not a member of the enum", ptr); - return false; - } - upb_value_setint32(&f->defaultval, val); - } - f->default_is_string = false; - upb_byteregion_free(bytes); - return true; -} - -bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number) { +bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number) { assert(f->msgdef == NULL); - f->number = number; + f->number_ = number; return true; } bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) { - assert(upb_fielddef_ismutable(f)); + assert(!upb_fielddef_isfrozen(f)); upb_fielddef_uninit_default(f); - f->type = type; + f->type_ = type; upb_fielddef_init_default(f); return true; } bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) { - assert(upb_fielddef_ismutable(f)); - f->label = label; + assert(!upb_fielddef_isfrozen(f)); + f->label_ = label; return true; } void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) { - assert(upb_fielddef_ismutable(f)); - assert(!upb_isstring(f) && !upb_issubmsg(f)); + assert(!upb_fielddef_isfrozen(f)); + assert(!upb_fielddef_isstring(f) 
&& !upb_fielddef_issubmsg(f)); if (f->default_is_string) { upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); assert(bytes); @@ -542,20 +577,21 @@ void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) { } bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) { - assert(upb_isstring(f) || f->type == UPB_TYPE(ENUM)); + assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE(ENUM)); + if (f->type_ == UPB_TYPE(ENUM) && !upb_isident(str, len, false)) return false; + if (f->default_is_string) { upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); assert(bytes); upb_byteregion_free(bytes); } else { - assert(f->type == UPB_TYPE(ENUM)); + assert(f->type_ == UPB_TYPE(ENUM)); } - if (f->type == UPB_TYPE(ENUM) && !upb_isident(str, len, false)) return false; + upb_byteregion *r = upb_byteregion_newl(str, len); upb_value_setbyteregion(&f->defaultval, r); upb_bytesuccess_t ret = upb_byteregion_fetch(r); - (void)ret; - assert(ret == (len == 0 ? UPB_BYTE_EOF : UPB_BYTE_OK)); + UPB_ASSERT_VAR(ret, ret == (len == 0 ? UPB_BYTE_EOF : UPB_BYTE_OK)); assert(upb_byteregion_available(r, 0) == upb_byteregion_len(r)); f->default_is_string = true; return true; @@ -565,77 +601,127 @@ void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) { upb_fielddef_setdefaultstr(f, str, str ? strlen(str) : 0); } -void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) { - assert(upb_fielddef_ismutable(f)); - // TODO: we need an ownership/freeing mechanism for dynamically-allocated - // fvals. One possibility is to let the user supply a free() function - // and call it when the fval is no longer referenced. Would have to - // ensure that no common use cases need cycles. - // - // For now the fval has no ownership; the caller must simply guarantee - // somehow that it outlives any handlers/plan. 
- f->fval = fval; +bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) { + return f->default_is_string && f->type_ == UPB_TYPE_ENUM; } -void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit) { - assert(upb_fielddef_ismutable(f)); - f->hasbit = hasbit; -} +bool upb_fielddef_resolvedefault(upb_fielddef *f) { + if (!upb_fielddef_default_is_symbolic(f)) return true; -void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset) { - assert(upb_fielddef_ismutable(f)); - f->offset = offset; -} - -void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *tbl) { - assert(upb_fielddef_ismutable(f)); - f->accessor = tbl; + upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); + const upb_enumdef *e = upb_downcast_enumdef(upb_fielddef_subdef(f)); + assert(bytes); // Points to either a real default or the empty string. + assert(e); + if (upb_byteregion_len(bytes) == 0) { + // The "default default" for an enum is the first defined value. + upb_value_setint32(&f->defaultval, e->defaultval); + } else { + size_t len; + int32_t val = 0; + // ptr is guaranteed to be NULL-terminated because the byteregion was + // created with upb_byteregion_newl(). 
+ const char *ptr = upb_byteregion_getptr( + bytes, upb_byteregion_startofs(bytes), &len); + assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk + if (!upb_enumdef_ntoi(e, ptr, &val)) { + return false; + } + upb_value_setint32(&f->defaultval, val); + } + f->default_is_string = false; + upb_byteregion_free(bytes); + return true; } -static bool upb_subtype_typecheck(upb_fielddef *f, const upb_def *subdef) { - if (f->type == UPB_TYPE(MESSAGE) || f->type == UPB_TYPE(GROUP)) - return upb_dyncast_msgdef_const(subdef) != NULL; - else if (f->type == UPB_TYPE(ENUM)) - return upb_dyncast_enumdef_const(subdef) != NULL; +static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef) { + if (f->type_ == UPB_TYPE(MESSAGE) || f->type_ == UPB_TYPE(GROUP)) + return upb_dyncast_msgdef(subdef) != NULL; + else if (f->type_ == UPB_TYPE(ENUM)) + return upb_dyncast_enumdef(subdef) != NULL; else { assert(false); return false; } } -bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef) { - assert(upb_fielddef_ismutable(f)); - assert(upb_hassubdef(f)); - assert(subdef); - if (!upb_subtype_typecheck(f, subdef)) return false; - if (f->subdef_is_symbolic) free(f->sub.name); +static void release_subdef(upb_fielddef *f) { + if (f->subdef_is_symbolic) { + free(f->sub.name); + } else if (f->sub.def) { + upb_unref2(f->sub.def, f); + } +} + +bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef) { + assert(!upb_fielddef_isfrozen(f)); + assert(upb_fielddef_hassubdef(f)); + if (subdef && !upb_subdef_typecheck(f, subdef)) return false; + release_subdef(f); f->sub.def = subdef; f->subdef_is_symbolic = false; + if (f->sub.def) upb_ref2(f->sub.def, f); return true; } -bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name) { - assert(upb_fielddef_ismutable(f)); - assert(upb_hassubdef(f)); - if (f->subdef_is_symbolic) free(f->sub.name); - f->sub.name = strdup(name); +bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name) { + 
assert(!upb_fielddef_isfrozen(f)); + assert(upb_fielddef_hassubdef(f)); + release_subdef(f); + f->sub.name = upb_strdup(name); f->subdef_is_symbolic = true; return true; } +bool upb_fielddef_issubmsg(const upb_fielddef *f) { + return upb_fielddef_type(f) == UPB_TYPE_GROUP || + upb_fielddef_type(f) == UPB_TYPE_MESSAGE; +} + +bool upb_fielddef_isstring(const upb_fielddef *f) { + return upb_fielddef_type(f) == UPB_TYPE_STRING || + upb_fielddef_type(f) == UPB_TYPE_BYTES; +} + +bool upb_fielddef_isseq(const upb_fielddef *f) { + return upb_fielddef_label(f) == UPB_LABEL_REPEATED; +} + +bool upb_fielddef_isprimitive(const upb_fielddef *f) { + return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f); +} + +bool upb_fielddef_hassubdef(const upb_fielddef *f) { + return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE(ENUM); +} + /* upb_msgdef *****************************************************************/ +static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit, + void *closure) { + const upb_msgdef *m = (const upb_msgdef*)r; + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + visit(r, upb_upcast2(f), closure); + } +} + +static void freemsg(upb_refcounted *r) { + upb_msgdef *m = (upb_msgdef*)r; + upb_strtable_uninit(&m->ntof); + upb_inttable_uninit(&m->itof); + upb_def_uninit(upb_upcast(m)); + free(m); +} + upb_msgdef *upb_msgdef_new(const void *owner) { + static const struct upb_refcounted_vtbl vtbl = {visitmsg, freemsg}; upb_msgdef *m = malloc(sizeof(*m)); if (!m) return NULL; - if (!upb_def_init(&m->base, UPB_DEF_MSG, owner)) goto err2; - if (!upb_inttable_init(&m->itof)) goto err2; - if (!upb_strtable_init(&m->ntof)) goto err1; - m->size = 0; - m->hasbit_bytes = 0; - m->extstart = 0; - m->extend = 0; + if (!upb_def_init(upb_upcast(m), UPB_DEF_MSG, &vtbl, owner)) goto err2; + if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2; + if 
(!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1; return m; err1: @@ -645,20 +731,10 @@ err2: return NULL; } -static void upb_msgdef_free(upb_msgdef *m) { - upb_strtable_uninit(&m->ntof); - upb_inttable_uninit(&m->itof); - upb_def_uninit(&m->base); - free(m); -} - upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) { upb_msgdef *newm = upb_msgdef_new(owner); if (!newm) return NULL; - upb_msgdef_setsize(newm, upb_msgdef_size(m)); - upb_msgdef_sethasbit_bytes(newm, upb_msgdef_hasbit_bytes(m)); - upb_msgdef_setextrange(newm, upb_msgdef_extstart(m), upb_msgdef_extend(m)); - upb_def_setfullname(UPB_UPCAST(newm), upb_def_fullname(UPB_UPCAST(m))); + upb_def_setfullname(upb_upcast(newm), upb_def_fullname(upb_upcast(m))); upb_msg_iter i; for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f); @@ -670,26 +746,33 @@ upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) { return newm; } -void upb_msgdef_setsize(upb_msgdef *m, uint16_t size) { - assert(upb_def_ismutable(UPB_UPCAST(m))); - m->size = size; +bool upb_msgdef_isfrozen(const upb_msgdef *m) { + return upb_def_isfrozen(upb_upcast(m)); } -void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes) { - assert(upb_def_ismutable(UPB_UPCAST(m))); - m->hasbit_bytes = bytes; +void upb_msgdef_ref(const upb_msgdef *m, const void *owner) { + upb_def_ref(upb_upcast(m), owner); } -bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end) { - assert(upb_def_ismutable(UPB_UPCAST(m))); - if (start == 0 && end == 0) { - // Clearing the extension range -- ok to fall through. 
- } else if (start >= end || start < 1 || end > UPB_MAX_FIELDNUMBER) { - return false; - } - m->extstart = start; - m->extend = start; - return true; +void upb_msgdef_unref(const upb_msgdef *m, const void *owner) { + upb_def_unref(upb_upcast(m), owner); +} + +void upb_msgdef_donateref( + const upb_msgdef *m, const void *from, const void *to) { + upb_def_donateref(upb_upcast(m), from, to); +} + +void upb_msgdef_checkref(const upb_msgdef *m, const void *owner) { + upb_def_checkref(upb_upcast(m), owner); +} + +const char *upb_msgdef_fullname(const upb_msgdef *m) { + return upb_def_fullname(upb_upcast(m)); +} + +bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname) { + return upb_def_setfullname(upb_upcast(m), fullname); } bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n, @@ -697,6 +780,8 @@ bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n, // Check constraints for all fields before performing any action. for (int i = 0; i < n; i++) { upb_fielddef *f = fields[i]; + // TODO(haberman): handle the case where two fields of the input duplicate + // name or number. 
if (f->msgdef != NULL || upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0 || upb_msgdef_itof(m, upb_fielddef_number(f)) || @@ -710,306 +795,48 @@ bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n, f->msgdef = m; upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f)); upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f)); - upb_fielddef_ref(f, m); + upb_ref2(f, m); + upb_ref2(m, f); if (ref_donor) upb_fielddef_unref(f, ref_donor); } return true; } -void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) { - upb_inttable_begin(iter, &m->itof); +bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, + const void *ref_donor) { + return upb_msgdef_addfields(m, &f, 1, ref_donor); } -void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); } - - -/* upb_symtab *****************************************************************/ - -upb_symtab *upb_symtab_new(const void *owner) { - upb_symtab *s = malloc(sizeof(*s)); - upb_refcount_init(&s->refcount, owner); - upb_strtable_init(&s->symtab); - return s; +const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { + const upb_value *val = upb_inttable_lookup32(&m->itof, i); + return val ? (const upb_fielddef*)upb_value_getptr(*val) : NULL; } -void upb_symtab_ref(const upb_symtab *s, const void *owner) { - upb_refcount_ref(&s->refcount, owner); +const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name) { + const upb_value *val = upb_strtable_lookup(&m->ntof, name); + return val ? 
(upb_fielddef*)upb_value_getptr(*val) : NULL; } -void upb_symtab_unref(const upb_symtab *s, const void *owner) { - if(s && upb_refcount_unref(&s->refcount, owner)) { - upb_symtab *destroying = (upb_symtab*)s; - upb_strtable_iter i; - upb_strtable_begin(&i, &destroying->symtab); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) - upb_def_unref(upb_value_getptr(upb_strtable_iter_value(&i)), s); - upb_strtable_uninit(&destroying->symtab); - upb_refcount_uninit(&destroying->refcount); - free(destroying); - } +upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i) { + return (upb_fielddef*)upb_msgdef_itof(m, i); } -void upb_symtab_donateref( - const upb_symtab *s, const void *from, const void *to) { - upb_refcount_donateref(&s->refcount, from, to); -} - -const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count, - upb_deftype_t type, const void *owner) { - int total = upb_strtable_count(&s->symtab); - // We may only use part of this, depending on how many symbols are of the - // correct type. - const upb_def **defs = malloc(sizeof(*defs) * total); - upb_strtable_iter iter; - upb_strtable_begin(&iter, &s->symtab); - int i = 0; - for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { - upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); - assert(def); - if(type == UPB_DEF_ANY || def->type == type) - defs[i++] = def; - } - *count = i; - if (owner) - for(i = 0; i < *count; i++) upb_def_ref(defs[i], owner); - return defs; -} - -const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym, - const void *owner) { - const upb_value *v = upb_strtable_lookup(&s->symtab, sym); - upb_def *ret = v ? upb_value_getptr(*v) : NULL; - if (ret) upb_def_ref(ret, owner); - return ret; -} - -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym, - const void *owner) { - const upb_value *v = upb_strtable_lookup(&s->symtab, sym); - upb_def *def = v ? 
upb_value_getptr(*v) : NULL; - upb_msgdef *ret = NULL; - if(def && def->type == UPB_DEF_MSG) { - ret = upb_downcast_msgdef(def); - upb_def_ref(def, owner); - } - return ret; -} - -// Given a symbol and the base symbol inside which it is defined, find the -// symbol's definition in t. -static upb_def *upb_resolvename(const upb_strtable *t, - const char *base, const char *sym) { - if(strlen(sym) == 0) return NULL; - if(sym[0] == UPB_SYMBOL_SEPARATOR) { - // Symbols starting with '.' are absolute, so we do a single lookup. - // Slice to omit the leading '.' - const upb_value *v = upb_strtable_lookup(t, sym + 1); - return v ? upb_value_getptr(*v) : NULL; - } else { - // Remove components from base until we find an entry or run out. - // TODO: This branch is totally broken, but currently not used. - (void)base; - assert(false); - return NULL; - } +upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m, const char *name) { + return (upb_fielddef*)upb_msgdef_ntof(m, name); } -const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym, const void *owner) { - upb_def *ret = upb_resolvename(&s->symtab, base, sym); - if (ret) upb_def_ref(ret, owner); - return ret; -} - -// Adds dups of any existing def that can reach a def with the same name as one -// of "defs." This is to provide a consistent output graph as documented in -// the header file. We use a modified depth-first traversal that traverses -// each SCC (which we already computed) as if it were a single node. This -// allows us to traverse the possibly-cyclic graph as if it were a DAG and to -// easily dup the correct set of nodes with O(n) time. -// -// Returns true if defs that can reach "def" need to be duplicated into deftab. 
-static bool upb_resolve_dfs(const upb_def *def, upb_strtable *deftab, - const void *new_owner, upb_inttable *seen, - upb_status *s) { - // Memoize results of this function for efficiency (since we're traversing a - // DAG this is not needed to limit the depth of the search). - upb_value *v = upb_inttable_lookup(seen, (uintptr_t)def); - if (v) return upb_value_getbool(*v); - - // Visit submessages for all messages in the SCC. - bool need_dup = false; - const upb_def *base = def; - do { - assert(upb_def_isfinalized(def)); - if (def->type == UPB_DEF_FIELD) continue; - upb_value *v = upb_strtable_lookup(deftab, upb_def_fullname(def)); - if (v) { - upb_def *add_def = upb_value_getptr(*v); - if (add_def->refcount.next && add_def->refcount.next != &def->refcount) { - upb_status_seterrf(s, "conflicting existing defs for name: '%s'", - upb_def_fullname(def)); - return false; - } - need_dup = true; - } - const upb_msgdef *m = upb_dyncast_msgdef_const(def); - if (m) { - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - if (!upb_hassubdef(f)) continue; - // |= to avoid short-circuit; we need its side-effects. - need_dup |= upb_resolve_dfs( - upb_fielddef_subdef_mutable(f), deftab, new_owner, seen, s); - if (!upb_ok(s)) return false; - } - } - } while ((def = (upb_def*)def->refcount.next) != base); - - if (need_dup) { - // Dup any defs that don't already have entries in deftab. - def = base; - do { - if (def->type == UPB_DEF_FIELD) continue; - const char *name = upb_def_fullname(def); - if (upb_strtable_lookup(deftab, name) == NULL) { - upb_def *newdef = upb_def_dup(def, new_owner); - if (!newdef) goto oom; - // We temporarily use this field to track who we were dup'd from. 
- newdef->refcount.next = (upb_refcount*)def; - if (!upb_strtable_insert(deftab, name, upb_value_ptr(newdef))) - goto oom; - } - } while ((def = (upb_def*)def->refcount.next) != base); - } - - upb_inttable_insert(seen, (uintptr_t)def, upb_value_bool(need_dup)); - return need_dup; - -oom: - upb_status_seterrliteral(s, "out of memory"); - return false; +int upb_msgdef_numfields(const upb_msgdef *m) { + return upb_strtable_count(&m->ntof); } -bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, - upb_status *status) { - upb_def **add_defs = NULL; - upb_strtable addtab; - if (!upb_strtable_init(&addtab)) { - upb_status_seterrliteral(status, "out of memory"); - return false; - } - - // Add new defs to table. - for (int i = 0; i < n; i++) { - upb_def *def = defs[i]; - assert(upb_def_ismutable(def)); - const char *fullname = upb_def_fullname(def); - if (!fullname) { - upb_status_seterrliteral( - status, "Anonymous defs cannot be added to a symtab"); - goto err; - } - if (upb_strtable_lookup(&addtab, fullname) != NULL) { - upb_status_seterrf(status, "Conflicting defs named '%s'", fullname); - goto err; - } - if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def))) - goto oom_err; - // We temporarily use this field to indicate that we came from the user's - // list rather than being dup'd. - def->refcount.next = NULL; - } - - // Add dups of any existing def that can reach a def with the same name as - // one of "defs." - upb_inttable seen; - if (!upb_inttable_init(&seen)) goto oom_err; - upb_strtable_iter i; - upb_strtable_begin(&i, &s->symtab); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { - upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); - upb_resolve_dfs(def, &addtab, ref_donor, &seen, status); - if (!upb_ok(status)) goto err; - } - upb_inttable_uninit(&seen); - - // Now using the table, resolve symbolic references. 
- upb_strtable_begin(&i, &addtab); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { - upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); - upb_msgdef *m = upb_dyncast_msgdef(def); - if (!m) continue; - // Type names are resolved relative to the message in which they appear. - const char *base = upb_def_fullname(UPB_UPCAST(m)); - - upb_msg_iter j; - for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { - upb_fielddef *f = upb_msg_iter_field(&j); - const char *name = upb_fielddef_subtypename(f); - if (name) { - upb_def *subdef = upb_resolvename(&addtab, base, name); - if (subdef == NULL) { - upb_status_seterrf( - status, "couldn't resolve name '%s' in message '%s'", name, base); - goto err; - } else if (!upb_fielddef_setsubdef(f, subdef)) { - upb_status_seterrf( - status, "def '%s' had the wrong type for field '%s'", - upb_def_fullname(subdef), upb_fielddef_name(f)); - goto err; - } - } - - if (upb_fielddef_type(f) == UPB_TYPE(ENUM) && upb_fielddef_subdef(f) && - !upb_fielddef_resolvedefault(f, status)) - goto err; - } - } - - // We need an array of the defs in addtab, for passing to upb_finalize. - add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab)); - if (add_defs == NULL) goto oom_err; - upb_strtable_begin(&i, &addtab); - for (n = 0; !upb_strtable_done(&i); upb_strtable_next(&i)) - add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&i)); - - // Restore the next pointer that we stole. 
- for (int i = 0; i < n; i++) - add_defs[i]->refcount.next = &add_defs[i]->refcount; +void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) { + upb_inttable_begin(iter, &m->itof); +} - if (!upb_finalize(add_defs, n, status)) goto err; - upb_strtable_uninit(&addtab); +void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); } - for (int i = 0; i < n; i++) { - upb_def *def = add_defs[i]; - const char *name = upb_def_fullname(def); - upb_def_donateref(def, ref_donor, s); - upb_value *v = upb_strtable_lookup(&s->symtab, name); - if(v) { - upb_def_unref(upb_value_getptr(*v), s); - upb_value_setptr(v, def); - } else { - upb_strtable_insert(&s->symtab, name, upb_value_ptr(def)); - } - } - free(add_defs); - return true; +bool upb_msg_done(upb_msg_iter *iter) { return upb_inttable_done(iter); } -oom_err: - upb_status_seterrliteral(status, "out of memory"); -err: { - // Need to unref any defs we dup'd (we can distinguish them from defs that - // the user passed in by their def->refcount.next pointers). - upb_strtable_iter i; - upb_strtable_begin(&i, &addtab); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { - upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); - if (def->refcount.next) upb_def_unref(def, s); - } - } - upb_strtable_uninit(&addtab); - free(add_defs); - return false; +upb_fielddef *upb_msg_iter_field(upb_msg_iter *iter) { + return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter)); } diff --git a/upb/def.h b/upb/def.h index 018f375..4210e8c 100644 --- a/upb/def.h +++ b/upb/def.h @@ -12,47 +12,48 @@ * - upb_enumdef: describes an enum. * (TODO: definitions of services). * - * Defs go through two distinct phases of life: + * Like upb_refcounted objects, defs are mutable only until frozen, and are + * only thread-safe once frozen. * - * 1. MUTABLE: when first created, the properties of the def can be set freely - * (for example a message's name, its list of fields, the name/number of - * fields, etc). 
During this phase the def is *not* thread-safe, and may - * not be used for any purpose except to set its properties (it can't be - * used to parse anything, create any messages in memory, etc). - * - * 2. FINALIZED: the upb_def_finalize() operation finalizes a set of defs, - * which makes them thread-safe and immutable. Finalized defs may only be - * accessed through a CONST POINTER. If you want to modify an existing - * immutable def, copy it with upb_*_dup() and modify and finalize the copy. - * - * The refcounting of defs works properly no matter what state the def is in. - * Once the def is finalized it is guaranteed that any def reachable from a - * live def is also live (so a ref on the base of a message tree keeps the - * whole tree alive). - * - * You can test for which stage of life a def is in by calling - * upb_def_ismutable(). This is particularly useful for dynamic language - * bindings, which must properly guarantee that the dynamic language cannot - * break the rules laid out above. - * - * It would be possible to make the defs thread-safe during stage 1 by using - * mutexes internally and changing any methods returning pointers to return - * copies instead. This could be important if we are integrating with a VM or - * interpreter that does not naturally serialize access to wrapped objects (for - * example, in the case of Python this is not necessary because of the GIL). + * This is a mixed C/C++ interface that offers a full API to both languages. + * See the top-level README for more information. 
*/ #ifndef UPB_DEF_H_ #define UPB_DEF_H_ -#include "upb/refcount.h" -#include "upb/table.h" - #ifdef __cplusplus -extern "C" { +#include +#include +#include + +namespace upb { +class Def; +class EnumDef; +class FieldDef; +class MessageDef; +} + +typedef upb::Def upb_def; +typedef upb::EnumDef upb_enumdef; +typedef upb::FieldDef upb_fielddef; +typedef upb::MessageDef upb_msgdef; +#else +struct upb_def; +struct upb_enumdef; +struct upb_fielddef; +struct upb_msgdef; + +typedef struct upb_def upb_def; +typedef struct upb_enumdef upb_enumdef; +typedef struct upb_fielddef upb_fielddef; +typedef struct upb_msgdef upb_msgdef; #endif -/* upb_def: base class for defs **********************************************/ +#include "upb/refcounted.h" + + +/* upb::Def: base class for defs *********************************************/ // All the different kind of defs we support. These correspond 1:1 with // declarations in a .proto file. @@ -65,64 +66,97 @@ typedef enum { UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*() } upb_deftype_t; -typedef struct _upb_def { - upb_refcount refcount; - char *fullname; - upb_deftype_t type; - bool is_finalized; -} upb_def; +#ifdef __cplusplus -#define UPB_UPCAST(ptr) (&(ptr)->base) +class upb::Def { + public: + typedef upb_deftype_t Type; + + Def* Dup(const void *owner) const; + + // Though not declared as such in C++, upb::RefCounted is the base of + // Def and we can upcast to it. + RefCounted* Upcast(); + const RefCounted* Upcast() const; + + // Functionality from upb::RefCounted. + bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + Type def_type() const; + + // "fullname" is the def's fully-qualified name (eg. foo.bar.Message). + const char *full_name() const; + + // The def must be mutable. Caller retains ownership of fullname. 
Defs are + // not required to have a name; if a def has no name when it is frozen, it + // will remain an anonymous def. + bool set_full_name(const char *fullname); + bool set_full_name(const std::string& fullname); + + // Freezes the given defs; this validates all constraints and marks the defs + // as frozen (read-only). "defs" may not contain any fielddefs, but fields + // of any msgdefs will be frozen. + // + // Symbolic references to sub-types and enum defaults must have already been + // resolved. Any mutable defs reachable from any of "defs" must also be in + // the list; more formally, "defs" must be a transitive closure of mutable + // defs. + // + // After this operation succeeds, the finalized defs must only be accessed + // through a const pointer! + static bool Freeze(Def *const*defs, int n, Status *status); + static bool Freeze(const std::vector& defs, Status *status); + + private: + UPB_DISALLOW_POD_OPS(Def); + +#else +struct upb_def { +#endif + upb_refcounted base; + const char *fullname; + upb_deftype_t type:8; + // Used as a flag during the def's mutable stage. Must be false unless + // it is currently being used by a function on the stack. This allows + // us to easily determine which defs were passed into the function's + // current invocation. + bool came_from_user; +}; + +#define UPB_DEF_INIT(name, type) {UPB_REFCOUNT_INIT, name, type, false} + +// Native C API. +#ifdef __cplusplus +extern "C" { +#endif +upb_def *upb_def_dup(const upb_def *def, const void *owner); -// Call to ref/unref a def. These are thread-safe. If the def is finalized, -// it is guaranteed that any def reachable from a live def is also live. +// From upb_refcounted. 
+bool upb_def_isfrozen(const upb_def *def); void upb_def_ref(const upb_def *def, const void *owner); void upb_def_unref(const upb_def *def, const void *owner); void upb_def_donateref(const upb_def *def, const void *from, const void *to); +void upb_def_checkref(const upb_def *def, const void *owner); -upb_def *upb_def_dup(const upb_def *def, const void *owner); - -// A def is mutable until it has been finalized. -bool upb_def_ismutable(const upb_def *def); -bool upb_def_isfinalized(const upb_def *def); - -// "fullname" is the def's fully-qualified name (eg. foo.bar.Message). -INLINE const char *upb_def_fullname(const upb_def *d) { return d->fullname; } - -// The def must be mutable. Caller retains ownership of fullname. Defs are -// not required to have a name; if a def has no name when it is finalized, it -// will remain an anonymous def. +upb_deftype_t upb_def_type(const upb_def *d); +const char *upb_def_fullname(const upb_def *d); bool upb_def_setfullname(upb_def *def, const char *fullname); +bool upb_def_freeze(upb_def *const*defs, int n, upb_status *status); +#ifdef __cplusplus +} // extern "C" +#endif + -// Finalizes the given defs; this validates all constraints and marks the defs -// as finalized (read-only). This will also cause fielddefs to take refs on -// their subdefs so that any reachable def will be kept alive (but this is -// done in a way that correctly handles circular references). -// -// On success, a new list is returned containing the finalized defs and -// ownership of the "defs" list passes to the function. On failure NULL is -// returned and the caller retains ownership of "defs." -// -// Symbolic references to sub-types or enum defaults must have already been -// resolved. "defs" must contain the transitive closure of any mutable defs -// reachable from the any def in the list. In other words, there may not be a -// mutable def which is reachable from one of "defs" that does not appear -// elsewhere in "defs." 
"defs" may not contain fielddefs, but any fielddefs -// reachable from the given msgdefs will be finalized. -// -// n is currently limited to 64k defs, if more are required break them into -// batches of 64k (or we could raise this limit, at the cost of a bigger -// upb_def structure or complexity in upb_finalize()). -bool upb_finalize(upb_def *const*defs, int n, upb_status *status); - - -/* upb_fielddef ***************************************************************/ +/* upb::FieldDef **************************************************************/ // We choose these to match descriptor.proto. Clients may use UPB_TYPE() and // UPB_LABEL() instead of referencing these directly. typedef enum { UPB_TYPE_NONE = -1, // Internal-only, may be removed. - UPB_TYPE_ENDGROUP = 0, // Internal-only, may be removed. UPB_TYPE_DOUBLE = 1, UPB_TYPE_FLOAT = 2, UPB_TYPE_INT64 = 3, @@ -164,426 +198,485 @@ typedef struct { extern const upb_typeinfo upb_types[UPB_NUM_TYPES]; +#ifdef __cplusplus + // A upb_fielddef describes a single field in a message. It is most often // found as a part of a upb_msgdef, but can also stand alone to represent // an extension. -typedef struct _upb_fielddef { +class upb::FieldDef { + public: + typedef upb_fieldtype_t Type; + typedef upb_label_t Label; + + // Returns NULL if memory allocation failed. + static FieldDef* New(const void *owner); + + // Duplicates the given field, returning NULL if memory allocation failed. + // When a fielddef is duplicated, the subdef (if any) is made symbolic if it + // wasn't already. If the subdef is set but has no name (which is possible + // since msgdefs are not required to have a name) the new fielddef's subdef + // will be unset. + FieldDef* Dup(const void *owner) const; + + // Though not declared as such in C++, upb::Def is the base of FieldDef and + // we can upcast to it. + Def* Upcast(); + const Def* Upcast() const; + + // Functionality from upb::RefCounted. 
+ bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + // Functionality from upb::Def. + const char *full_name() const; + bool set_full_name(const char *fullname); + bool set_full_name(const std::string& fullname); + + Type type() const; // Return UPB_TYPE_NONE if uninitialized. + Label label() const; // Defaults to UPB_LABEL_OPTIONAL. + uint32_t number() const; // Returns 0 if uninitialized. + const MessageDef* message_def() const; + + // "number" and "name" must be set before the fielddef is added to a msgdef. + // For the moment we do not allow these to be set once the fielddef is added + // to a msgdef -- this could be relaxed in the future. + bool set_number(uint32_t number); + bool set_type(upb_fieldtype_t type); + bool set_label(upb_label_t label); + + // These are the same as full_name()/set_full_name(), but since fielddefs + // most often use simple, non-qualified names, we provide this accessor + // also. Generally only extensions will want to think of this name as + // fully-qualified. + bool set_name(const char *name); + bool set_name(const std::string& name); + const char *name() const; + + bool IsSubMessage() const; + bool IsString() const; + bool IsSequence() const; + bool IsPrimitive() const; + + // Returns the default value for this fielddef, which may either be something + // the client set explicitly or the "default default" (0 for numbers, empty + // for strings). The field's type indicates the type of the returned value, + // except for enum fields that are still mutable. + // + // For enums the default can be set either numerically or symbolically -- the + // upb_fielddef_default_is_symbolic() function below will indicate which it + // is. For string defaults, the value will be a upb_byteregion which is + // invalidated by any other non-const call on this object. 
Once the fielddef + // is frozen, symbolic enum defaults are resolved, so frozen enum fielddefs + // always have a default of type int32. + Value default_value() const; + + // Sets default value for the field. For numeric types, use + // upb_fielddef_setdefault(), and "value" must match the type of the field. + // For string/bytes types, use upb_fielddef_setdefaultstr(). Enum types may + // use either, since the default may be set either numerically or + // symbolically. + // + // NOTE: May only be called for fields whose type has already been set. + // Also, will be reset to default if the field's type is set again. + void set_default_value(Value value); + bool set_default_string(const void *str, size_t len); + bool set_default_string(const std::string& str); + void set_default_cstr(const char *str); + + // The results of this function are only meaningful for mutable enum fields, + // which can have a default specified either as an integer or as a string. + // If this returns true, the default returned from upb_fielddef_default() is + // a string, otherwise it is an integer. + bool IsDefaultSymbolic() const; + + // If this is an enum field with a symbolic default, resolves the default and + // returns true if resolution was successful or if this field didn't need to + // be resolved (because it is not an enum with a symbolic default). + bool ResolveDefault(); + + // Submessage and enum fields must reference a "subdef", which is the + // upb_msgdef or upb_enumdef that defines their type. Note that when the + // fielddef is mutable it may not have a subdef *yet*, but this function + // still returns true to indicate that the field's type requires a subdef. + bool HasSubDef() const; + + // Returns the enum or submessage def or symbolic name for this field, if + // any. Requires that upb_hassubdef(f). Returns NULL if the subdef has not + // been set or if you ask for a subdef when the subdef is currently set + // symbolically (or vice-versa). 
To access the subdef's name for a linked + // fielddef, use upb_def_fullname(upb_fielddef_subdef(f)). + // + // Caller does *not* own a ref on the returned def or string. + // upb_fielddef_subdefname() is non-const because frozen defs will never + // have a symbolic reference (they must be resolved before the msgdef can be + // frozen). + const Def* subdef() const; + const char* subdef_name() const; + + // Before a fielddef is frozen, its subdef may be set either directly (with a + // upb::Def*) or symbolically. Symbolic refs must be resolved before the + // containing msgdef can be frozen (see upb_resolve() above). The client is + // responsible for making sure that "subdef" lives until this fielddef is + // frozen or deleted. + // + // Both methods require that upb_hassubdef(f) (so the type must be set prior + // to calling these methods). Returns false if this is not the case, or if + // the given subdef is not of the correct type. The subdef is reset if the + // field's type is changed. The subdef can be set to NULL to clear it. + bool set_subdef(const Def* subdef); + bool set_subdef_name(const char* name); + bool set_subdef_name(const std::string& name); + + private: + UPB_DISALLOW_POD_OPS(FieldDef); + +#else +struct upb_fielddef { +#endif upb_def base; - struct _upb_msgdef *msgdef; + const upb_msgdef *msgdef; union { + const upb_def *def; // If !subdef_is_symbolic. char *name; // If subdef_is_symbolic. - upb_def *def; // If !subdef_is_symbolic. } sub; // The msgdef or enumdef for this field, if upb_hassubdef(f). bool subdef_is_symbolic; bool default_is_string; bool subdef_is_owned; - upb_fieldtype_t type; - upb_label_t label; - int16_t hasbit; - uint16_t offset; - int32_t number; + upb_fieldtype_t type_; + upb_label_t label_; + uint32_t number_; upb_value defaultval; // Only for non-repeated scalars and strings. - upb_value fval; - struct _upb_accessor_vtbl *accessor; - const void *prototype; -} upb_fielddef; - -// Returns NULL if memory allocation failed. 
+ uint32_t selector_base; // Used to index into a upb::Handlers table. +}; + +// This will only work for static initialization because of the subdef_is_owned +// initialization. Theoretically the other _INIT() macros could possibly work +// for non-static initialization, but this has not been tested. +#define UPB_FIELDDEF_INIT(label, type, name, num, msgdef, subdef, \ + selector_base, defaultval) \ + {UPB_DEF_INIT(name, UPB_DEF_FIELD), msgdef, {subdef}, false, \ + type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES, \ + false, /* subdef_is_owned: not used since fielddef is not freed. */ \ + type, label, num, defaultval, selector_base} + +// Native C API. +#ifdef __cplusplus +extern "C" { +#endif upb_fielddef *upb_fielddef_new(const void *owner); - -INLINE void upb_fielddef_ref(upb_fielddef *f, const void *owner) { - upb_def_ref(UPB_UPCAST(f), owner); -} -INLINE void upb_fielddef_unref(upb_fielddef *f, const void *owner) { - upb_def_unref(UPB_UPCAST(f), owner); -} - -// Duplicates the given field, returning NULL if memory allocation failed. -// When a fielddef is duplicated, the subdef (if any) is made symbolic if it -// wasn't already. If the subdef is set but has no name (which is possible -// since msgdefs are not required to have a name) the new fielddef's subdef -// will be unset. upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner); -INLINE bool upb_fielddef_ismutable(const upb_fielddef *f) { - return upb_def_ismutable(UPB_UPCAST(f)); -} -INLINE bool upb_fielddef_isfinalized(const upb_fielddef *f) { - return !upb_fielddef_ismutable(f); -} - -// Simple accessors. 
/////////////////////////////////////////////////////////// - -INLINE upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) { - return f->type; -} -INLINE upb_label_t upb_fielddef_label(const upb_fielddef *f) { - return f->label; -} -INLINE int32_t upb_fielddef_number(const upb_fielddef *f) { return f->number; } -INLINE uint16_t upb_fielddef_offset(const upb_fielddef *f) { return f->offset; } -INLINE int16_t upb_fielddef_hasbit(const upb_fielddef *f) { return f->hasbit; } -INLINE const char *upb_fielddef_name(const upb_fielddef *f) { - return upb_def_fullname(UPB_UPCAST(f)); -} -INLINE upb_value upb_fielddef_fval(const upb_fielddef *f) { return f->fval; } -INLINE struct _upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f) { - return f->msgdef; -} -INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(const upb_fielddef *f) { - return f->accessor; -} - +// From upb_refcounted. +bool upb_fielddef_isfrozen(const upb_fielddef *f); +void upb_fielddef_ref(const upb_fielddef *f, const void *owner); +void upb_fielddef_unref(const upb_fielddef *f, const void *owner); +void upb_fielddef_donateref( + const upb_fielddef *f, const void *from, const void *to); +void upb_fielddef_checkref(const upb_fielddef *f, const void *owner); + +// From upb_def. 
+const char *upb_fielddef_fullname(const upb_fielddef *f); +bool upb_fielddef_setfullname(upb_fielddef *f, const char *fullname); + +upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f); +upb_label_t upb_fielddef_label(const upb_fielddef *f); +uint32_t upb_fielddef_number(const upb_fielddef *f); +const char *upb_fielddef_name(const upb_fielddef *f); +const upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f); +upb_msgdef *upb_fielddef_msgdef_mutable(upb_fielddef *f); bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type); bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label); -void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit); -void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset); -// TODO(haberman): need a way of keeping the fval alive even if some handlers -// outlast the fielddef. -void upb_fielddef_setfval(upb_fielddef *f, upb_value fval); -void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl); - -// "Number" and "fullname" must be set before the fielddef is added to a msgdef. -// For the moment we do not allow these to be set once the fielddef is added to -// a msgdef -- this could be relaxed in the future. -bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number); -INLINE bool upb_fielddef_setname(upb_fielddef *f, const char *name) { - return upb_def_setfullname(UPB_UPCAST(f), name); -} - -// Field type tests. 
/////////////////////////////////////////////////////////// - -INLINE bool upb_issubmsgtype(upb_fieldtype_t type) { - return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); -} -INLINE bool upb_isstringtype(upb_fieldtype_t type) { - return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); -} -INLINE bool upb_isprimitivetype(upb_fieldtype_t type) { - return !upb_issubmsgtype(type) && !upb_isstringtype(type); -} -INLINE bool upb_issubmsg(const upb_fielddef *f) { - return upb_issubmsgtype(f->type); -} -INLINE bool upb_isstring(const upb_fielddef *f) { - return upb_isstringtype(f->type); -} -INLINE bool upb_isseq(const upb_fielddef *f) { - return f->label == UPB_LABEL(REPEATED); -} - -// Default value. ////////////////////////////////////////////////////////////// - -// Returns the default value for this fielddef, which may either be something -// the client set explicitly or the "default default" (0 for numbers, empty for -// strings). The field's type indicates the type of the returned value, except -// for enum fields that are still mutable. -// -// For enums the default can be set either numerically or symbolically -- the -// upb_fielddef_default_is_symbolic() function below will indicate which it is. -// For string defaults, the value will be a upb_byteregion which is invalidated -// by any other non-const call on this object. Once the fielddef is finalized, -// symbolic enum defaults are resolved, so finalized enum fielddefs always have -// a default of type int32. -INLINE upb_value upb_fielddef_default(const upb_fielddef *f) { - return f->defaultval; -} -// Sets default value for the field. For numeric types, use -// upb_fielddef_setdefault(), and "value" must match the type of the field. -// For string/bytes types, use upb_fielddef_setdefaultstr(). Enum types may -// use either, since the default may be set either numerically or symbolically. -// -// NOTE: May only be called for fields whose type has already been set. 
-// Also, will be reset to default if the field's type is set again. +bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number); +bool upb_fielddef_setname(upb_fielddef *f, const char *name); +bool upb_fielddef_issubmsg(const upb_fielddef *f); +bool upb_fielddef_isstring(const upb_fielddef *f); +bool upb_fielddef_isseq(const upb_fielddef *f); +bool upb_fielddef_isprimitive(const upb_fielddef *f); +upb_value upb_fielddef_default(const upb_fielddef *f); void upb_fielddef_setdefault(upb_fielddef *f, upb_value value); bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len); void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str); - -// The results of this function are only meaningful for mutable enum fields, -// which can have a default specified either as an integer or as a string. If -// this returns true, the default returned from upb_fielddef_default() is a -// string, otherwise it is an integer. -INLINE bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) { - assert(f->type == UPB_TYPE(ENUM)); - return f->default_is_string; -} - -// Subdef. ///////////////////////////////////////////////////////////////////// - -// Submessage and enum fields must reference a "subdef", which is the -// upb_msgdef or upb_enumdef that defines their type. Note that when the -// fielddef is mutable it may not have a subdef *yet*, but this function still -// returns true to indicate that the field's type requires a subdef. -INLINE bool upb_hassubdef(const upb_fielddef *f) { - return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); -} - -// Before a fielddef is finalized, its subdef may be set either directly (with -// a upb_def*) or symbolically. Symbolic refs must be resolved before the -// containing msgdef can be finalized (see upb_resolve() above). The client is -// responsible for making sure that "subdef" lives until this fielddef is -// finalized or deleted. 
-// -// Both methods require that upb_hassubdef(f) (so the type must be set prior -// to calling these methods). Returns false if this is not the case, or if -// the given subdef is not of the correct type. The subtype is reset if the -// field's type is changed. -bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef); -bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name); - -// Returns the enum or submessage def or symbolic name for this field, if any. -// Requires that upb_hassubdef(f). Returns NULL if the subdef has not been set -// or if you ask for a subtype name when the subtype is currently set -// symbolically (or vice-versa). To access the subtype's name for a linked -// fielddef, use upb_def_fullname(upb_fielddef_subdef(f)). -// -// Caller does *not* own a ref on the returned def or string. -// upb_fielddef_subtypename() is non-const because finalized defs will never -// have a symbolic reference (they must be resolved before the msgdef can be -// finalized). -upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f); +bool upb_fielddef_default_is_symbolic(const upb_fielddef *f); +bool upb_fielddef_resolvedefault(upb_fielddef *f); +bool upb_fielddef_hassubdef(const upb_fielddef *f); +bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef); +bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name); const upb_def *upb_fielddef_subdef(const upb_fielddef *f); -const char *upb_fielddef_subtypename(upb_fielddef *f); +const char *upb_fielddef_subdefname(const upb_fielddef *f); +#ifdef __cplusplus +} // extern "C" +#endif -/* upb_msgdef *****************************************************************/ +/* upb::MessageDef ************************************************************/ + +typedef upb_inttable_iter upb_msg_iter; + +#ifdef __cplusplus // Structure that describes a single .proto message type. -typedef struct _upb_msgdef { +class upb::MessageDef { + public: + // Returns NULL if memory allocation failed. 
+ static MessageDef* New(const void *owner); + + // Though not declared as such in C++, upb::Def is the base of MessageDef and + // we can upcast to it. + Def* Upcast(); + const Def* Upcast() const; + + // Functionality from upb::RefCounted. + bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + // Functionality from upb::Def. + const char *full_name() const; + bool set_full_name(const char *fullname); + bool set_full_name(const std::string& fullname); + + // The number of fields that belong to the MessageDef. + int field_count() const; + + // Adds a set of fields (upb_fielddef objects) to a msgdef. Requires that + // the msgdef and all the fielddefs are mutable. The fielddef's name and + // number must be set, and the message may not already contain any field with + // this name or number, and this fielddef may not be part of another message. + // In error cases false is returned and the msgdef is unchanged. On success, + // the caller donates a ref from ref_donor (if non-NULL). + bool AddField(upb_fielddef *f, const void *ref_donor); + + // These return NULL if the field is not found. + FieldDef* FindFieldByNumber(uint32_t number); + FieldDef* FieldFieldByName(const char *name); + const FieldDef* FindFieldByNumber(uint32_t number) const; + const FieldDef* FieldFieldByName(const char *name) const; + + // Returns a new msgdef that is a copy of the given msgdef (and a copy of all + // the fields) but with any references to submessages broken and replaced + // with just the name of the submessage. Returns NULL if memory allocation + // failed. + // + // TODO(haberman): which is more useful, keeping fields resolved or + // unresolving them? If there's no obvious answer, Should this functionality + // just be moved into symtab.c? + MessageDef* Dup(const void *owner) const; + + // Iteration over fields. 
The order is undefined. + class Iterator { + public: + explicit Iterator(MessageDef* md); + + FieldDef* field(); + bool Done(); + void Next(); + + private: + upb_msg_iter iter_; + }; + + // For iterating over the fields of a const MessageDef. + class ConstIterator { + public: + explicit ConstIterator(const MessageDef* md); + + const FieldDef* field(); + bool Done(); + void Next(); + + private: + upb_msg_iter iter_; + }; + + private: + UPB_DISALLOW_POD_OPS(MessageDef); + +#else +struct upb_msgdef { +#endif upb_def base; + size_t selector_count; // Tables for looking up fields by number and name. upb_inttable itof; // int to field upb_strtable ntof; // name to field - // The following fields may be modified while mutable. - uint16_t size; - uint8_t hasbit_bytes; - // The range of tag numbers used to store extensions. - uint32_t extstart, extend; - // Used for proto2 integration. - const void *prototype; -} upb_msgdef; + // TODO(haberman): proper extension ranges (there can be multiple). +}; + +#define UPB_MSGDEF_INIT(name, itof, ntof, selector_count) \ + {UPB_DEF_INIT(name, UPB_DEF_MSG), selector_count, itof, ntof} +#ifdef __cplusplus +extern "C" { +#endif // Returns NULL if memory allocation failed. upb_msgdef *upb_msgdef_new(const void *owner); -INLINE void upb_msgdef_ref(const upb_msgdef *md, const void *owner) { - upb_def_ref(UPB_UPCAST(md), owner); -} -INLINE void upb_msgdef_unref(const upb_msgdef *md, const void *owner) { - upb_def_unref(UPB_UPCAST(md), owner); -} +// From upb_refcounted. 
+bool upb_msgdef_isfrozen(const upb_msgdef *m); +void upb_msgdef_ref(const upb_msgdef *m, const void *owner); +void upb_msgdef_unref(const upb_msgdef *m, const void *owner); +void upb_msgdef_donateref( + const upb_msgdef *m, const void *from, const void *to); +void upb_msgdef_checkref(const upb_msgdef *m, const void *owner); -// Returns a new msgdef that is a copy of the given msgdef (and a copy of all -// the fields) but with any references to submessages broken and replaced with -// just the name of the submessage. Returns NULL if memory allocation failed. -// This can be put back into another symtab and the names will be re-resolved -// in the new context. -upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner); +// From upb_def. +const char *upb_msgdef_fullname(const upb_msgdef *m); +bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname); -// Read accessors. May be called at any time. -INLINE size_t upb_msgdef_size(const upb_msgdef *m) { return m->size; } -INLINE uint8_t upb_msgdef_hasbit_bytes(const upb_msgdef *m) { - return m->hasbit_bytes; -} -INLINE uint32_t upb_msgdef_extstart(const upb_msgdef *m) { return m->extstart; } -INLINE uint32_t upb_msgdef_extend(const upb_msgdef *m) { return m->extend; } - -// Write accessors. May only be called before the msgdef is in a symtab. -void upb_msgdef_setsize(upb_msgdef *m, uint16_t size); -void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes); -bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end); - -// Adds a set of fields (upb_fielddef objects) to a msgdef. Requires that the -// msgdef and all the fielddefs are mutable. The fielddef's name and number -// must be set, and the message may not already contain any field with this -// name or number, and this fielddef may not be part of another message. In -// error cases false is returned and the msgdef is unchanged. 
-// -// On success, the msgdef takes a ref on the fielddef so the caller needn't -// worry about continuing to keep it alive (however the reverse is not true; -// refs on the fielddef will *not* keep the msgdef alive). If ref_donor is -// non-NULL, caller passes a ref on the fielddef from ref_donor to the msgdef, -// otherwise caller retains its reference(s) on the defs in f. +upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner); bool upb_msgdef_addfields( upb_msgdef *m, upb_fielddef *const *f, int n, const void *ref_donor); -INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, - const void *ref_donor) { - return upb_msgdef_addfields(m, &f, 1, ref_donor); -} - -// Looks up a field by name or number. While these are written to be as fast -// as possible, it will still be faster to cache the results of this lookup if -// possible. These return NULL if no such field is found. -INLINE upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { - const upb_value *val = upb_inttable_lookup32(&m->itof, i); - return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL; -} - -INLINE upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name) { - const upb_value *val = upb_strtable_lookup(&m->ntof, name); - return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL; -} - -INLINE int upb_msgdef_numfields(const upb_msgdef *m) { - return upb_strtable_count(&m->ntof); -} - -// Iteration over fields. The order is undefined. -// TODO: the iteration should be in field order. -// Iterators are invalidated when a field is added or removed. -// upb_msg_iter i; -// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { -// upb_fielddef *f = upb_msg_iter_field(&i); -// // ... 
-// } -typedef upb_inttable_iter upb_msg_iter; - +bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor); +const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i); +const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name); +upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i); +upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m, const char *name); +int upb_msgdef_numfields(const upb_msgdef *m); + +// upb_msg_iter i; +// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { +// upb_fielddef *f = upb_msg_iter_field(&i); +// // ... +// } void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m); void upb_msg_next(upb_msg_iter *iter); -INLINE bool upb_msg_done(upb_msg_iter *iter) { return upb_inttable_done(iter); } +bool upb_msg_done(upb_msg_iter *iter); +upb_fielddef *upb_msg_iter_field(upb_msg_iter *iter); +#ifdef __cplusplus +} // extern "C +#endif -// Iterator accessor. -INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter *iter) { - return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter)); -} +/* upb::EnumDef ***************************************************************/ -/* upb_enumdef ****************************************************************/ +typedef upb_strtable_iter upb_enum_iter; -typedef struct _upb_enumdef { +#ifdef __cplusplus + +class upb::EnumDef { + public: + // Returns NULL if memory allocation failed. + static EnumDef* New(const void *owner); + + // Though not declared as such in C++, upb::Def is the base of EnumDef and we + // can upcast to it. + Def* Upcast(); + const Def* Upcast() const; + + // Functionality from upb::RefCounted. + bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + // Functionality from upb::Def. 
+ const char *full_name() const; + bool set_full_name(const char *fullname); + bool set_full_name(const std::string& fullname); + + // The value that is used as the default when no field default is specified. + int32_t default_value() const; + void set_default_value(int32_t val); + + // Returns the number of values currently defined in the enum. Note that + // multiple names can refer to the same number, so this may be greater than + // the total number of unique numbers. + int value_count() const; + + // Adds a single name/number pair to the enum. Fails if this name has + // already been used by another value. + bool AddValue(const char* name, int32_t num, Status* status); + bool AddValue(const std::string& name, int32_t num, Status* status); + + // Lookups from name to integer, returning true if found. + bool FindValueByName(const char* name, int32_t* num) const; + + // Finds the name corresponding to the given number, or NULL if none was + // found. If more than one name corresponds to this number, returns the + // first one that was added. + const char* FindValueByNumber(int32_t num) const; + + // Returns a new EnumDef with all the same values. The new EnumDef will be + // owned by the given owner. + EnumDef* Dup(const void *owner) const; + + // Iteration over name/value pairs. The order is undefined. + // Adding an enum val invalidates any iterators. + class Iterator { + public: + explicit Iterator(const EnumDef*); + + int32_t number(); + const char* name(); + bool Done(); + void Next(); + + private: + upb_enum_iter iter_; + }; + + private: + UPB_DISALLOW_POD_OPS(EnumDef); + +#else +struct upb_enumdef { +#endif upb_def base; upb_strtable ntoi; upb_inttable iton; int32_t defaultval; -} upb_enumdef; +}; -// Returns NULL if memory allocation failed. +#define UPB_ENUMDEF_INIT(name, ntoi, iton, defaultval) \ + {UPB_DEF_INIT(name, UPB_DEF_ENUM), ntoi, iton, defaultval} + +// Native C API. 
+#ifdef __cplusplus +extern "C" { +#endif upb_enumdef *upb_enumdef_new(const void *owner); -INLINE void upb_enumdef_ref(const upb_enumdef *e, const void *owner) { - upb_def_ref(&e->base, owner); -} -INLINE void upb_enumdef_unref(const upb_enumdef *e, const void *owner) { - upb_def_unref(&e->base, owner); -} upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner); -INLINE int32_t upb_enumdef_default(const upb_enumdef *e) { - return e->defaultval; -} - -// May only be set if upb_def_ismutable(e). -void upb_enumdef_setdefault(upb_enumdef *e, int32_t val); - -// Returns the number of values currently defined in the enum. Note that -// multiple names can refer to the same number, so this may be greater than the -// total number of unique numbers. -INLINE int upb_enumdef_numvals(const upb_enumdef *e) { - return upb_strtable_count(&e->ntoi); -} +// From upb_refcounted. +void upb_enumdef_unref(const upb_enumdef *e, const void *owner); +bool upb_enumdef_isfrozen(const upb_enumdef *e); +void upb_enumdef_ref(const upb_enumdef *e, const void *owner); +void upb_enumdef_donateref( + const upb_enumdef *m, const void *from, const void *to); +void upb_enumdef_checkref(const upb_enumdef *e, const void *owner); -// Adds a value to the enumdef. Requires that no existing val has this name, -// but duplicate numbers are allowed. May only be called if the enumdef is -// mutable. Returns false if the existing name is used, or if "name" is not a -// valid label, or on memory allocation failure (we may want to distinguish -// these failure cases in the future). -bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num); +// From upb_def. +const char *upb_enumdef_fullname(const upb_enumdef *e); +bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname); -// Lookups from name to integer, returning true if found. 
+int32_t upb_enumdef_default(const upb_enumdef *e); +void upb_enumdef_setdefault(upb_enumdef *e, int32_t val); +int upb_enumdef_numvals(const upb_enumdef *e); +bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num, + upb_status *status); bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, int32_t *num); - -// Finds the name corresponding to the given number, or NULL if none was found. -// If more than one name corresponds to this number, returns the first one that -// was added. const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num); -// Iteration over name/value pairs. The order is undefined. -// Adding an enum val invalidates any iterators. -// upb_enum_iter i; -// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { -// // ... -// } -typedef upb_strtable_iter upb_enum_iter; - +// upb_enum_iter i; +// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { +// // ... +// } void upb_enum_begin(upb_enum_iter *iter, const upb_enumdef *e); void upb_enum_next(upb_enum_iter *iter); bool upb_enum_done(upb_enum_iter *iter); - -// Iterator accessors. -INLINE const char *upb_enum_iter_name(upb_enum_iter *iter) { - return upb_strtable_iter_key(iter); -} -INLINE int32_t upb_enum_iter_number(upb_enum_iter *iter) { - return upb_value_getint32(upb_strtable_iter_value(iter)); -} - - -/* upb_symtab *****************************************************************/ - -// A symtab (symbol table) stores a name->def map of upb_defs. Clients could -// always create such tables themselves, but upb_symtab has logic for resolving -// symbolic references, which is nontrivial. 
-typedef struct { - upb_refcount refcount; - upb_strtable symtab; -} upb_symtab; - -upb_symtab *upb_symtab_new(const void *owner); -void upb_symtab_ref(const upb_symtab *s, const void *owner); -void upb_symtab_unref(const upb_symtab *s, const void *owner); -void upb_symtab_donateref( - const upb_symtab *s, const void *from, const void *to); - -// Resolves the given symbol using the rules described in descriptor.proto, -// namely: -// -// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like -// scoping rules are used to find the type (i.e. first the nested types -// within this message are searched, then within the parent, on up to the -// root namespace). -// -// If a def is found, the caller owns one ref on the returned def, owned by -// owner. Otherwise returns NULL. -const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym, const void *owner); - -// Finds an entry in the symbol table with this exact name. If a def is found, -// the caller owns one ref on the returned def, owned by owner. Otherwise -// returns NULL. -const upb_def *upb_symtab_lookup( - const upb_symtab *s, const char *sym, const void *owner); -const upb_msgdef *upb_symtab_lookupmsg( - const upb_symtab *s, const char *sym, const void *owner); - -// Gets an array of pointers to all currently active defs in this symtab. The -// caller owns the returned array (which is of length *count) as well as a ref -// to each symbol inside (owned by owner). If type is UPB_DEF_ANY then defs of -// all types are returned, otherwise only defs of the required type are -// returned. -const upb_def **upb_symtab_getdefs( - const upb_symtab *s, int *n, upb_deftype_t type, const void *owner); - -// Adds the given defs to the symtab, resolving all symbols (including enum -// default values) and finalizing the defs. Only one def per name may be in -// the list, but defs can replace existing defs in the symtab. 
All defs must -// have a name -- anonymous defs are not allowed. Anonymous defs can still be -// finalized by calling upb_def_finalize() directly. -// -// Any existing defs that can reach defs that are being replaced will -// themselves be replaced also, so that the resulting set of defs is fully -// consistent. -// -// This logic implemented in this method is a convenience; ultimately it calls -// some combination of upb_fielddef_setsubdef(), upb_def_dup(), and -// upb_finalize(), any of which the client could call themself. However, since -// the logic for doing so is nontrivial, we provide it here. -// -// The entire operation either succeeds or fails. If the operation fails, the -// symtab is unchanged, false is returned, and status indicates the error. The -// caller passes a ref on all defs to the symtab (even if the operation fails). -bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, - upb_status *status); +const char *upb_enum_iter_name(upb_enum_iter *iter); +int32_t upb_enum_iter_number(upb_enum_iter *iter); +#ifdef __cplusplus +} // extern "C" +#endif /* upb_def casts **************************************************************/ @@ -592,31 +685,349 @@ bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, // Downcasts, for when some wants to assert that a def is of a particular type. // These are only checked if we are building debug. #define UPB_DEF_CASTS(lower, upper) \ - struct _upb_ ## lower; /* Forward-declare. 
*/ \ - INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \ - if(def->type != UPB_DEF_ ## upper) return NULL; \ - return (struct _upb_ ## lower*)def; \ + INLINE const upb_ ## lower *upb_dyncast_ ## lower(const upb_def *def) { \ + if (upb_def_type(def) != UPB_DEF_ ## upper) return NULL; \ + return (upb_ ## lower*)def; \ } \ - INLINE const struct _upb_ ## lower *upb_dyncast_ ## lower ## _const(const upb_def *def) { \ - if(def->type != UPB_DEF_ ## upper) return NULL; \ - return (const struct _upb_ ## lower*)def; \ + INLINE const upb_ ## lower *upb_downcast_ ## lower(const upb_def *def) { \ + assert(upb_def_type(def) == UPB_DEF_ ## upper); \ + return (const upb_ ## lower*)def; \ } \ - INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \ - assert(def->type == UPB_DEF_ ## upper); \ - return (struct _upb_ ## lower*)def; \ + INLINE upb_ ## lower *upb_dyncast_ ## lower ## _mutable(upb_def *def) { \ + return (upb_ ## lower*)upb_dyncast_ ## lower(def); \ } \ - INLINE const struct _upb_ ## lower *upb_downcast_ ## lower ## _const(const upb_def *def) { \ - assert(def->type == UPB_DEF_ ## upper); \ - return (const struct _upb_ ## lower*)def; \ + INLINE upb_ ## lower *upb_downcast_ ## lower ## _mutable(upb_def *def) { \ + return (upb_ ## lower*)upb_downcast_ ## lower(def); \ } UPB_DEF_CASTS(msgdef, MSG); UPB_DEF_CASTS(fielddef, FIELD); UPB_DEF_CASTS(enumdef, ENUM); -UPB_DEF_CASTS(svcdef, SERVICE); #undef UPB_DEF_CASTS #ifdef __cplusplus -} /* extern "C" */ + +INLINE const char *upb_safecstr(const std::string& str) { + assert(str.size() == std::strlen(str.c_str())); + return str.c_str(); +} + +// Inline C++ wrappers. 
+namespace upb { + +inline Def* Def::Dup(const void *owner) const { + return upb_def_dup(this, owner); +} +inline RefCounted* Def::Upcast() { + return upb_upcast(this); +} +inline const RefCounted* Def::Upcast() const { + return upb_upcast(this); +} +inline bool Def::IsFrozen() const { + return upb_def_isfrozen(this); +} +inline void Def::Ref(const void* owner) const { + upb_def_ref(this, owner); +} +inline void Def::Unref(const void* owner) const { + upb_def_unref(this, owner); +} +inline void Def::DonateRef(const void *from, const void *to) const { + upb_def_donateref(this, from, to); +} +inline void Def::CheckRef(const void *owner) const { + upb_def_checkref(this, owner); +} +inline Def::Type Def::def_type() const { + return upb_def_type(this); +} +inline const char *Def::full_name() const { + return upb_def_fullname(this); +} +inline bool Def::set_full_name(const char *fullname) { + return upb_def_setfullname(this, fullname); +} +inline bool Def::set_full_name(const std::string& fullname) { + return upb_def_setfullname(this, upb_safecstr(fullname)); +} +inline bool Def::Freeze(Def *const*defs, int n, Status *status) { + return upb_def_freeze(defs, n, status); +} +inline bool Def::Freeze(const std::vector& defs, Status *status) { + return upb_def_freeze((Def*const*)&defs[0], defs.size(), status); +} + +inline FieldDef* FieldDef::New(const void *owner) { + return upb_fielddef_new(owner); +} +inline FieldDef* FieldDef::Dup(const void *owner) const { + return upb_fielddef_dup(this, owner); +} +inline Def* FieldDef::Upcast() { + return upb_upcast(this); +} +inline const Def* FieldDef::Upcast() const { + return upb_upcast(this); +} +inline bool FieldDef::IsFrozen() const { + return upb_fielddef_isfrozen(this); +} +inline void FieldDef::Ref(const void* owner) const { + upb_fielddef_ref(this, owner); +} +inline void FieldDef::Unref(const void* owner) const { + upb_fielddef_unref(this, owner); +} +inline void FieldDef::DonateRef(const void *from, const void *to) const { 
+ upb_fielddef_donateref(this, from, to); +} +inline void FieldDef::CheckRef(const void *owner) const { + upb_fielddef_checkref(this, owner); +} +inline const char *FieldDef::full_name() const { + return upb_fielddef_fullname(this); +} +inline bool FieldDef::set_full_name(const char *fullname) { + return upb_fielddef_setfullname(this, fullname); +} +inline bool FieldDef::set_full_name(const std::string& fullname) { + return upb_fielddef_setfullname(this, upb_safecstr(fullname)); +} +inline FieldDef::Type FieldDef::type() const { + return upb_fielddef_type(this); +} +inline FieldDef::Label FieldDef::label() const { + return upb_fielddef_label(this); +} +inline uint32_t FieldDef::number() const { + return upb_fielddef_number(this); +} +inline const char *FieldDef::name() const { + return upb_fielddef_name(this); +} +inline const MessageDef* FieldDef::message_def() const { + return upb_fielddef_msgdef(this); +} +inline bool FieldDef::set_number(uint32_t number) { + return upb_fielddef_setnumber(this, number); +} +inline bool FieldDef::set_name(const char *name) { + return upb_fielddef_setname(this, name); +} +inline bool FieldDef::set_name(const std::string& name) { + return upb_fielddef_setname(this, upb_safecstr(name)); +} +inline bool FieldDef::set_type(upb_fieldtype_t type) { + return upb_fielddef_settype(this, type); +} +inline bool FieldDef::set_label(upb_label_t label) { + return upb_fielddef_setlabel(this, label); +} +inline bool FieldDef::IsSubMessage() const { + return upb_fielddef_issubmsg(this); +} +inline bool FieldDef::IsString() const { + return upb_fielddef_isstring(this); +} +inline bool FieldDef::IsSequence() const { + return upb_fielddef_isseq(this); +} +inline Value FieldDef::default_value() const { + return upb_fielddef_default(this); +} +inline void FieldDef::set_default_value(Value value) { + upb_fielddef_setdefault(this, value); +} +inline bool FieldDef::set_default_string(const void *str, size_t len) { + return upb_fielddef_setdefaultstr(this, 
str, len); +} +inline bool FieldDef::set_default_string(const std::string& str) { + return upb_fielddef_setdefaultstr(this, str.c_str(), str.size()); +} +inline void FieldDef::set_default_cstr(const char *str) { + return upb_fielddef_setdefaultcstr(this, str); +} +inline bool FieldDef::IsDefaultSymbolic() const { + return upb_fielddef_default_is_symbolic(this); +} +inline bool FieldDef::ResolveDefault() { + return upb_fielddef_resolvedefault(this); +} +inline bool FieldDef::HasSubDef() const { + return upb_fielddef_hassubdef(this); +} +inline const Def* FieldDef::subdef() const { + return upb_fielddef_subdef(this); +} +inline const char* FieldDef::subdef_name() const { + return upb_fielddef_subdefname(this); +} +inline bool FieldDef::set_subdef(const Def* subdef) { + return upb_fielddef_setsubdef(this, subdef); +} +inline bool FieldDef::set_subdef_name(const char* name) { + return upb_fielddef_setsubdefname(this, name); +} +inline bool FieldDef::set_subdef_name(const std::string& name) { + return upb_fielddef_setsubdefname(this, upb_safecstr(name)); +} + +inline MessageDef* MessageDef::New(const void *owner) { + return upb_msgdef_new(owner); +} +inline Def* MessageDef::Upcast() { + return upb_upcast(this); +} +inline const Def* MessageDef::Upcast() const { + return upb_upcast(this); +} +inline bool MessageDef::IsFrozen() const { + return upb_msgdef_isfrozen(this); +} +inline void MessageDef::Ref(const void* owner) const { + return upb_msgdef_ref(this, owner); +} +inline void MessageDef::Unref(const void* owner) const { + return upb_msgdef_unref(this, owner); +} +inline void MessageDef::DonateRef(const void *from, const void *to) const { + return upb_msgdef_donateref(this, from, to); +} +inline void MessageDef::CheckRef(const void *owner) const { + return upb_msgdef_checkref(this, owner); +} +inline const char *MessageDef::full_name() const { + return upb_msgdef_fullname(this); +} +inline bool MessageDef::set_full_name(const char *fullname) { + return 
upb_msgdef_setfullname(this, fullname); +} +inline bool MessageDef::set_full_name(const std::string& fullname) { + return upb_msgdef_setfullname(this, upb_safecstr(fullname)); +} +inline int MessageDef::field_count() const { + return upb_msgdef_numfields(this); +} +inline bool MessageDef::AddField(upb_fielddef *f, const void *ref_donor) { + return upb_msgdef_addfield(this, f, ref_donor); +} +inline FieldDef* MessageDef::FindFieldByNumber(uint32_t number) { + return upb_msgdef_itof_mutable(this, number); +} +inline FieldDef* MessageDef::FieldFieldByName(const char *name) { + return upb_msgdef_ntof_mutable(this, name); +} +inline const FieldDef* MessageDef::FindFieldByNumber(uint32_t number) const { + return upb_msgdef_itof(this, number); +} +inline const FieldDef* MessageDef::FieldFieldByName(const char *name) const { + return upb_msgdef_ntof(this, name); +} +inline MessageDef* MessageDef::Dup(const void *owner) const { + return upb_msgdef_dup(this, owner); +} + +inline MessageDef::Iterator::Iterator(MessageDef* md) { + upb_msg_begin(&iter_, md); +} +inline FieldDef* MessageDef::Iterator::field() { + return upb_msg_iter_field(&iter_); +} +inline bool MessageDef::Iterator::Done() { + return upb_msg_done(&iter_); +} +inline void MessageDef::Iterator::Next() { + return upb_msg_next(&iter_); +} + +inline MessageDef::ConstIterator::ConstIterator(const MessageDef* md) { + upb_msg_begin(&iter_, md); +} +inline const FieldDef* MessageDef::ConstIterator::field() { + return upb_msg_iter_field(&iter_); +} +inline bool MessageDef::ConstIterator::Done() { + return upb_msg_done(&iter_); +} +inline void MessageDef::ConstIterator::Next() { + return upb_msg_next(&iter_); +} + +inline EnumDef* EnumDef::New(const void *owner) { + return upb_enumdef_new(owner); +} +inline Def* EnumDef::Upcast() { + return upb_upcast(this); +} +inline const Def* EnumDef::Upcast() const { + return upb_upcast(this); +} +inline bool EnumDef::IsFrozen() const { + return upb_enumdef_isfrozen(this); +} 
+inline void EnumDef::Ref(const void* owner) const {  // EnumDef methods below are thin inline forwarders: each passes `this` straight to a upb_enumdef_* C function.
+  return upb_enumdef_ref(this, owner);  // `return <call>;` in a void function is only well-formed if the callee returns void.
+}
+inline void EnumDef::Unref(const void* owner) const {  // Forwards to upb_enumdef_unref(); presumably drops the ref held by `owner` -- TODO confirm against the C API docs.
+  return upb_enumdef_unref(this, owner);
+}
+inline void EnumDef::DonateRef(const void *from, const void *to) const {  // Forwards to upb_enumdef_donateref() with both owner pointers unchanged.
+  return upb_enumdef_donateref(this, from, to);
+}
+inline void EnumDef::CheckRef(const void *owner) const {  // Forwards to upb_enumdef_checkref(); nothing is returned to the caller.
+  return upb_enumdef_checkref(this, owner);
+}
+inline const char *EnumDef::full_name() const {  // Returns the pointer produced by upb_enumdef_fullname(); its lifetime is not visible here.
+  return upb_enumdef_fullname(this);
+}
+inline bool EnumDef::set_full_name(const char *fullname) {  // Returns the bool result of upb_enumdef_setfullname() unmodified.
+  return upb_enumdef_setfullname(this, fullname);
+}
+inline bool EnumDef::set_full_name(const std::string& fullname) {  // std::string overload of the setter above.
+  return upb_enumdef_setfullname(this, upb_safecstr(fullname));  // String is routed through upb_safecstr() first (helper defined elsewhere; NOTE(review): presumably validates/converts to a C string -- confirm).
+}
+inline int32_t EnumDef::default_value() const {  // Returns the value reported by upb_enumdef_default().
+  return upb_enumdef_default(this);
+}
+inline void EnumDef::set_default_value(int32_t val) {  // Forwards to upb_enumdef_setdefault(); no status is propagated to the caller.
+  upb_enumdef_setdefault(this, val);
+}
+inline int EnumDef::value_count() const {  // Returns the count reported by upb_enumdef_numvals().
+  return upb_enumdef_numvals(this);
+}
+inline bool EnumDef::AddValue(const char* name, int32_t num, Status* status) {  // Returns the bool result of upb_enumdef_addval(); `status` is passed through untouched.
+  return upb_enumdef_addval(this, name, num, status);
+}
+inline bool EnumDef::AddValue(  // std::string overload of AddValue.
+    const std::string& name, int32_t num, Status* status) {
+  return upb_enumdef_addval(this, upb_safecstr(name), num, status);  // Same call as the const char* overload, with the name routed through upb_safecstr().
+}
+inline bool EnumDef::FindValueByName(const char* name, int32_t* num) const {  // Name lookup via upb_enumdef_ntoi(); presumably writes *num when it returns true -- TODO confirm.
+  return upb_enumdef_ntoi(this, name, num);
+}
+inline const char* EnumDef::FindValueByNumber(int32_t num) const {  // Number lookup via upb_enumdef_iton(); the not-found result is whatever that function returns (not visible here).
+  return upb_enumdef_iton(this, num);
+}
+inline EnumDef* EnumDef::Dup(const void *owner) const {  // Returns the EnumDef* produced by upb_enumdef_dup(this, owner).
+  return upb_enumdef_dup(this, owner);
+}
+
+inline EnumDef::Iterator::Iterator(const EnumDef* e) {  // Sets up the wrapped iter_ member by calling upb_enum_begin() on enum `e`.
+  upb_enum_begin(&iter_, e);
+}
+inline int32_t EnumDef::Iterator::number() {  // Returns upb_enum_iter_number() for the current iter_ position.
+  return upb_enum_iter_number(&iter_);
+}
+inline const char* EnumDef::Iterator::name() {  // Returns upb_enum_iter_name() for the current iter_ position.
+  return upb_enum_iter_name(&iter_);
+}
+inline bool EnumDef::Iterator::Done() {  // Returns the result of upb_enum_done(&iter_).
+  return upb_enum_done(&iter_);
+}
+inline void EnumDef::Iterator::Next() { + return upb_enum_next(&iter_); +} +} // namespace upb #endif #endif /* UPB_DEF_H_ */ diff --git a/upb/descriptor.proto b/upb/descriptor.proto deleted file mode 100644 index 233f879..0000000 --- a/upb/descriptor.proto +++ /dev/null @@ -1,533 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2008 Google Inc. All rights reserved. -// http://code.google.com/p/protobuf/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -// Author: kenton@google.com (Kenton Varda) -// Based on original Protocol Buffers design by -// Sanjay Ghemawat, Jeff Dean, and others. -// -// The messages in this file describe the definitions found in .proto files. -// A valid .proto file can be translated directly to a FileDescriptorProto -// without any other information (e.g. without reading its imports). - - - -package google.protobuf; -option java_package = "com.google.protobuf"; -option java_outer_classname = "DescriptorProtos"; - -// descriptor.proto must be optimized for speed because reflection-based -// algorithms don't work during bootstrapping. -option optimize_for = SPEED; - -// The protocol compiler can output a FileDescriptorSet containing the .proto -// files it parses. -message FileDescriptorSet { - repeated FileDescriptorProto file = 1; -} - -// Describes a complete .proto file. -message FileDescriptorProto { - optional string name = 1; // file name, relative to root of source tree - optional string package = 2; // e.g. "foo", "foo.bar", etc. - - // Names of files imported by this file. - repeated string dependency = 3; - - // All top-level definitions in this file. - repeated DescriptorProto message_type = 4; - repeated EnumDescriptorProto enum_type = 5; - repeated ServiceDescriptorProto service = 6; - repeated FieldDescriptorProto extension = 7; - - optional FileOptions options = 8; - - // This field contains optional information about the original source code. - // You may safely remove this entire field whithout harming runtime - // functionality of the descriptors -- the information is needed only by - // development tools. - optional SourceCodeInfo source_code_info = 9; -} - -// Describes a message type. 
-message DescriptorProto { - optional string name = 1; - - repeated FieldDescriptorProto field = 2; - repeated FieldDescriptorProto extension = 6; - - repeated DescriptorProto nested_type = 3; - repeated EnumDescriptorProto enum_type = 4; - - message ExtensionRange { - optional int32 start = 1; - optional int32 end = 2; - } - repeated ExtensionRange extension_range = 5; - - optional MessageOptions options = 7; -} - -// Describes a field within a message. -message FieldDescriptorProto { - enum Type { - // 0 is reserved for errors. - // Order is weird for historical reasons. - TYPE_DOUBLE = 1; - TYPE_FLOAT = 2; - TYPE_INT64 = 3; // Not ZigZag encoded. Negative numbers - // take 10 bytes. Use TYPE_SINT64 if negative - // values are likely. - TYPE_UINT64 = 4; - TYPE_INT32 = 5; // Not ZigZag encoded. Negative numbers - // take 10 bytes. Use TYPE_SINT32 if negative - // values are likely. - TYPE_FIXED64 = 6; - TYPE_FIXED32 = 7; - TYPE_BOOL = 8; - TYPE_STRING = 9; - TYPE_GROUP = 10; // Tag-delimited aggregate. - TYPE_MESSAGE = 11; // Length-delimited aggregate. - - // New in version 2. - TYPE_BYTES = 12; - TYPE_UINT32 = 13; - TYPE_ENUM = 14; - TYPE_SFIXED32 = 15; - TYPE_SFIXED64 = 16; - TYPE_SINT32 = 17; // Uses ZigZag encoding. - TYPE_SINT64 = 18; // Uses ZigZag encoding. - }; - - enum Label { - // 0 is reserved for errors - LABEL_OPTIONAL = 1; - LABEL_REQUIRED = 2; - LABEL_REPEATED = 3; - // TODO(sanjay): Should we add LABEL_MAP? - }; - - optional string name = 1; - optional int32 number = 3; - optional Label label = 4; - - // If type_name is set, this need not be set. If both this and type_name - // are set, this must be either TYPE_ENUM or TYPE_MESSAGE. - optional Type type = 5; - - // For message and enum types, this is the name of the type. If the name - // starts with a '.', it is fully-qualified. Otherwise, C++-like scoping - // rules are used to find the type (i.e. 
first the nested types within this - // message are searched, then within the parent, on up to the root - // namespace). - optional string type_name = 6; - - // For extensions, this is the name of the type being extended. It is - // resolved in the same manner as type_name. - optional string extendee = 2; - - // For numeric types, contains the original text representation of the value. - // For booleans, "true" or "false". - // For strings, contains the default text contents (not escaped in any way). - // For bytes, contains the C escaped value. All bytes >= 128 are escaped. - // TODO(kenton): Base-64 encode? - optional string default_value = 7; - - optional FieldOptions options = 8; -} - -// Describes an enum type. -message EnumDescriptorProto { - optional string name = 1; - - repeated EnumValueDescriptorProto value = 2; - - optional EnumOptions options = 3; -} - -// Describes a value within an enum. -message EnumValueDescriptorProto { - optional string name = 1; - optional int32 number = 2; - - optional EnumValueOptions options = 3; -} - -// Describes a service. -message ServiceDescriptorProto { - optional string name = 1; - repeated MethodDescriptorProto method = 2; - - optional ServiceOptions options = 3; -} - -// Describes a method of a service. -message MethodDescriptorProto { - optional string name = 1; - - // Input and output type names. These are resolved in the same way as - // FieldDescriptorProto.type_name, but must refer to a message type. - optional string input_type = 2; - optional string output_type = 3; - - optional MethodOptions options = 4; -} - -// =================================================================== -// Options - -// Each of the definitions above may have "options" attached. These are -// just annotations which may cause code to be generated slightly differently -// or may contain hints for code that manipulates protocol messages. -// -// Clients may define custom options as extensions of the *Options messages. 
-// These extensions may not yet be known at parsing time, so the parser cannot -// store the values in them. Instead it stores them in a field in the *Options -// message called uninterpreted_option. This field must have the same name -// across all *Options messages. We then use this field to populate the -// extensions when we build a descriptor, at which point all protos have been -// parsed and so all extensions are known. -// -// Extension numbers for custom options may be chosen as follows: -// * For options which will only be used within a single application or -// organization, or for experimental options, use field numbers 50000 -// through 99999. It is up to you to ensure that you do not use the -// same number for multiple options. -// * For options which will be published and used publicly by multiple -// independent entities, e-mail kenton@google.com to reserve extension -// numbers. Simply tell me how many you need and I'll send you back a -// set of numbers to use -- there's no need to explain how you intend to -// use them. If this turns out to be popular, a web service will be set up -// to automatically assign option numbers. - - -message FileOptions { - - // Sets the Java package where classes generated from this .proto will be - // placed. By default, the proto package is used, but this is often - // inappropriate because proto packages do not normally start with backwards - // domain names. - optional string java_package = 1; - - - // If set, all the classes from the .proto file are wrapped in a single - // outer class with the given name. This applies to both Proto1 - // (equivalent to the old "--one_java_file" option) and Proto2 (where - // a .proto always translates to a single class, but you may want to - // explicitly choose the class name). 
- optional string java_outer_classname = 8; - - // If set true, then the Java code generator will generate a separate .java - // file for each top-level message, enum, and service defined in the .proto - // file. Thus, these types will *not* be nested inside the outer class - // named by java_outer_classname. However, the outer class will still be - // generated to contain the file's getDescriptor() method as well as any - // top-level extensions defined in the file. - optional bool java_multiple_files = 10 [default=false]; - - // If set true, then the Java code generator will generate equals() and - // hashCode() methods for all messages defined in the .proto file. This is - // purely a speed optimization, as the AbstractMessage base class includes - // reflection-based implementations of these methods. - optional bool java_generate_equals_and_hash = 20 [default=false]; - - // Generated classes can be optimized for speed or code size. - enum OptimizeMode { - SPEED = 1; // Generate complete code for parsing, serialization, - // etc. - CODE_SIZE = 2; // Use ReflectionOps to implement these methods. - LITE_RUNTIME = 3; // Generate code using MessageLite and the lite runtime. - } - optional OptimizeMode optimize_for = 9 [default=SPEED]; - - - - - // Should generic services be generated in each language? "Generic" services - // are not specific to any particular RPC system. They are generated by the - // main code generators in each language (without additional plugins). - // Generic services were the only kind of service generation supported by - // early versions of proto2. - // - // Generic services are now considered deprecated in favor of using plugins - // that generate code specific to your particular RPC system. Therefore, - // these default to false. Old code which depends on generic services should - // explicitly set them to true. 
- optional bool cc_generic_services = 16 [default=false]; - optional bool java_generic_services = 17 [default=false]; - optional bool py_generic_services = 18 [default=false]; - - // The parser stores options it doesn't recognize here. See above. - repeated UninterpretedOption uninterpreted_option = 999; - - // Clients can define custom options in extensions of this message. See above. - extensions 1000 to max; -} - -message MessageOptions { - // Set true to use the old proto1 MessageSet wire format for extensions. - // This is provided for backwards-compatibility with the MessageSet wire - // format. You should not use this for any other reason: It's less - // efficient, has fewer features, and is more complicated. - // - // The message must be defined exactly as follows: - // message Foo { - // option message_set_wire_format = true; - // extensions 4 to max; - // } - // Note that the message cannot have any defined fields; MessageSets only - // have extensions. - // - // All extensions of your type must be singular messages; e.g. they cannot - // be int32s, enums, or repeated messages. - // - // Because this is an option, the above two restrictions are not enforced by - // the protocol compiler. - optional bool message_set_wire_format = 1 [default=false]; - - // Disables the generation of the standard "descriptor()" accessor, which can - // conflict with a field of the same name. This is meant to make migration - // from proto1 easier; new code should avoid fields named "descriptor". - optional bool no_standard_descriptor_accessor = 2 [default=false]; - - // The parser stores options it doesn't recognize here. See above. - repeated UninterpretedOption uninterpreted_option = 999; - - // Clients can define custom options in extensions of this message. See above. - extensions 1000 to max; -} - -message FieldOptions { - // The ctype option instructs the C++ code generator to use a different - // representation of the field than it normally would. 
See the specific - // options below. This option is not yet implemented in the open source - // release -- sorry, we'll try to include it in a future version! - optional CType ctype = 1 [default = STRING]; - enum CType { - // Default mode. - STRING = 0; - - CORD = 1; - - STRING_PIECE = 2; - } - // The packed option can be enabled for repeated primitive fields to enable - // a more efficient representation on the wire. Rather than repeatedly - // writing the tag and type for each element, the entire array is encoded as - // a single length-delimited blob. - optional bool packed = 2; - - - // Is this field deprecated? - // Depending on the target platform, this can emit Deprecated annotations - // for accessors, or it will be completely ignored; in the very least, this - // is a formalization for deprecating fields. - optional bool deprecated = 3 [default=false]; - - // EXPERIMENTAL. DO NOT USE. - // For "map" fields, the name of the field in the enclosed type that - // is the key for this map. For example, suppose we have: - // message Item { - // required string name = 1; - // required string value = 2; - // } - // message Config { - // repeated Item items = 1 [experimental_map_key="name"]; - // } - // In this situation, the map key for Item will be set to "name". - // TODO: Fully-implement this, then remove the "experimental_" prefix. - optional string experimental_map_key = 9; - - // The parser stores options it doesn't recognize here. See above. - repeated UninterpretedOption uninterpreted_option = 999; - - // Clients can define custom options in extensions of this message. See above. - extensions 1000 to max; -} - -message EnumOptions { - - // The parser stores options it doesn't recognize here. See above. - repeated UninterpretedOption uninterpreted_option = 999; - - // Clients can define custom options in extensions of this message. See above. - extensions 1000 to max; -} - -message EnumValueOptions { - // The parser stores options it doesn't recognize here. 
See above. - repeated UninterpretedOption uninterpreted_option = 999; - - // Clients can define custom options in extensions of this message. See above. - extensions 1000 to max; -} - -message ServiceOptions { - - // Note: Field numbers 1 through 32 are reserved for Google's internal RPC - // framework. We apologize for hoarding these numbers to ourselves, but - // we were already using them long before we decided to release Protocol - // Buffers. - - // The parser stores options it doesn't recognize here. See above. - repeated UninterpretedOption uninterpreted_option = 999; - - // Clients can define custom options in extensions of this message. See above. - extensions 1000 to max; -} - -message MethodOptions { - - // Note: Field numbers 1 through 32 are reserved for Google's internal RPC - // framework. We apologize for hoarding these numbers to ourselves, but - // we were already using them long before we decided to release Protocol - // Buffers. - - // The parser stores options it doesn't recognize here. See above. - repeated UninterpretedOption uninterpreted_option = 999; - - // Clients can define custom options in extensions of this message. See above. - extensions 1000 to max; -} - -// A message representing a option the parser does not recognize. This only -// appears in options protos created by the compiler::Parser class. -// DescriptorPool resolves these when building Descriptor objects. Therefore, -// options protos in descriptor objects (e.g. returned by Descriptor::options(), -// or produced by Descriptor::CopyTo()) will never have UninterpretedOptions -// in them. -message UninterpretedOption { - // The name of the uninterpreted option. Each string represents a segment in - // a dot-separated name. is_extension is true iff a segment represents an - // extension (denoted with parentheses in options specs in .proto files). - // E.g.,{ ["foo", false], ["bar.baz", true], ["qux", false] } represents - // "foo.(bar.baz).qux". 
- message NamePart { - required string name_part = 1; - required bool is_extension = 2; - } - repeated NamePart name = 2; - - // The value of the uninterpreted option, in whatever type the tokenizer - // identified it as during parsing. Exactly one of these should be set. - optional string identifier_value = 3; - optional uint64 positive_int_value = 4; - optional int64 negative_int_value = 5; - optional double double_value = 6; - optional bytes string_value = 7; - optional string aggregate_value = 8; -} - -// =================================================================== -// Optional source code info - -// Encapsulates information about the original source file from which a -// FileDescriptorProto was generated. -message SourceCodeInfo { - // A Location identifies a piece of source code in a .proto file which - // corresponds to a particular definition. This information is intended - // to be useful to IDEs, code indexers, documentation generators, and similar - // tools. - // - // For example, say we have a file like: - // message Foo { - // optional string foo = 1; - // } - // Let's look at just the field definition: - // optional string foo = 1; - // ^ ^^ ^^ ^ ^^^ - // a bc de f ghi - // We have the following locations: - // span path represents - // [a,i) [ 4, 0, 2, 0 ] The whole field definition. - // [a,b) [ 4, 0, 2, 0, 4 ] The label (optional). - // [c,d) [ 4, 0, 2, 0, 5 ] The type (string). - // [e,f) [ 4, 0, 2, 0, 1 ] The name (foo). - // [g,h) [ 4, 0, 2, 0, 3 ] The number (1). - // - // Notes: - // - A location may refer to a repeated field itself (i.e. not to any - // particular index within it). This is used whenever a set of elements are - // logically enclosed in a single code segment. For example, an entire - // extend block (possibly containing multiple extension definitions) will - // have an outer location whose path refers to the "extensions" repeated - // field without an index. - // - Multiple locations may have the same path. 
This happens when a single - // logical declaration is spread out across multiple places. The most - // obvious example is the "extend" block again -- there may be multiple - // extend blocks in the same scope, each of which will have the same path. - // - A location's span is not always a subset of its parent's span. For - // example, the "extendee" of an extension declaration appears at the - // beginning of the "extend" block and is shared by all extensions within - // the block. - // - Just because a location's span is a subset of some other location's span - // does not mean that it is a descendent. For example, a "group" defines - // both a type and a field in a single declaration. Thus, the locations - // corresponding to the type and field and their components will overlap. - // - Code which tries to interpret locations should probably be designed to - // ignore those that it doesn't understand, as more types of locations could - // be recorded in the future. - repeated Location location = 1; - message Location { - // Identifies which part of the FileDescriptorProto was defined at this - // location. - // - // Each element is a field number or an index. They form a path from - // the root FileDescriptorProto to the place where the definition. For - // example, this path: - // [ 4, 3, 2, 7, 1 ] - // refers to: - // file.message_type(3) // 4, 3 - // .field(7) // 2, 7 - // .name() // 1 - // This is because FileDescriptorProto.message_type has field number 4: - // repeated DescriptorProto message_type = 4; - // and DescriptorProto.field has field number 2: - // repeated FieldDescriptorProto field = 2; - // and FieldDescriptorProto.name has field number 1: - // optional string name = 1; - // - // Thus, the above path gives the location of a field name. If we removed - // the last element: - // [ 4, 3, 2, 7 ] - // this path refers to the whole field declaration (from the beginning - // of the label to the terminating semicolon). 
- repeated int32 path = 1 [packed=true]; - - // Always has exactly three or four elements: start line, start column, - // end line (optional, otherwise assumed same as start line), end column. - // These are packed into a single field for efficiency. Note that line - // and column numbers are zero-based -- typically you will want to add - // 1 to each before displaying to a user. - repeated int32 span = 2 [packed=true]; - - // TODO(kenton): Record comments appearing before and after the - // declaration. - } -} diff --git a/upb/descriptor/descriptor.proto b/upb/descriptor/descriptor.proto new file mode 100644 index 0000000..233f879 --- /dev/null +++ b/upb/descriptor/descriptor.proto @@ -0,0 +1,533 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// The messages in this file describe the definitions found in .proto files. +// A valid .proto file can be translated directly to a FileDescriptorProto +// without any other information (e.g. without reading its imports). + + + +package google.protobuf; +option java_package = "com.google.protobuf"; +option java_outer_classname = "DescriptorProtos"; + +// descriptor.proto must be optimized for speed because reflection-based +// algorithms don't work during bootstrapping. +option optimize_for = SPEED; + +// The protocol compiler can output a FileDescriptorSet containing the .proto +// files it parses. +message FileDescriptorSet { + repeated FileDescriptorProto file = 1; +} + +// Describes a complete .proto file. +message FileDescriptorProto { + optional string name = 1; // file name, relative to root of source tree + optional string package = 2; // e.g. "foo", "foo.bar", etc. + + // Names of files imported by this file. + repeated string dependency = 3; + + // All top-level definitions in this file. 
+ repeated DescriptorProto message_type = 4; + repeated EnumDescriptorProto enum_type = 5; + repeated ServiceDescriptorProto service = 6; + repeated FieldDescriptorProto extension = 7; + + optional FileOptions options = 8; + + // This field contains optional information about the original source code. + // You may safely remove this entire field whithout harming runtime + // functionality of the descriptors -- the information is needed only by + // development tools. + optional SourceCodeInfo source_code_info = 9; +} + +// Describes a message type. +message DescriptorProto { + optional string name = 1; + + repeated FieldDescriptorProto field = 2; + repeated FieldDescriptorProto extension = 6; + + repeated DescriptorProto nested_type = 3; + repeated EnumDescriptorProto enum_type = 4; + + message ExtensionRange { + optional int32 start = 1; + optional int32 end = 2; + } + repeated ExtensionRange extension_range = 5; + + optional MessageOptions options = 7; +} + +// Describes a field within a message. +message FieldDescriptorProto { + enum Type { + // 0 is reserved for errors. + // Order is weird for historical reasons. + TYPE_DOUBLE = 1; + TYPE_FLOAT = 2; + TYPE_INT64 = 3; // Not ZigZag encoded. Negative numbers + // take 10 bytes. Use TYPE_SINT64 if negative + // values are likely. + TYPE_UINT64 = 4; + TYPE_INT32 = 5; // Not ZigZag encoded. Negative numbers + // take 10 bytes. Use TYPE_SINT32 if negative + // values are likely. + TYPE_FIXED64 = 6; + TYPE_FIXED32 = 7; + TYPE_BOOL = 8; + TYPE_STRING = 9; + TYPE_GROUP = 10; // Tag-delimited aggregate. + TYPE_MESSAGE = 11; // Length-delimited aggregate. + + // New in version 2. + TYPE_BYTES = 12; + TYPE_UINT32 = 13; + TYPE_ENUM = 14; + TYPE_SFIXED32 = 15; + TYPE_SFIXED64 = 16; + TYPE_SINT32 = 17; // Uses ZigZag encoding. + TYPE_SINT64 = 18; // Uses ZigZag encoding. 
+ }; + + enum Label { + // 0 is reserved for errors + LABEL_OPTIONAL = 1; + LABEL_REQUIRED = 2; + LABEL_REPEATED = 3; + // TODO(sanjay): Should we add LABEL_MAP? + }; + + optional string name = 1; + optional int32 number = 3; + optional Label label = 4; + + // If type_name is set, this need not be set. If both this and type_name + // are set, this must be either TYPE_ENUM or TYPE_MESSAGE. + optional Type type = 5; + + // For message and enum types, this is the name of the type. If the name + // starts with a '.', it is fully-qualified. Otherwise, C++-like scoping + // rules are used to find the type (i.e. first the nested types within this + // message are searched, then within the parent, on up to the root + // namespace). + optional string type_name = 6; + + // For extensions, this is the name of the type being extended. It is + // resolved in the same manner as type_name. + optional string extendee = 2; + + // For numeric types, contains the original text representation of the value. + // For booleans, "true" or "false". + // For strings, contains the default text contents (not escaped in any way). + // For bytes, contains the C escaped value. All bytes >= 128 are escaped. + // TODO(kenton): Base-64 encode? + optional string default_value = 7; + + optional FieldOptions options = 8; +} + +// Describes an enum type. +message EnumDescriptorProto { + optional string name = 1; + + repeated EnumValueDescriptorProto value = 2; + + optional EnumOptions options = 3; +} + +// Describes a value within an enum. +message EnumValueDescriptorProto { + optional string name = 1; + optional int32 number = 2; + + optional EnumValueOptions options = 3; +} + +// Describes a service. +message ServiceDescriptorProto { + optional string name = 1; + repeated MethodDescriptorProto method = 2; + + optional ServiceOptions options = 3; +} + +// Describes a method of a service. +message MethodDescriptorProto { + optional string name = 1; + + // Input and output type names. 
These are resolved in the same way as + // FieldDescriptorProto.type_name, but must refer to a message type. + optional string input_type = 2; + optional string output_type = 3; + + optional MethodOptions options = 4; +} + +// =================================================================== +// Options + +// Each of the definitions above may have "options" attached. These are +// just annotations which may cause code to be generated slightly differently +// or may contain hints for code that manipulates protocol messages. +// +// Clients may define custom options as extensions of the *Options messages. +// These extensions may not yet be known at parsing time, so the parser cannot +// store the values in them. Instead it stores them in a field in the *Options +// message called uninterpreted_option. This field must have the same name +// across all *Options messages. We then use this field to populate the +// extensions when we build a descriptor, at which point all protos have been +// parsed and so all extensions are known. +// +// Extension numbers for custom options may be chosen as follows: +// * For options which will only be used within a single application or +// organization, or for experimental options, use field numbers 50000 +// through 99999. It is up to you to ensure that you do not use the +// same number for multiple options. +// * For options which will be published and used publicly by multiple +// independent entities, e-mail kenton@google.com to reserve extension +// numbers. Simply tell me how many you need and I'll send you back a +// set of numbers to use -- there's no need to explain how you intend to +// use them. If this turns out to be popular, a web service will be set up +// to automatically assign option numbers. + + +message FileOptions { + + // Sets the Java package where classes generated from this .proto will be + // placed. 
By default, the proto package is used, but this is often + // inappropriate because proto packages do not normally start with backwards + // domain names. + optional string java_package = 1; + + + // If set, all the classes from the .proto file are wrapped in a single + // outer class with the given name. This applies to both Proto1 + // (equivalent to the old "--one_java_file" option) and Proto2 (where + // a .proto always translates to a single class, but you may want to + // explicitly choose the class name). + optional string java_outer_classname = 8; + + // If set true, then the Java code generator will generate a separate .java + // file for each top-level message, enum, and service defined in the .proto + // file. Thus, these types will *not* be nested inside the outer class + // named by java_outer_classname. However, the outer class will still be + // generated to contain the file's getDescriptor() method as well as any + // top-level extensions defined in the file. + optional bool java_multiple_files = 10 [default=false]; + + // If set true, then the Java code generator will generate equals() and + // hashCode() methods for all messages defined in the .proto file. This is + // purely a speed optimization, as the AbstractMessage base class includes + // reflection-based implementations of these methods. + optional bool java_generate_equals_and_hash = 20 [default=false]; + + // Generated classes can be optimized for speed or code size. + enum OptimizeMode { + SPEED = 1; // Generate complete code for parsing, serialization, + // etc. + CODE_SIZE = 2; // Use ReflectionOps to implement these methods. + LITE_RUNTIME = 3; // Generate code using MessageLite and the lite runtime. + } + optional OptimizeMode optimize_for = 9 [default=SPEED]; + + + + + // Should generic services be generated in each language? "Generic" services + // are not specific to any particular RPC system. 
They are generated by the + // main code generators in each language (without additional plugins). + // Generic services were the only kind of service generation supported by + // early versions of proto2. + // + // Generic services are now considered deprecated in favor of using plugins + // that generate code specific to your particular RPC system. Therefore, + // these default to false. Old code which depends on generic services should + // explicitly set them to true. + optional bool cc_generic_services = 16 [default=false]; + optional bool java_generic_services = 17 [default=false]; + optional bool py_generic_services = 18 [default=false]; + + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +message MessageOptions { + // Set true to use the old proto1 MessageSet wire format for extensions. + // This is provided for backwards-compatibility with the MessageSet wire + // format. You should not use this for any other reason: It's less + // efficient, has fewer features, and is more complicated. + // + // The message must be defined exactly as follows: + // message Foo { + // option message_set_wire_format = true; + // extensions 4 to max; + // } + // Note that the message cannot have any defined fields; MessageSets only + // have extensions. + // + // All extensions of your type must be singular messages; e.g. they cannot + // be int32s, enums, or repeated messages. + // + // Because this is an option, the above two restrictions are not enforced by + // the protocol compiler. + optional bool message_set_wire_format = 1 [default=false]; + + // Disables the generation of the standard "descriptor()" accessor, which can + // conflict with a field of the same name. This is meant to make migration + // from proto1 easier; new code should avoid fields named "descriptor". 
+ optional bool no_standard_descriptor_accessor = 2 [default=false]; + + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +message FieldOptions { + // The ctype option instructs the C++ code generator to use a different + // representation of the field than it normally would. See the specific + // options below. This option is not yet implemented in the open source + // release -- sorry, we'll try to include it in a future version! + optional CType ctype = 1 [default = STRING]; + enum CType { + // Default mode. + STRING = 0; + + CORD = 1; + + STRING_PIECE = 2; + } + // The packed option can be enabled for repeated primitive fields to enable + // a more efficient representation on the wire. Rather than repeatedly + // writing the tag and type for each element, the entire array is encoded as + // a single length-delimited blob. + optional bool packed = 2; + + + // Is this field deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for accessors, or it will be completely ignored; at the very least, this + // is a formalization for deprecating fields. + optional bool deprecated = 3 [default=false]; + + // EXPERIMENTAL. DO NOT USE. + // For "map" fields, the name of the field in the enclosed type that + // is the key for this map. For example, suppose we have: + // message Item { + // required string name = 1; + // required string value = 2; + // } + // message Config { + // repeated Item items = 1 [experimental_map_key="name"]; + // } + // In this situation, the map key for Item will be set to "name". + // TODO: Fully-implement this, then remove the "experimental_" prefix. + optional string experimental_map_key = 9; + + // The parser stores options it doesn't recognize here. See above. 
+ repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +message EnumOptions { + + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +message EnumValueOptions { + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +message ServiceOptions { + + // Note: Field numbers 1 through 32 are reserved for Google's internal RPC + // framework. We apologize for hoarding these numbers to ourselves, but + // we were already using them long before we decided to release Protocol + // Buffers. + + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +message MethodOptions { + + // Note: Field numbers 1 through 32 are reserved for Google's internal RPC + // framework. We apologize for hoarding these numbers to ourselves, but + // we were already using them long before we decided to release Protocol + // Buffers. + + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +// A message representing an option the parser does not recognize. This only +// appears in options protos created by the compiler::Parser class. +// DescriptorPool resolves these when building Descriptor objects. 
Therefore, +// options protos in descriptor objects (e.g. returned by Descriptor::options(), +// or produced by Descriptor::CopyTo()) will never have UninterpretedOptions +// in them. +message UninterpretedOption { + // The name of the uninterpreted option. Each string represents a segment in + // a dot-separated name. is_extension is true iff a segment represents an + // extension (denoted with parentheses in options specs in .proto files). + // E.g.,{ ["foo", false], ["bar.baz", true], ["qux", false] } represents + // "foo.(bar.baz).qux". + message NamePart { + required string name_part = 1; + required bool is_extension = 2; + } + repeated NamePart name = 2; + + // The value of the uninterpreted option, in whatever type the tokenizer + // identified it as during parsing. Exactly one of these should be set. + optional string identifier_value = 3; + optional uint64 positive_int_value = 4; + optional int64 negative_int_value = 5; + optional double double_value = 6; + optional bytes string_value = 7; + optional string aggregate_value = 8; +} + +// =================================================================== +// Optional source code info + +// Encapsulates information about the original source file from which a +// FileDescriptorProto was generated. +message SourceCodeInfo { + // A Location identifies a piece of source code in a .proto file which + // corresponds to a particular definition. This information is intended + // to be useful to IDEs, code indexers, documentation generators, and similar + // tools. + // + // For example, say we have a file like: + // message Foo { + // optional string foo = 1; + // } + // Let's look at just the field definition: + // optional string foo = 1; + // ^ ^^ ^^ ^ ^^^ + // a bc de f ghi + // We have the following locations: + // span path represents + // [a,i) [ 4, 0, 2, 0 ] The whole field definition. + // [a,b) [ 4, 0, 2, 0, 4 ] The label (optional). + // [c,d) [ 4, 0, 2, 0, 5 ] The type (string). 
+ // [e,f) [ 4, 0, 2, 0, 1 ] The name (foo). + // [g,h) [ 4, 0, 2, 0, 3 ] The number (1). + // + // Notes: + // - A location may refer to a repeated field itself (i.e. not to any + // particular index within it). This is used whenever a set of elements is + // logically enclosed in a single code segment. For example, an entire + // extend block (possibly containing multiple extension definitions) will + // have an outer location whose path refers to the "extensions" repeated + // field without an index. + // - Multiple locations may have the same path. This happens when a single + // logical declaration is spread out across multiple places. The most + // obvious example is the "extend" block again -- there may be multiple + // extend blocks in the same scope, each of which will have the same path. + // - A location's span is not always a subset of its parent's span. For + // example, the "extendee" of an extension declaration appears at the + // beginning of the "extend" block and is shared by all extensions within + // the block. + // - Just because a location's span is a subset of some other location's span + // does not mean that it is a descendant. For example, a "group" defines + // both a type and a field in a single declaration. Thus, the locations + // corresponding to the type and field and their components will overlap. + // - Code which tries to interpret locations should probably be designed to + // ignore those that it doesn't understand, as more types of locations could + // be recorded in the future. + repeated Location location = 1; + message Location { + // Identifies which part of the FileDescriptorProto was defined at this + // location. + // + // Each element is a field number or an index. They form a path from + // the root FileDescriptorProto to the place where the definition occurs. 
For + // example, this path: + // [ 4, 3, 2, 7, 1 ] + // refers to: + // file.message_type(3) // 4, 3 + // .field(7) // 2, 7 + // .name() // 1 + // This is because FileDescriptorProto.message_type has field number 4: + // repeated DescriptorProto message_type = 4; + // and DescriptorProto.field has field number 2: + // repeated FieldDescriptorProto field = 2; + // and FieldDescriptorProto.name has field number 1: + // optional string name = 1; + // + // Thus, the above path gives the location of a field name. If we removed + // the last element: + // [ 4, 3, 2, 7 ] + // this path refers to the whole field declaration (from the beginning + // of the label to the terminating semicolon). + repeated int32 path = 1 [packed=true]; + + // Always has exactly three or four elements: start line, start column, + // end line (optional, otherwise assumed same as start line), end column. + // These are packed into a single field for efficiency. Note that line + // and column numbers are zero-based -- typically you will want to add + // 1 to each before displaying to a user. + repeated int32 span = 2 [packed=true]; + + // TODO(kenton): Record comments appearing before and after the + // declaration. + } +} diff --git a/upb/descriptor/descriptor.upb.c b/upb/descriptor/descriptor.upb.c new file mode 100755 index 0000000..9a64c5b --- /dev/null +++ b/upb/descriptor/descriptor.upb.c @@ -0,0 +1,483 @@ +// This file was generated by upbc (the upb compiler). +// Do not edit -- your changes will be discarded when the file is +// regenerated. 
+ +#include "upb/def.h" + +const upb_msgdef google_protobuf_msgs[20]; +const upb_fielddef google_protobuf_fields[73]; +const upb_enumdef google_protobuf_enums[4]; +const upb_tabent google_protobuf_strentries[192]; +const upb_tabent google_protobuf_intentries[66]; +const upb_value google_protobuf_arrays[97]; + +const upb_msgdef google_protobuf_msgs[20] = { + UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", UPB_INTTABLE_INIT(2, 3, 9, 2, &google_protobuf_intentries[0], &google_protobuf_arrays[0], 6, 5), UPB_STRTABLE_INIT(7, 15, 9, 4, &google_protobuf_strentries[0]), 31), + UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[6], 4, 2), UPB_STRTABLE_INIT(2, 3, 9, 2, &google_protobuf_strentries[16]), 2), + UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[10], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[20]), 11), + UPB_MSGDEF_INIT("google.protobuf.EnumOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[4], &google_protobuf_arrays[14], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[24]), 5), + UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[15], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[28]), 7), + UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[6], &google_protobuf_arrays[19], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[32]), 5), + UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", UPB_INTTABLE_INIT(3, 3, 9, 2, &google_protobuf_intentries[8], &google_protobuf_arrays[20], 6, 5), UPB_STRTABLE_INIT(8, 15, 9, 4, &google_protobuf_strentries[36]), 18), + UPB_MSGDEF_INIT("google.protobuf.FieldOptions", UPB_INTTABLE_INIT(2, 3, 9, 2, &google_protobuf_intentries[12], &google_protobuf_arrays[26], 5, 3), 
UPB_STRTABLE_INIT(5, 7, 9, 3, &google_protobuf_strentries[52]), 11), + UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", UPB_INTTABLE_INIT(4, 7, 9, 3, &google_protobuf_intentries[16], &google_protobuf_arrays[31], 6, 5), UPB_STRTABLE_INIT(9, 15, 9, 4, &google_protobuf_strentries[60]), 37), + UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[37], 3, 1), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[76]), 5), + UPB_MSGDEF_INIT("google.protobuf.FileOptions", UPB_INTTABLE_INIT(8, 15, 9, 4, &google_protobuf_intentries[24], &google_protobuf_arrays[40], 6, 1), UPB_STRTABLE_INIT(9, 15, 9, 4, &google_protobuf_strentries[80]), 17), + UPB_MSGDEF_INIT("google.protobuf.MessageOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[40], &google_protobuf_arrays[46], 4, 2), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[96]), 7), + UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[50], 5, 4), UPB_STRTABLE_INIT(4, 7, 9, 3, &google_protobuf_strentries[100]), 12), + UPB_MSGDEF_INIT("google.protobuf.MethodOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[42], &google_protobuf_arrays[55], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[108]), 5), + UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[56], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[112]), 11), + UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[44], &google_protobuf_arrays[60], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[116]), 5), + UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[61], 3, 1), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[120]), 5), + 
UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[64], 4, 2), UPB_STRTABLE_INIT(2, 3, 9, 2, &google_protobuf_strentries[124]), 6), + UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", UPB_INTTABLE_INIT(3, 3, 9, 2, &google_protobuf_intentries[46], &google_protobuf_arrays[68], 6, 4), UPB_STRTABLE_INIT(7, 15, 9, 4, &google_protobuf_strentries[128]), 17), + UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[74], 4, 2), UPB_STRTABLE_INIT(2, 3, 9, 2, &google_protobuf_strentries[144]), 4), +}; + +const upb_fielddef google_protobuf_fields[73] = { + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "aggregate_value", 8, &google_protobuf_msgs[18], NULL, 10, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "cc_generic_services", 16, &google_protobuf_msgs[10], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "ctype", 1, &google_protobuf_msgs[7], upb_upcast(&google_protobuf_enums[2]), 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "default_value", 7, &google_protobuf_msgs[6], NULL, 15, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, "dependency", 3, &google_protobuf_msgs[8], NULL, 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "deprecated", 3, &google_protobuf_msgs[7], NULL, 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, "double_value", 6, &google_protobuf_msgs[18], NULL, 13, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "end", 2, &google_protobuf_msgs[1], NULL, 1, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "enum_type", 4, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[2]), 15, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, 
UPB_TYPE_MESSAGE, "enum_type", 5, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[2]), 18, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "experimental_map_key", 9, &google_protobuf_msgs[7], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "extendee", 2, &google_protobuf_msgs[6], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "extension", 7, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[6]), 34, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "extension", 6, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[6]), 25, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "extension_range", 5, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[1]), 20, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "field", 2, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[6]), 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "file", 1, &google_protobuf_msgs[9], upb_upcast(&google_protobuf_msgs[8]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "identifier_value", 3, &google_protobuf_msgs[18], NULL, 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "input_type", 2, &google_protobuf_msgs[12], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, "is_extension", 2, &google_protobuf_msgs[19], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "java_generate_equals_and_hash", 20, &google_protobuf_msgs[10], NULL, 6, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "java_generic_services", 17, &google_protobuf_msgs[10], NULL, 4, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "java_multiple_files", 10, 
&google_protobuf_msgs[10], NULL, 16, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "java_outer_classname", 8, &google_protobuf_msgs[10], NULL, 12, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "java_package", 1, &google_protobuf_msgs[10], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "label", 4, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_enums[0]), 7, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "location", 1, &google_protobuf_msgs[16], upb_upcast(&google_protobuf_msgs[17]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "message_set_wire_format", 1, &google_protobuf_msgs[11], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "message_type", 4, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[0]), 13, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "method", 2, &google_protobuf_msgs[14], upb_upcast(&google_protobuf_msgs[12]), 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[12], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[4], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[14], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[2], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[6], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "name", 2, &google_protobuf_msgs[18], upb_upcast(&google_protobuf_msgs[19]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, 
&google_protobuf_msgs[0], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[8], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, "name_part", 1, &google_protobuf_msgs[19], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, "negative_int_value", 5, &google_protobuf_msgs[18], NULL, 9, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "nested_type", 3, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[0]), 10, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "no_standard_descriptor_accessor", 2, &google_protobuf_msgs[11], NULL, 1, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "number", 2, &google_protobuf_msgs[4], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "number", 3, &google_protobuf_msgs[6], NULL, 6, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "optimize_for", 9, &google_protobuf_msgs[10], upb_upcast(&google_protobuf_enums[3]), 15, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 4, &google_protobuf_msgs[12], upb_upcast(&google_protobuf_msgs[13]), 9, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 3, &google_protobuf_msgs[14], upb_upcast(&google_protobuf_msgs[15]), 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 8, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[10]), 21, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 3, &google_protobuf_msgs[2], upb_upcast(&google_protobuf_msgs[3]), 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 7, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[11]), 28, UPB_VALUE_INIT_NONE), 
+ UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 8, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_msgs[7]), 9, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 3, &google_protobuf_msgs[4], upb_upcast(&google_protobuf_msgs[5]), 4, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "output_type", 3, &google_protobuf_msgs[12], NULL, 6, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "package", 2, &google_protobuf_msgs[8], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "packed", 2, &google_protobuf_msgs[7], NULL, 1, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, "path", 1, &google_protobuf_msgs[17], NULL, 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, "positive_int_value", 4, &google_protobuf_msgs[18], NULL, 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "py_generic_services", 18, &google_protobuf_msgs[10], NULL, 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "service", 6, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[14]), 29, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "source_code_info", 9, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[16]), 24, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, "span", 2, &google_protobuf_msgs[17], NULL, 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "start", 1, &google_protobuf_msgs[1], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, "string_value", 7, &google_protobuf_msgs[18], NULL, 14, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "type", 5, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_enums[1]), 8, UPB_VALUE_INIT_NONE), + 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "type_name", 6, &google_protobuf_msgs[6], NULL, 12, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[15], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[11], upb_upcast(&google_protobuf_msgs[18]), 4, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[13], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[10], upb_upcast(&google_protobuf_msgs[18]), 9, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[7], upb_upcast(&google_protobuf_msgs[18]), 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[3], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[5], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "value", 2, &google_protobuf_msgs[2], upb_upcast(&google_protobuf_msgs[4]), 5, UPB_VALUE_INIT_NONE), +}; + +const upb_enumdef google_protobuf_enums[4] = { + UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, 1, 2, &google_protobuf_strentries[148]), UPB_INTTABLE_INIT(0, 0, 8, 0, NULL, &google_protobuf_arrays[78], 4, 3), 0), + UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, 1, 5, &google_protobuf_strentries[152]), UPB_INTTABLE_INIT(12, 15, 8, 4, &google_protobuf_intentries[50], 
&google_protobuf_arrays[82], 7, 6), 0), + UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, 1, 2, &google_protobuf_strentries[184]), UPB_INTTABLE_INIT(0, 0, 8, 0, NULL, &google_protobuf_arrays[89], 4, 3), 0), + UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, 1, 2, &google_protobuf_strentries[188]), UPB_INTTABLE_INIT(0, 0, 8, 0, NULL, &google_protobuf_arrays[93], 4, 3), 0), +}; + +const upb_tabent google_protobuf_strentries[192] = { + {UPB_TABKEY_STR("extension"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[13]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[36]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("field"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[15]), NULL}, + {UPB_TABKEY_STR("extension_range"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[14]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("nested_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[40]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[49]), NULL}, + {UPB_TABKEY_STR("enum_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[8]), &google_protobuf_strentries[14]}, + {UPB_TABKEY_STR("start"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[61]), NULL}, + {UPB_TABKEY_STR("end"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[7]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("value"), 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[72]), NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[48]), NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[33]), &google_protobuf_strentries[22]}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[70]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("number"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[42]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[51]), NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[31]), &google_protobuf_strentries[30]}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[71]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("label"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[25]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[34]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("number"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[43]), &google_protobuf_strentries[49]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("type_name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[64]), NULL}, + {UPB_TABKEY_STR("extendee"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[11]), NULL}, + {UPB_TABKEY_STR("type"), 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[63]), &google_protobuf_strentries[48]}, + {UPB_TABKEY_STR("default_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[3]), NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[50]), NULL}, + {UPB_TABKEY_STR("experimental_map_key"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[10]), &google_protobuf_strentries[58]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("ctype"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[2]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("deprecated"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[5]), NULL}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[69]), NULL}, + {UPB_TABKEY_STR("packed"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[54]), NULL}, + {UPB_TABKEY_STR("extension"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[12]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[37]), NULL}, + {UPB_TABKEY_STR("service"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[58]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("source_code_info"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[59]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("dependency"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[4]), NULL}, + {UPB_TABKEY_STR("message_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[28]), NULL}, + {UPB_TABKEY_STR("package"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[53]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[47]), 
NULL}, + {UPB_TABKEY_STR("enum_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[9]), &google_protobuf_strentries[74]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("file"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[16]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[68]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("cc_generic_services"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[1]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("java_multiple_files"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[22]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("java_generic_services"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[21]), &google_protobuf_strentries[94]}, + {UPB_TABKEY_STR("java_generate_equals_and_hash"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[20]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("java_package"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[24]), NULL}, + {UPB_TABKEY_STR("optimize_for"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[44]), NULL}, + {UPB_TABKEY_STR("py_generic_services"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[57]), NULL}, + {UPB_TABKEY_STR("java_outer_classname"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[23]), NULL}, + {UPB_TABKEY_STR("message_set_wire_format"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[27]), &google_protobuf_strentries[98]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[66]), NULL}, + {UPB_TABKEY_STR("no_standard_descriptor_accessor"), 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[41]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[30]), NULL}, + {UPB_TABKEY_STR("input_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[18]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("output_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[52]), NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[45]), NULL}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[67]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[46]), &google_protobuf_strentries[114]}, + {UPB_TABKEY_STR("method"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[29]), NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[32]), &google_protobuf_strentries[113]}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[65]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("location"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[26]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("span"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[60]), NULL}, + {UPB_TABKEY_STR("path"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[55]), 
&google_protobuf_strentries[126]}, + {UPB_TABKEY_STR("double_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[6]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[35]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("negative_int_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[39]), NULL}, + {UPB_TABKEY_STR("aggregate_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[0]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("positive_int_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[56]), NULL}, + {UPB_TABKEY_STR("identifier_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[17]), NULL}, + {UPB_TABKEY_STR("string_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[62]), &google_protobuf_strentries[142]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("is_extension"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[19]), NULL}, + {UPB_TABKEY_STR("name_part"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[38]), NULL}, + {UPB_TABKEY_STR("LABEL_REQUIRED"), UPB_VALUE_INIT_INT32(2), &google_protobuf_strentries[150]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("LABEL_REPEATED"), UPB_VALUE_INIT_INT32(3), NULL}, + {UPB_TABKEY_STR("LABEL_OPTIONAL"), UPB_VALUE_INIT_INT32(1), NULL}, + {UPB_TABKEY_STR("TYPE_FIXED64"), UPB_VALUE_INIT_INT32(6), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, 
NULL}, + {UPB_TABKEY_STR("TYPE_STRING"), UPB_VALUE_INIT_INT32(9), NULL}, + {UPB_TABKEY_STR("TYPE_FLOAT"), UPB_VALUE_INIT_INT32(2), &google_protobuf_strentries[181]}, + {UPB_TABKEY_STR("TYPE_DOUBLE"), UPB_VALUE_INIT_INT32(1), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_INT32"), UPB_VALUE_INIT_INT32(5), NULL}, + {UPB_TABKEY_STR("TYPE_SFIXED32"), UPB_VALUE_INIT_INT32(15), NULL}, + {UPB_TABKEY_STR("TYPE_FIXED32"), UPB_VALUE_INIT_INT32(7), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_MESSAGE"), UPB_VALUE_INIT_INT32(11), &google_protobuf_strentries[182]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_INT64"), UPB_VALUE_INIT_INT32(3), &google_protobuf_strentries[179]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_ENUM"), UPB_VALUE_INIT_INT32(14), NULL}, + {UPB_TABKEY_STR("TYPE_UINT32"), UPB_VALUE_INIT_INT32(13), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_UINT64"), UPB_VALUE_INIT_INT32(4), &google_protobuf_strentries[178]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_SFIXED64"), UPB_VALUE_INIT_INT32(16), NULL}, + {UPB_TABKEY_STR("TYPE_BYTES"), UPB_VALUE_INIT_INT32(12), NULL}, + {UPB_TABKEY_STR("TYPE_SINT64"), UPB_VALUE_INIT_INT32(18), NULL}, + {UPB_TABKEY_STR("TYPE_BOOL"), UPB_VALUE_INIT_INT32(8), NULL}, + {UPB_TABKEY_STR("TYPE_GROUP"), UPB_VALUE_INIT_INT32(10), NULL}, + {UPB_TABKEY_STR("TYPE_SINT32"), UPB_VALUE_INIT_INT32(17), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("CORD"), UPB_VALUE_INIT_INT32(1), NULL}, + {UPB_TABKEY_STR("STRING"), UPB_VALUE_INIT_INT32(0), &google_protobuf_strentries[185]}, + {UPB_TABKEY_STR("STRING_PIECE"), UPB_VALUE_INIT_INT32(2), NULL}, + 
{UPB_TABKEY_STR("CODE_SIZE"), UPB_VALUE_INIT_INT32(2), NULL}, + {UPB_TABKEY_STR("SPEED"), UPB_VALUE_INIT_INT32(1), &google_protobuf_strentries[191]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("LITE_RUNTIME"), UPB_VALUE_INIT_INT32(3), NULL}, +}; + +const upb_tabent google_protobuf_intentries[66] = { + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[13]), NULL}, + {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[49]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[70]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[71]), NULL}, + {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[50]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[64]), NULL}, + {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[3]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[10]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[69]), NULL}, + {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[47]), NULL}, + {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[59]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[58]), NULL}, + {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[12]), NULL}, + {UPB_TABKEY_NUM(16), 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[1]), NULL}, + {UPB_TABKEY_NUM(17), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[21]), NULL}, + {UPB_TABKEY_NUM(18), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[57]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(20), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[20]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[68]), NULL}, + {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[23]), NULL}, + {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[44]), NULL}, + {UPB_TABKEY_NUM(10), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[22]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[66]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[67]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[65]), NULL}, + {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[0]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[6]), NULL}, + {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[62]), NULL}, + {UPB_TABKEY_NUM(16), UPB_VALUE_INIT_CONSTPTR("TYPE_SFIXED64"), NULL}, + {UPB_TABKEY_NUM(17), UPB_VALUE_INIT_CONSTPTR("TYPE_SINT32"), NULL}, + {UPB_TABKEY_NUM(18), UPB_VALUE_INIT_CONSTPTR("TYPE_SINT64"), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + 
{UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR("TYPE_FIXED32"), NULL}, + {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR("TYPE_BOOL"), NULL}, + {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR("TYPE_STRING"), NULL}, + {UPB_TABKEY_NUM(10), UPB_VALUE_INIT_CONSTPTR("TYPE_GROUP"), NULL}, + {UPB_TABKEY_NUM(11), UPB_VALUE_INIT_CONSTPTR("TYPE_MESSAGE"), NULL}, + {UPB_TABKEY_NUM(12), UPB_VALUE_INIT_CONSTPTR("TYPE_BYTES"), NULL}, + {UPB_TABKEY_NUM(13), UPB_VALUE_INIT_CONSTPTR("TYPE_UINT32"), NULL}, + {UPB_TABKEY_NUM(14), UPB_VALUE_INIT_CONSTPTR("TYPE_ENUM"), NULL}, + {UPB_TABKEY_NUM(15), UPB_VALUE_INIT_CONSTPTR("TYPE_SFIXED32"), NULL}, +}; + +const upb_value google_protobuf_arrays[97] = { + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[36]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[15]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[40]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[8]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[14]), + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[61]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[7]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[33]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[72]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[48]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[31]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[42]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[51]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[34]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[11]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[43]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[25]), + 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[63]), + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[2]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[54]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[5]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[37]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[53]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[4]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[28]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[9]), + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[16]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[24]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[27]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[41]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[30]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[18]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[52]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[45]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[32]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[29]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[46]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[26]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[55]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[60]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[35]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[17]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[56]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[39]), + 
UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[38]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[19]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR("LABEL_OPTIONAL"), + UPB_VALUE_INIT_CONSTPTR("LABEL_REQUIRED"), + UPB_VALUE_INIT_CONSTPTR("LABEL_REPEATED"), + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR("TYPE_DOUBLE"), + UPB_VALUE_INIT_CONSTPTR("TYPE_FLOAT"), + UPB_VALUE_INIT_CONSTPTR("TYPE_INT64"), + UPB_VALUE_INIT_CONSTPTR("TYPE_UINT64"), + UPB_VALUE_INIT_CONSTPTR("TYPE_INT32"), + UPB_VALUE_INIT_CONSTPTR("TYPE_FIXED64"), + UPB_VALUE_INIT_CONSTPTR("STRING"), + UPB_VALUE_INIT_CONSTPTR("CORD"), + UPB_VALUE_INIT_CONSTPTR("STRING_PIECE"), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR("SPEED"), + UPB_VALUE_INIT_CONSTPTR("CODE_SIZE"), + UPB_VALUE_INIT_CONSTPTR("LITE_RUNTIME"), +}; + diff --git a/upb/descriptor/descriptor.upb.h b/upb/descriptor/descriptor.upb.h new file mode 100755 index 0000000..4903ae5 --- /dev/null +++ b/upb/descriptor/descriptor.upb.h @@ -0,0 +1,90 @@ +// This file was generated by upbc (the upb compiler). +// Do not edit -- your changes will be discarded when the file is +// regenerated. 
+ +#ifndef GOOGLE_PROTOBUF_UPB_H_ +#define GOOGLE_PROTOBUF_UPB_H_ + +#include "upb/def.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Enums + +typedef enum { + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FLOAT = 2, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT32 = 5, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED32 = 7, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_MESSAGE = 11, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ENUM = 14, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED64 = 16, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17, +} google_protobuf_FieldDescriptorProto_Type; + +typedef enum { + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REQUIRED = 2, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1, +} google_protobuf_FieldDescriptorProto_Label; + +typedef enum { + GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1, + GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0, + GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2, +} google_protobuf_FieldOptions_CType; + +typedef enum { + GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_CODE_SIZE = 2, + GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1, + GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_LITE_RUNTIME = 3, +} 
google_protobuf_FileOptions_OptimizeMode; + +// Do not refer to these forward declarations; use the constants +// below. +extern const upb_msgdef google_protobuf_msgs[20]; +extern const upb_fielddef google_protobuf_fields[73]; +extern const upb_enumdef google_protobuf_enums[4]; + +// Constants for references to defs. +// We hide these behind macros to decouple users from the +// details of how we have statically defined them (ie. whether +// each def has its own symbol or lives in an array of defs). +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO &google_protobuf_msgs[0] +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE &google_protobuf_msgs[1] +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO &google_protobuf_msgs[2] +#define GOOGLE_PROTOBUF_ENUMOPTIONS &google_protobuf_msgs[3] +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO &google_protobuf_msgs[4] +#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS &google_protobuf_msgs[5] +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO &google_protobuf_msgs[6] +#define GOOGLE_PROTOBUF_FIELDOPTIONS &google_protobuf_msgs[7] +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO &google_protobuf_msgs[8] +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET &google_protobuf_msgs[9] +#define GOOGLE_PROTOBUF_FILEOPTIONS &google_protobuf_msgs[10] +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS &google_protobuf_msgs[11] +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO &google_protobuf_msgs[12] +#define GOOGLE_PROTOBUF_METHODOPTIONS &google_protobuf_msgs[13] +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO &google_protobuf_msgs[14] +#define GOOGLE_PROTOBUF_SERVICEOPTIONS &google_protobuf_msgs[15] +#define GOOGLE_PROTOBUF_SOURCECODEINFO &google_protobuf_msgs[16] +#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION &google_protobuf_msgs[17] +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION &google_protobuf_msgs[18] +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART &google_protobuf_msgs[19] + +#ifdef __cplusplus +}; // extern "C" +#endif + +#endif // GOOGLE_PROTOBUF_UPB_H_ diff --git 
a/upb/descriptor/descriptor_const.h b/upb/descriptor/descriptor_const.h deleted file mode 100644 index 52ca803..0000000 --- a/upb/descriptor/descriptor_const.h +++ /dev/null @@ -1,349 +0,0 @@ -/* This file was generated by upbc (the upb compiler). Do not edit. */ - -#ifndef UPB_DESCRIPTOR_CONST_H -#define UPB_DESCRIPTOR_CONST_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* Enums. */ - -typedef enum google_protobuf_FieldDescriptorProto_Type { - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FLOAT = 2, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT32 = 5, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED32 = 7, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_MESSAGE = 11, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ENUM = 14, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED64 = 16, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17 -} google_protobuf_FieldDescriptorProto_Type; - -typedef enum google_protobuf_FieldDescriptorProto_Label { - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REQUIRED = 2, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1 -} google_protobuf_FieldDescriptorProto_Label; - -typedef enum google_protobuf_FieldOptions_CType { - GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 
1, - GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0, - GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2 -} google_protobuf_FieldOptions_CType; - -typedef enum google_protobuf_FileOptions_OptimizeMode { - GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_CODE_SIZE = 2, - GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1, - GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_LITE_RUNTIME = 3 -} google_protobuf_FileOptions_OptimizeMode; - -/* Constants for field names and numbers. */ - -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNUM 1 -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNAME "path" -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDNUM 2 -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDNAME "span" -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDNUM 2 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDNUM 3 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDNAME "identifier_value" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDNUM 4 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDNAME "positive_int_value" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDTYPE 4 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNUM 5 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNAME "negative_int_value" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDTYPE 3 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value" -#define 
GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNUM 6 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNAME "double_value" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDTYPE 1 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNUM 7 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNAME "string_value" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDTYPE 12 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNUM 2 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNAME "package" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDNUM 3 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDNAME "dependency" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNUM 4 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNAME "message_type" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 5 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNUM 8 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNUM 9 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNAME "source_code_info" -#define 
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDNUM 2 -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDNAME "input_type" -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDNUM 3 -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDNAME "output_type" -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNUM 4 -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNUM 1 -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNAME "location" -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDTYPE 11 
- -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDNUM 1 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDNAME "start" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNUM 2 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNAME "end" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number" -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNUM 1 -#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNAME "ctype" -#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDTYPE 14 - -#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDNUM 2 -#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDNAME "packed" -#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDNUM 3 -#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDNAME "deprecated" -#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDNUM 9 -#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDNAME "experimental_map_key" -#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define 
GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNUM 1 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNAME "java_package" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNUM 16 -#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNAME "cc_generic_services" -#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDNUM 17 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDNAME "java_generic_services" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDNUM 18 -#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDNAME "py_generic_services" -#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDNUM 20 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDNAME "java_generate_equals_and_hash" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9 -#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for" -#define 
GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value" -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2 -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method" -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3 -#define 
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDNUM 2 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDNAME "extendee" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNUM 3 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNAME "number" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNUM 4 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNAME "label" -#define 
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE 14 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNUM 5 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNAME "type" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE 14 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM 6 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNAME "type_name" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNUM 7 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNAME "default_value" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format" -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor" -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define 
GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8 - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_DESCRIPTOR_CONST_H */ diff --git a/upb/descriptor/reader.c b/upb/descriptor/reader.c index 8177560..16f3b24 100644 --- a/upb/descriptor/reader.c +++ b/upb/descriptor/reader.c @@ -3,21 +3,38 @@ * * Copyright (c) 2008-2009 Google Inc. See LICENSE for details. * Author: Josh Haberman + * + * XXX: The routines in this file that consume a string do not currently + * support having the string span buffers. In the future, as upb_sink and + * its buffering/sharing functionality evolve there should be an easy and + * idiomatic way of correctly handling this case. For now, we accept this + * limitation since we currently only parse descriptors from single strings. 
*/ -#include +#include "upb/descriptor/reader.h" + #include +#include +#include +#include "upb/bytestream.h" #include "upb/def.h" -#include "upb/descriptor/descriptor_const.h" -#include "upb/descriptor/reader.h" +#include "upb/descriptor/descriptor.upb.h" + +static char *upb_strndup(const char *buf, size_t n) { + char *ret = malloc(n + 1); + if (!ret) return NULL; + memcpy(ret, buf, n); + ret[n] = '\0'; + return ret; +} // Returns a newly allocated string that joins input strings together, for example: // join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" // join("", "Baz") -> "Baz" -// Caller owns a ref on the returned string. */ +// Caller owns a ref on the returned string. static char *upb_join(const char *base, const char *name) { if (!base || strlen(base) == 0) { - return strdup(name); + return upb_strdup(name); } else { char *ret = malloc(strlen(base) + strlen(name) + 2); ret[0] = '\0'; @@ -74,10 +91,6 @@ static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) { } } -// Forward declares for top-level file descriptors. -static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h); -static upb_mhandlers * upb_enumdef_register_EnumDescriptorProto(upb_handlers *h); - void upb_descreader_init(upb_descreader *r) { upb_deflist_init(&r->defs); upb_status_init(&r->status); @@ -107,7 +120,7 @@ static upb_msgdef *upb_descreader_top(upb_descreader *r) { if (r->stack_len <= 1) return NULL; int index = r->stack[r->stack_len-1].start - 1; assert(index >= 0); - return upb_downcast_msgdef(r->defs.defs[index]); + return upb_downcast_msgdef_mutable(r->defs.defs[index]); } static upb_def *upb_descreader_last(upb_descreader *r) { @@ -136,144 +149,80 @@ void upb_descreader_setscopename(upb_descreader *r, char *str) { } // Handlers for google.protobuf.FileDescriptorProto. 
-static upb_flow_t upb_descreader_FileDescriptorProto_startmsg(void *_r) { +static bool file_startmsg(void *_r) { upb_descreader *r = _r; upb_descreader_startcontainer(r); - return UPB_CONTINUE; + return true; } -static void upb_descreader_FileDescriptorProto_endmsg(void *_r, - upb_status *status) { - (void)status; +static void file_endmsg(void *_r, upb_status *status) { + UPB_UNUSED(status); upb_descreader *r = _r; upb_descreader_endcontainer(r); } -static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r, - upb_value fval, - upb_value val) { - (void)fval; +static size_t file_onpackage(void *_r, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; - upb_descreader_setscopename( - r, upb_byteregion_strdup(upb_value_getbyteregion(val))); - return UPB_CONTINUE; -} - -static upb_mhandlers *upb_descreader_register_FileDescriptorProto( - upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_descreader_FileDescriptorProto_startmsg); - upb_mhandlers_setendmsg(m, &upb_descreader_FileDescriptorProto_endmsg); - -#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM -#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE - upb_fhandlers *f = - upb_mhandlers_newfhandlers(m, FNUM(PACKAGE), FTYPE(PACKAGE), false); - upb_fhandlers_setvalue(f, &upb_descreader_FileDescriptorProto_package); - - upb_mhandlers_newfhandlers_subm(m, FNUM(MESSAGE_TYPE), FTYPE(MESSAGE_TYPE), true, - upb_msgdef_register_DescriptorProto(h)); - upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, - upb_enumdef_register_EnumDescriptorProto(h)); - // TODO: services, extensions - return m; -} -#undef FNUM -#undef FTYPE - -static upb_mhandlers *upb_descreader_register_FileDescriptorSet(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - -#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM -#define 
FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE - upb_mhandlers_newfhandlers_subm(m, FNUM(FILE), FTYPE(FILE), true, - upb_descreader_register_FileDescriptorProto(h)); - return m; -} -#undef FNUM -#undef FTYPE - -upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h) { - h->should_jit = false; - return upb_descreader_register_FileDescriptorSet(h); + // XXX: see comment at the top of the file. + upb_descreader_setscopename(r, upb_strndup(buf, n)); + return n; } -// google.protobuf.EnumValueDescriptorProto. -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_r) { +// Handlers for google.protobuf.EnumValueDescriptorProto. +static bool enumval_startmsg(void *_r) { upb_descreader *r = _r; r->saw_number = false; r->saw_name = false; - return UPB_CONTINUE; + return true; } -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r, - upb_value fval, - upb_value val) { - (void)fval; +static size_t enumval_onname(void *_r, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; + // XXX: see comment at the top of the file. 
free(r->name); - r->name = upb_byteregion_strdup(upb_value_getbyteregion(val)); + r->name = upb_strndup(buf, n); r->saw_name = true; - return UPB_CONTINUE; + return n; } -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_number(void *_r, - upb_value fval, - upb_value val) { - (void)fval; +static bool enumval_onnumber(void *_r, void *fval, int32_t val) { + UPB_UNUSED(fval); upb_descreader *r = _r; - r->number = upb_value_getint32(val); + r->number = val; r->saw_number = true; - return UPB_CONTINUE; + return true; } -static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r, - upb_status *status) { +static void enumval_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; if(!r->saw_number || !r->saw_name) { upb_status_seterrliteral(status, "Enum value missing name or number."); return; } - upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); + upb_enumdef *e = upb_downcast_enumdef_mutable(upb_descreader_last(r)); if (upb_enumdef_numvals(e) == 0) { // The default value of an enum (in the absence of an explicit default) is // its first listed value. 
upb_enumdef_setdefault(e, r->number); } - upb_enumdef_addval(e, r->name, r->number); + upb_enumdef_addval(e, r->name, r->number, status); free(r->name); r->name = NULL; } -static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto( - upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumValueDescriptorProto_startmsg); - upb_mhandlers_setendmsg(m, &upb_enumdef_EnumValueDescriptorProto_endmsg); - -#define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM -#define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f; - f = upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); - upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_name); - - f = upb_mhandlers_newfhandlers(m, FNUM(NUMBER), FTYPE(NUMBER), false); - upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_number); - return m; -} -#undef FNUM -#undef FTYPE -// google.protobuf.EnumDescriptorProto. -static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) { +// Handlers for google.protobuf.EnumDescriptorProto. 
+static bool enum_startmsg(void *_r) { upb_descreader *r = _r; - upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new(&r->defs))); - return UPB_CONTINUE; + upb_deflist_push(&r->defs, upb_upcast(upb_enumdef_new(&r->defs))); + return true; } -static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) { +static void enum_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; - upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); + upb_enumdef *e = upb_downcast_enumdef_mutable(upb_descreader_last(r)); if (upb_def_fullname(upb_descreader_last((upb_descreader*)_r)) == NULL) { upb_status_seterrliteral(status, "Enum had no name."); return; @@ -284,46 +233,28 @@ static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) } } -static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r, - upb_value fval, - upb_value val) { - (void)fval; +static size_t enum_onname(void *_r, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; - char *fullname = upb_byteregion_strdup(upb_value_getbyteregion(val)); + // XXX: see comment at the top of the file. 
+ char *fullname = upb_strndup(buf, n); upb_def_setfullname(upb_descreader_last(r), fullname); free(fullname); - return UPB_CONTINUE; -} - -static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumDescriptorProto_startmsg); - upb_mhandlers_setendmsg(m, &upb_enumdef_EnumDescriptorProto_endmsg); - -#define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM -#define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f = - upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); - upb_fhandlers_setvalue(f, &upb_enumdef_EnumDescriptorProto_name); - - upb_mhandlers_newfhandlers_subm(m, FNUM(VALUE), FTYPE(VALUE), true, - upb_enumdef_register_EnumValueDescriptorProto(h)); - return m; + return n; } -#undef FNUM -#undef FTYPE -static upb_flow_t upb_fielddef_startmsg(void *_r) { +// Handlers for google.protobuf.FieldDescriptorProto +static bool field_startmsg(void *_r) { upb_descreader *r = _r; r->f = upb_fielddef_new(&r->defs); free(r->default_string); r->default_string = NULL; - return UPB_CONTINUE; + return true; } // Converts the default value in string "str" into "d". Passes a ref on str. // Returns true on success. -static bool upb_fielddef_parsedefault(char *str, upb_value *d, int type) { +static bool parse_default(char *str, upb_value *d, int type) { bool success = true; if (str) { switch(type) { @@ -397,29 +328,24 @@ static bool upb_fielddef_parsedefault(char *str, upb_value *d, int type) { return success; } -static void upb_fielddef_endmsg(void *_r, upb_status *status) { +static void field_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; upb_fielddef *f = r->f; // TODO: verify that all required fields were present. 
- assert(f->number != -1 && upb_fielddef_name(f) != NULL); - assert((upb_fielddef_subtypename(f) != NULL) == upb_hassubdef(f)); - - // Field was successfully read, add it as a field of the msgdef. - upb_msgdef *m = upb_descreader_top(r); - upb_msgdef_addfield(m, f, &r->defs); - r->f = NULL; + assert(upb_fielddef_number(f) != 0 && upb_fielddef_name(f) != NULL); + assert((upb_fielddef_subdefname(f) != NULL) == upb_fielddef_hassubdef(f)); if (r->default_string) { - if (upb_issubmsg(f)) { + if (upb_fielddef_issubmsg(f)) { upb_status_seterrliteral(status, "Submessages cannot have defaults."); return; } - if (upb_isstring(f) || f->type == UPB_TYPE(ENUM)) { + if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE(ENUM)) { upb_fielddef_setdefaultcstr(f, r->default_string); } else { upb_value val; upb_value_setptr(&val, NULL); // Silence inaccurate compiler warnings. - if (!upb_fielddef_parsedefault(r->default_string, &val, f->type)) { + if (!parse_default(r->default_string, &val, upb_fielddef_type(f))) { // We don't worry too much about giving a great error message since the // compiler should have ensured this was correct. 
upb_status_seterrliteral(status, "Error converting default value."); @@ -430,132 +356,147 @@ static void upb_fielddef_endmsg(void *_r, upb_status *status) { } } -static upb_flow_t upb_fielddef_ontype(void *_r, upb_value fval, upb_value val) { - (void)fval; +static bool field_ontype(void *_r, void *fval, int32_t val) { + UPB_UNUSED(fval); upb_descreader *r = _r; - upb_fielddef_settype(r->f, upb_value_getint32(val)); - return UPB_CONTINUE; + upb_fielddef_settype(r->f, val); + return true; } -static upb_flow_t upb_fielddef_onlabel(void *_r, upb_value fval, upb_value val) { - (void)fval; +static bool field_onlabel(void *_r, void *fval, int32_t val) { + UPB_UNUSED(fval); upb_descreader *r = _r; - upb_fielddef_setlabel(r->f, upb_value_getint32(val)); - return UPB_CONTINUE; + upb_fielddef_setlabel(r->f, val); + return true; } -static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val) { - (void)fval; +static bool field_onnumber(void *_r, void *fval, int32_t val) { + UPB_UNUSED(fval); upb_descreader *r = _r; - upb_fielddef_setnumber(r->f, upb_value_getint32(val)); - return UPB_CONTINUE; + upb_fielddef_setnumber(r->f, val); + return true; } -static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) { - (void)fval; +static size_t field_onname(void *_r, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; - char *name = upb_byteregion_strdup(upb_value_getbyteregion(val)); + // XXX: see comment at the top of the file. 
+ char *name = upb_strndup(buf, n); upb_fielddef_setname(r->f, name); free(name); - return UPB_CONTINUE; + return n; } -static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval, - upb_value val) { - (void)fval; +static size_t field_ontypename(void *_r, void *fval, const char *buf, + size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; - char *name = upb_byteregion_strdup(upb_value_getbyteregion(val)); - upb_fielddef_setsubtypename(r->f, name); + // XXX: see comment at the top of the file. + char *name = upb_strndup(buf, n); + upb_fielddef_setsubdefname(r->f, name); free(name); - return UPB_CONTINUE; + return n; } -static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval, - upb_value val) { - (void)fval; +static size_t field_ondefaultval(void *_r, void *fval, const char *buf, + size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; // Have to convert from string to the correct type, but we might not know the - // type yet. + // type yet, so we save it as a string until the end of the field. + // XXX: see comment at the top of the file. 
free(r->default_string); - r->default_string = upb_byteregion_strdup(upb_value_getbyteregion(val)); - return UPB_CONTINUE; -} - -static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto( - upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg); - upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg); - -#define FIELD(name, handler) \ - upb_fhandlers_setvalue( \ - upb_mhandlers_newfhandlers(m, \ - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \ - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \ - false), \ - handler); - FIELD(TYPE, &upb_fielddef_ontype); - FIELD(LABEL, &upb_fielddef_onlabel); - FIELD(NUMBER, &upb_fielddef_onnumber); - FIELD(NAME, &upb_fielddef_onname); - FIELD(TYPE_NAME, &upb_fielddef_ontypename); - FIELD(DEFAULT_VALUE, &upb_fielddef_ondefaultval); - return m; -} -#undef FNUM -#undef FTYPE - - -// google.protobuf.DescriptorProto. -static upb_flow_t upb_msgdef_startmsg(void *_r) { + r->default_string = upb_strndup(buf, n); + return n; +} + +// Handlers for google.protobuf.DescriptorProto (representing a message). 
+static bool msg_startmsg(void *_r) { upb_descreader *r = _r; - upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new(&r->defs))); + upb_deflist_push(&r->defs, upb_upcast(upb_msgdef_new(&r->defs))); upb_descreader_startcontainer(r); - return UPB_CONTINUE; + return true; } -static void upb_msgdef_endmsg(void *_r, upb_status *status) { +static void msg_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; upb_msgdef *m = upb_descreader_top(r); - if(!upb_def_fullname(UPB_UPCAST(m))) { + if(!upb_def_fullname(upb_upcast(m))) { upb_status_seterrliteral(status, "Encountered message with no name."); return; } upb_descreader_endcontainer(r); } -static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) { - (void)fval; +static size_t msg_onname(void *_r, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; upb_msgdef *m = upb_descreader_top(r); - char *name = upb_byteregion_strdup(upb_value_getbyteregion(val)); - upb_def_setfullname(UPB_UPCAST(m), name); + // XXX: see comment at the top of the file. + char *name = upb_strndup(buf, n); + upb_def_setfullname(upb_upcast(m), name); upb_descreader_setscopename(r, name); // Passes ownership of name. 
- return UPB_CONTINUE; + return n; } -static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_msgdef_startmsg); - upb_mhandlers_setendmsg(m, &upb_msgdef_endmsg); - -#define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM -#define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f = - upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); - upb_fhandlers_setvalue(f, &upb_msgdef_onname); - - upb_mhandlers_newfhandlers_subm(m, FNUM(FIELD), FTYPE(FIELD), true, - upb_fielddef_register_FieldDescriptorProto(h)); - upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, - upb_enumdef_register_EnumDescriptorProto(h)); +static bool msg_onendfield(void *_r, void *fval) { + UPB_UNUSED(fval); + upb_descreader *r = _r; + upb_msgdef *m = upb_descreader_top(r); + upb_msgdef_addfield(m, r->f, &r->defs); + r->f = NULL; + return true; +} - // DescriptorProto is self-recursive, so we must link the definition. - upb_mhandlers_newfhandlers_subm( - m, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, m); +static bool discardfield(void *_r, void *fval) { + UPB_UNUSED(fval); + upb_descreader *r = _r; + // Discard extension field so we don't leak it. 
+ upb_fielddef_unref(r->f, &r->defs); + r->f = NULL; + return true; +} + +static void reghandlers(void *closure, upb_handlers *h) { + UPB_UNUSED(closure); + const upb_msgdef *m = upb_handlers_msgdef(h); + + if (m == GOOGLE_PROTOBUF_DESCRIPTORPROTO) { + upb_handlers_setstartmsg(h, &msg_startmsg); + upb_handlers_setendmsg(h, &msg_endmsg); + upb_handlers_setstring_n(h, "name", &msg_onname, NULL, NULL); + upb_handlers_setendsubmsg_n(h, "field", &msg_onendfield, NULL, NULL); + // TODO: support extensions + upb_handlers_setendsubmsg_n(h, "extension", &discardfield, NULL, NULL); + } else if (m == GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO) { + upb_handlers_setstartmsg(h, &file_startmsg); + upb_handlers_setendmsg(h, &file_endmsg); + upb_handlers_setstring_n(h, "package", &file_onpackage, NULL, NULL); + // TODO: support extensions + upb_handlers_setendsubmsg_n(h, "extension", &discardfield, NULL, NULL); + } else if (m == GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO) { + upb_handlers_setstartmsg(h, &enumval_startmsg); + upb_handlers_setendmsg(h, &enumval_endmsg); + upb_handlers_setstring_n(h, "name", &enumval_onname, NULL, NULL); + upb_handlers_setint32_n(h, "number", &enumval_onnumber, NULL, NULL); + } else if (m == GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO) { + upb_handlers_setstartmsg(h, &enum_startmsg); + upb_handlers_setendmsg(h, &enum_endmsg); + upb_handlers_setstring_n(h, "name", &enum_onname, NULL, NULL); + } else if (m == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO) { + upb_handlers_setstartmsg(h, &field_startmsg); + upb_handlers_setendmsg(h, &field_endmsg); + upb_handlers_setint32_n (h, "type", &field_ontype, NULL, NULL); + upb_handlers_setint32_n (h, "label", &field_onlabel, NULL, NULL); + upb_handlers_setint32_n (h, "number", &field_onnumber, NULL, NULL); + upb_handlers_setstring_n(h, "name", &field_onname, NULL, NULL); + upb_handlers_setstring_n(h, "type_name", &field_ontypename, NULL, NULL); + upb_handlers_setstring_n( + h, "default_value", &field_ondefaultval, NULL, NULL); + } +} - 
// TODO: extensions. - return m; +const upb_handlers *upb_descreader_newhandlers(const void *owner) { + return upb_handlers_newfrozen( + GOOGLE_PROTOBUF_FILEDESCRIPTORSET, owner, reghandlers, NULL); } -#undef FNUM -#undef FTYPE diff --git a/upb/descriptor/reader.h b/upb/descriptor/reader.h index 0e1bfa0..4312682 100644 --- a/upb/descriptor/reader.h +++ b/upb/descriptor/reader.h @@ -67,7 +67,7 @@ void upb_descreader_uninit(upb_descreader *r); // Registers handlers that will build the defs. Pass the descreader as the // closure. -upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h); +const upb_handlers *upb_descreader_newhandlers(const void *owner); // Gets the array of defs that have been parsed and removes them from the // descreader. Ownership of the defs is passed to the caller using the given diff --git a/upb/google/README b/upb/google/README new file mode 100644 index 0000000..a237583 --- /dev/null +++ b/upb/google/README @@ -0,0 +1,16 @@ +This directory contains code to interoperate with Google's official +Protocol Buffers release. Since it doesn't really have a name +besides "protobuf," calling this directory "google" seems like the +least confusing option. + +We support writing into protobuf's generated classes (and hopefully +reading too, before long). We support both the open source protobuf +release and the Google-internal version of the same code. The two +live in different namespaces, and the internal version supports some +features that are not supported in the open-source release. Also, the +internal version includes the legacy "proto1" classes which we must +support; thankfully this is mostly relegated to its own separate file. + +Our functionality requires the full google::protobuf::Message +interface; we rely on reflection so we know what fields to read/write +and where to put them, so we can't support MessageLite. 
diff --git a/upb/google/bridge.cc b/upb/google/bridge.cc new file mode 100644 index 0000000..4d64ab8 --- /dev/null +++ b/upb/google/bridge.cc @@ -0,0 +1,260 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// IMPORTANT NOTE! This file is compiled TWICE, once with UPB_GOOGLE3 defined +// and once without! This allows us to provide functionality against proto2 +// and protobuf opensource both in a single binary without the two conflicting. +// However we must be careful not to violate the ODR. + +#include "upb/google/bridge.h" + +#include +#include +#include "upb/def.h" +#include "upb/google/proto1.h" +#include "upb/google/proto2.h" +#include "upb/handlers.h" + +namespace upb { +namespace proto2_bridge_google3 { class Defs; } +namespace proto2_bridge_opensource { class Defs; } +} // namespace upb + +#ifdef UPB_GOOGLE3 +#include "net/proto2/public/descriptor.h" +#include "net/proto2/public/message.h" +#include "net/proto2/proto/descriptor.pb.h" +namespace goog = ::proto2; +namespace me = ::upb::proto2_bridge_google3; +#else +#include "google/protobuf/descriptor.h" +#include "google/protobuf/message.h" +#include "google/protobuf/descriptor.pb.h" +namespace goog = ::google::protobuf; +namespace me = ::upb::proto2_bridge_opensource; +#endif + +class me::Defs { + public: + void OnMessage(Handlers* h) { + const upb::MessageDef* md = h->message_def(); + const goog::Message& m = *message_map_[md]; + const goog::Descriptor* d = m.GetDescriptor(); + for (upb::MessageDef::ConstIterator i(md); !i.Done(); i.Next()) { + const upb::FieldDef* upb_f = i.field(); + const goog::FieldDescriptor* proto2_f = + d->FindFieldByNumber(upb_f->number()); + if (!upb::google::TrySetWriteHandlers(proto2_f, m, upb_f, h) +#ifdef UPB_GOOGLE3 + && !upb::google::TrySetProto1WriteHandlers(proto2_f, m, upb_f, h) +#endif + ) { + // Unsupported reflection class. 
+ // + // Should we fall back to using the public Reflection interface in this + // case? It's unclear whether it's supported behavior for users to + // create their own Reflection classes. + assert(false); + } + } + } + + static void StaticOnMessage(void *closure, upb::Handlers* handlers) { + me::Defs* defs = static_cast(closure); + defs->OnMessage(handlers); + } + + void AddSymbol(const std::string& name, upb::Def* def) { + assert(symbol_map_.find(name) == symbol_map_.end()); + symbol_map_[name] = def; + } + + void AddMessage(const goog::Message* m, upb::MessageDef* md) { + assert(message_map_.find(md) == message_map_.end()); + message_map_[md] = m; + AddSymbol(m->GetDescriptor()->full_name(), md->Upcast()); + } + + upb::Def* FindSymbol(const std::string& name) { + SymbolMap::iterator iter = symbol_map_.find(name); + return iter != symbol_map_.end() ? iter->second : NULL; + } + + void Flatten(std::vector* defs) { + SymbolMap::iterator iter; + for (iter = symbol_map_.begin(); iter != symbol_map_.end(); ++iter) { + defs->push_back(iter->second); + } + } + + private: + // Maps a new upb::MessageDef* to a corresponding proto2 Message* whose + // derived class is of the correct type according to the message the user + // gave us. + typedef std::map MessageMap; + MessageMap message_map_; + + // Maps a type name to a upb Def we have constructed to represent it. + typedef std::map SymbolMap; + SymbolMap symbol_map_; +}; + +namespace upb { +namespace google { + +// For submessage fields, stores a pointer to an instance of the submessage in +// *subm (but it is *not* guaranteed to be a prototype). +FieldDef* AddFieldDef(const goog::Message& m, const goog::FieldDescriptor* f, + upb::MessageDef* md, const goog::Message** subm) { + // To parse weak submessages effectively, we need to represent them in the + // upb::Def schema even though they are not reflected in the proto2 + // descriptors (weak fields are represented as FieldDescriptor::TYPE_BYTES). 
+ const goog::Message* weak_prototype = NULL; +#ifdef UPB_GOOGLE3 + weak_prototype = upb::google::GetProto1WeakPrototype(m, f); +#endif + + upb::FieldDef* upb_f = upb::FieldDef::New(&upb_f); + upb_f->set_number(f->number()); + upb_f->set_name(f->name()); + upb_f->set_label(static_cast(f->label())); + upb_f->set_type(weak_prototype ? + UPB_TYPE_MESSAGE : static_cast(f->type())); + + if (weak_prototype) { + upb_f->set_subdef_name(weak_prototype->GetDescriptor()->full_name()); + } else if (upb_f->IsSubMessage()) { + upb_f->set_subdef_name(f->message_type()->full_name()); + } else if (upb_f->type() == UPB_TYPE(ENUM)) { + // We set the enum default numerically. + upb_f->set_default_value( + MakeValue(static_cast(f->default_value_enum()->number()))); + upb_f->set_subdef_name(f->enum_type()->full_name()); + } else { + // Set field default for primitive types. Need to switch on the upb type + // rather than the proto2 type, because upb_f->type() may have been changed + // from BYTES to MESSAGE for a weak field. 
+ switch (upb_types[upb_f->type()].inmemory_type) { + case UPB_CTYPE_INT32: + upb_f->set_default_value(MakeValue(f->default_value_int32())); + break; + case UPB_CTYPE_INT64: + upb_f->set_default_value( + MakeValue(static_cast(f->default_value_int64()))); + break; + case UPB_CTYPE_UINT32: + upb_f->set_default_value(MakeValue(f->default_value_uint32())); + break; + case UPB_CTYPE_UINT64: + upb_f->set_default_value( + MakeValue(static_cast(f->default_value_uint64()))); + break; + case UPB_CTYPE_DOUBLE: + upb_f->set_default_value(MakeValue(f->default_value_double())); + break; + case UPB_CTYPE_FLOAT: + upb_f->set_default_value(MakeValue(f->default_value_float())); + break; + case UPB_CTYPE_BOOL: + upb_f->set_default_value(MakeValue(f->default_value_bool())); + break; + case UPB_CTYPE_BYTEREGION: + upb_f->set_default_string(f->default_value_string()); + break; + } + } + bool ok = md->AddField(upb_f, &upb_f); + UPB_ASSERT_VAR(ok, ok); + + if (weak_prototype) { + *subm = weak_prototype; + } else if (f->cpp_type() == goog::FieldDescriptor::CPPTYPE_MESSAGE) { + *subm = upb::google::GetFieldPrototype(m, f); +#ifdef UPB_GOOGLE3 + if (!*subm) + *subm = upb::google::GetProto1FieldPrototype(m, f); +#endif + assert(*subm); + } + + return upb_f; +} + +upb::EnumDef* NewEnumDef(const goog::EnumDescriptor* desc, void *owner) { + upb::EnumDef* e = upb::EnumDef::New(owner); + e->set_full_name(desc->full_name()); + for (int i = 0; i < desc->value_count(); i++) { + const goog::EnumValueDescriptor* val = desc->value(i); + bool success = e->AddValue(val->name(), val->number(), NULL); + UPB_ASSERT_VAR(success, success); + } + return e; +} + +static upb::MessageDef* NewMessageDef(const goog::Message& m, void *owner, + me::Defs* defs) { + upb::MessageDef* md = upb::MessageDef::New(owner); + md->set_full_name(m.GetDescriptor()->full_name()); + + // Must do this before processing submessages to prevent infinite recursion. 
+ defs->AddMessage(&m, md); + + const goog::Descriptor* d = m.GetDescriptor(); + for (int i = 0; i < d->field_count(); i++) { + const goog::FieldDescriptor* proto2_f = d->field(i); + +#ifdef UPB_GOOGLE3 + // Skip lazy fields for now since we can't properly handle them. + if (proto2_f->options().lazy()) continue; +#endif + // Extensions not supported yet. + if (proto2_f->is_extension()) continue; + + const goog::Message* subm_prototype; + upb::FieldDef* f = AddFieldDef(m, proto2_f, md, &subm_prototype); + + if (!f->HasSubDef()) continue; + + upb::Def* subdef = defs->FindSymbol(f->subdef_name()); + if (!subdef) { + if (f->type() == UPB_TYPE(ENUM)) { + subdef = NewEnumDef(proto2_f->enum_type(), owner)->Upcast(); + defs->AddSymbol(subdef->full_name(), subdef); + } else { + assert(f->IsSubMessage()); + assert(subm_prototype); + subdef = NewMessageDef(*subm_prototype, owner, defs)->Upcast(); + } + } + f->set_subdef(subdef); + } + + return md; +} + +const upb::Handlers* NewWriteHandlers(const goog::Message& m, void *owner) { + me::Defs defs; + const upb::MessageDef* md = NewMessageDef(m, owner, &defs); + + std::vector defs_vec; + defs.Flatten(&defs_vec); + Status status; + bool success = Def::Freeze(defs_vec, &status); + UPB_ASSERT_VAR(success, success); + + const upb::Handlers* ret = + upb::Handlers::NewFrozen(md, owner, me::Defs::StaticOnMessage, &defs); + + // Unref all defs, since they're now ref'd by the handlers. + for (int i = 0; i < static_cast(defs_vec.size()); i++) { + defs_vec[i]->Unref(owner); + } + + return ret; +} + +} // namespace google +} // namespace upb diff --git a/upb/google/bridge.h b/upb/google/bridge.h new file mode 100644 index 0000000..8a2256f --- /dev/null +++ b/upb/google/bridge.h @@ -0,0 +1,76 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. 
+// Author: Josh Haberman +// +// This file contains functionality for constructing upb Defs and Handlers +// corresponding to proto2 messages. Using this functionality, you can use upb +// to dynamically generate parsing code that can behave exactly like proto2's +// generated parsing code. Alternatively, you can configure things to +// read/write only a subset of the fields for higher performance when only some +// fields are needed. +// +// Example usage (FIX XXX): +// +// // Build a def that will have all fields and parse just like proto2 would. +// const upb::MessageDef* md = upb::proto2_bridge::NewMessageDef(&MyProto()); +// +// // JIT the parser; should only be done once ahead-of-time. +// upb::Handlers* handlers = upb::NewHandlersForMessage(md); +// upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers); +// handlers->Unref(); +// +// // The actual parsing. +// MyProto proto; +// upb::Decoder decoder; +// upb::StringSource source(buf, len); +// decoder.ResetPlan(plan, 0); +// decoder.ResetInput(source.AllBytes(), &proto); +// CHECK(decoder.Decode() == UPB_OK) << decoder.status(); +// +// To parse only one field and skip all others: +// +// const upb::MessageDef* md = +// upb::proto2_bridge::NewEmptyMessageDef(MyProto().GetPrototype()); +// upb::proto2_bridge::AddFieldDef( +// MyProto::descriptor()->FindFieldByName("my_field"), md); +// upb::Freeze(md); +// +// // Now continue with "JIT the parser" from above. +// +// Note that there is currently no support for +// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate +// DescriptorPool and MessageFactory for extensions. Since this is a property +// of the input in proto2, it's difficult to build a plan ahead-of-time that +// can properly support this. If it's an important use case, the caller should +// probably build a upb plan explicitly. 
+ +#ifndef UPB_GOOGLE_BRIDGE_H_ +#define UPB_GOOGLE_BRIDGE_H_ + +namespace google { +namespace protobuf { class Message; } +} // namespace google + +namespace proto2 { class Message; } + +namespace upb { + +class Handlers; + +namespace google { + +// Returns a upb::Handlers object that can be used to populate a proto2::Message +// object of the same type as "m." +// +// TODO(haberman): Add handler caching functionality so that we don't use +// O(n^2) memory in the worst case when incrementally building handlers. +const upb::Handlers* NewWriteHandlers(const proto2::Message& m, void *owner); +const upb::Handlers* NewWriteHandlers(const ::google::protobuf::Message& m, + void *owner); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_BRIDGE_H_ diff --git a/upb/google/cord.h b/upb/google/cord.h new file mode 100644 index 0000000..c579c0c --- /dev/null +++ b/upb/google/cord.h @@ -0,0 +1,48 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// Functionality for interoperating with Cord. Only needed inside Google. + +#ifndef UPB_GOOGLE_CORD_H +#define UPB_GOOGLE_CORD_H + +#include "strings/cord.h" +#include "upb/bytestream.h" + +namespace upb { + +namespace proto2_bridge_google3 { class FieldAccessor; } +namespace proto2_bridge_opensource { class FieldAccessor; } + +namespace google { + +class P2R_Handlers; + +class CordSupport { + private: + UPB_DISALLOW_POD_OPS(CordSupport); + + inline static void AssignToCord(const upb::ByteRegion* r, Cord* cord) { + // TODO(haberman): ref source data if source is a cord. 
+ cord->Clear(); + uint64_t ofs = r->start_ofs(); + while (ofs < r->end_ofs()) { + size_t len; + const char *buf = r->GetPtr(ofs, &len); + cord->Append(StringPiece(buf, len)); + ofs += len; + } + } + + friend class ::upb::proto2_bridge_google3::FieldAccessor; + friend class ::upb::proto2_bridge_opensource::FieldAccessor; + friend class P2R_Handlers; +}; + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_CORD_H diff --git a/upb/google/proto1.cc b/upb/google/proto1.cc new file mode 100644 index 0000000..bb9ff75 --- /dev/null +++ b/upb/google/proto1.cc @@ -0,0 +1,502 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// This set of handlers can write into a proto2::Message whose reflection class +// is _pi::Proto2Reflection (ie. proto1 messages; while slightly confusing, the +// name "Proto2Reflection" indicates that it is a reflection class implementing +// the proto2 reflection interface, but is used for proto1 generated messages). +// +// Like FieldAccessor this depends on breaking encapsulation, and will need to +// be changed if and when the details of _pi::Proto2Reflection change. +// +// Note that we have received an exception from c-style-arbiters regarding +// dynamic_cast<> in this file: +// https://groups.google.com/a/google.com/d/msg/c-style/7Zp_XCX0e7s/I6dpzno4l-MJ + +#include "upb/google/proto1.h" + +// TODO(haberman): friend upb so that this isn't required. +#define protected public +#include "net/proto2/public/repeated_field.h" +#undef protected + +// TODO(haberman): friend upb so that this isn't required.
+#define private public +#include "net/proto/proto2_reflection.h" +#undef private + +#include "net/proto/internal_layout.h" +#include "upb/bytestream.h" +#include "upb/def.h" +#include "upb/google/cord.h" +#include "upb/handlers.h" + +template static T* GetPointer(void *message, size_t offset) { + return reinterpret_cast(static_cast(message) + offset); +} + +namespace upb { +namespace google { + +class P2R_Handlers { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const upb::FieldDef* upb_f, upb::Handlers* h) { + const proto2::Reflection* base_r = m.GetReflection(); + // See file comment re: dynamic_cast. + const _pi::Proto2Reflection* r = + dynamic_cast(base_r); + if (!r) return false; + // Extensions not supported yet. + if (proto2_f->is_extension()) return false; + + switch (r->GetFieldLayout(proto2_f)->crep) { +#define PRIMITIVE(name, type_name) \ + case _pi::CREP_REQUIRED_ ## name: \ + case _pi::CREP_OPTIONAL_ ## name: \ + case _pi::CREP_REPEATED_ ## name: \ + SetPrimitiveHandlers(proto2_f, r, upb_f, h); return true; + PRIMITIVE(DOUBLE, double); + PRIMITIVE(FLOAT, float); + PRIMITIVE(INT64, int64_t); + PRIMITIVE(UINT64, uint64_t); + PRIMITIVE(INT32, int32_t); + PRIMITIVE(FIXED64, uint64_t); + PRIMITIVE(FIXED32, uint32_t); + PRIMITIVE(BOOL, bool); +#undef PRIMITIVE + case _pi::CREP_REQUIRED_STRING: + case _pi::CREP_OPTIONAL_STRING: + case _pi::CREP_REPEATED_STRING: + SetStringHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_OUTOFLINE_STRING: + SetOutOfLineStringHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_REQUIRED_CORD: + case _pi::CREP_OPTIONAL_CORD: + case _pi::CREP_REPEATED_CORD: + SetCordHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_REQUIRED_GROUP: + case 
_pi::CREP_REQUIRED_FOREIGN: + case _pi::CREP_REQUIRED_FOREIGN_PROTO2: + SetRequiredMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_GROUP: + case _pi::CREP_REPEATED_GROUP: + case _pi::CREP_OPTIONAL_FOREIGN: + case _pi::CREP_REPEATED_FOREIGN: + case _pi::CREP_OPTIONAL_FOREIGN_PROTO2: + case _pi::CREP_REPEATED_FOREIGN_PROTO2: + SetMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_FOREIGN_WEAK: + case _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2: + SetWeakMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + default: assert(false); return false; + } + } + + // If the field "f" in the message "m" is a weak field, returns the prototype + // of the submessage (which may be a specific type or may be OpaqueMessage). + // Otherwise returns NULL. + static const proto2::Message* GetWeakPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f) { + // See file comment re: dynamic_cast. + const _pi::Proto2Reflection* r = + dynamic_cast(m.GetReflection()); + if (!r) return NULL; + + const _pi::Field* field = r->GetFieldLayout(f); + if (field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) { + return static_cast( + field->weak_layout()->default_instance); + } else if (field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2) { + return field->proto2_weak_default_instance(); + } else { + return NULL; + } + } + + // If "m" is a message that uses Proto2Reflection, returns the prototype of + // the submessage (which may be OpaqueMessage for a weak field that is not + // linked in). Otherwise returns NULL. + static const proto2::Message* GetFieldPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f) { + // See file comment re: dynamic_cast. + const proto2::Message* ret = GetWeakPrototype(m, f); + if (ret) { + return ret; + } else if (dynamic_cast(m.GetReflection())) { + // Since proto1 has no dynamic message, it must be from the generated + // factory. 
+ assert(f->cpp_type() == proto2::FieldDescriptor::CPPTYPE_MESSAGE); + ret = proto2::MessageFactory::generated_factory()->GetPrototype( + f->message_type()); + assert(ret); + return ret; + } else { + return NULL; + } + } + + private: + class FieldOffset { + public: + FieldOffset( + const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) + : offset_(GetOffset(f, r)), + is_repeated_(f->is_repeated()) { + if (!is_repeated_) { + int64_t hasbit = GetHasbit(f, r); + hasbyte_ = hasbit / 8; + mask_ = 1 << (hasbit % 8); + } + } + + template T* GetFieldPointer(void* message) const { + return GetPointer(message, offset_); + } + + void SetHasbit(void* message) const { + assert(!is_repeated_); + uint8_t* byte = GetPointer(message, hasbyte_); + *byte |= mask_; + } + + private: + const size_t offset_; + bool is_repeated_; + + // Only for non-repeated fields. + int32_t hasbyte_; + int8_t mask_; + }; + + static upb_selector_t GetSelector(const upb::FieldDef* f, + upb::Handlers::Type type) { + upb::Handlers::Selector selector; + bool ok = upb::Handlers::GetSelector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; + } + + + static int16_t GetHasbit(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + assert(!f->is_repeated()); + return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index; + } + + static uint16_t GetOffset(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + return r->GetFieldLayout(f)->offset; + } + + // StartSequence ///////////////////////////////////////////////////////////// + + static void SetStartSequenceHandler( + const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(f->IsSequence()); + h->SetStartSequenceHandler( + f, &PushOffset, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + + static void* PushOffset(void *m, void *fval) { + const FieldOffset* offset = static_cast(fval); + return 
offset->GetFieldPointer(m); + } + + // Primitive Value (numeric, enum, bool) ///////////////////////////////////// + + template static void SetPrimitiveHandlers( + const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetValueHandler(f, &Append, NULL, NULL); + } else { + upb::SetStoreValueHandler( + f, GetOffset(proto2_f, r), GetHasbit(proto2_f, r), h); + } + } + + template + static bool Append(void *_r, void *fval, T val) { + UPB_UNUSED(fval); + // Proto1's ProtoArray class derives from proto2::RepeatedField. + proto2::RepeatedField* r = static_cast*>(_r); + r->Add(val); + return true; + } + + // String //////////////////////////////////////////////////////////////////// + + static void SetStringHandlers( + const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, &OnStringBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedString, NULL, NULL); + } else { + h->SetStartStringHandler( + f, &StartString, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + } + + static void* StartString(void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + const FieldOffset* info = static_cast(fval); + info->SetHasbit(m); + string* str = info->GetFieldPointer(m); + str->clear(); + // reserve() here appears to hurt performance rather than help. + return str; + } + + static size_t OnStringBuf(void *_s, void *fval, const char *buf, size_t n) { + string* s = static_cast(_s); + s->append(buf, n); + return n; + } + + static void* StartRepeatedString(void *_r, void *fval, size_t size_hint) { + UPB_UNUSED(fval); + proto2::RepeatedPtrField* r = + static_cast*>(_r); + string* str = r->Add(); + // reserve() here appears to hurt performance rather than help. 
+ return str; + } + + // Out-of-line string //////////////////////////////////////////////////////// + + static void SetOutOfLineStringHandlers( + const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + // This type is only used for non-repeated string fields. + assert(!f->IsSequence()); + h->SetStartStringHandler( + f, &StartOutOfLineString, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + h->SetStringHandler(f, &OnStringBuf, NULL, NULL); + } + + static void* StartOutOfLineString(void *m, void *fval, size_t size_hint) { + const FieldOffset* info = static_cast(fval); + info->SetHasbit(m); + string **str = info->GetFieldPointer(m); + if (*str == &::ProtocolMessage::___empty_internal_proto_string_) + *str = new string(); + (*str)->clear(); + // reserve() here appears to hurt performance rather than help. + return *str; + } + + // Cord ////////////////////////////////////////////////////////////////////// + + static void SetCordHandlers( + const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, &OnCordBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedCord, NULL, NULL); + } else { + h->SetStartStringHandler( + f, &StartCord, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + } + + static void* StartCord(void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + const FieldOffset* offset = static_cast(fval); + offset->SetHasbit(m); + Cord* field = offset->GetFieldPointer(m); + field->Clear(); + return field; + } + + static size_t OnCordBuf(void *_c, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); + Cord* c = static_cast(_c); + c->Append(StringPiece(buf, n)); + return n; // All n bytes were appended; report them as consumed, like OnStringBuf. + } + + static void* StartRepeatedCord(void *_r, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint);
+ UPB_UNUSED(fval); + proto2::RepeatedField* r = + static_cast*>(_r); + return r->Add(); + } + + // SubMessage //////////////////////////////////////////////////////////////// + + class SubMessageHandlerData : public FieldOffset { + public: + SubMessageHandlerData( + const proto2::Message& prototype, + const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) + : FieldOffset(f, r) { + prototype_ = GetWeakPrototype(prototype, f); + if (!prototype_) + prototype_ = GetFieldPrototype(prototype, f); + } + + const proto2::Message* prototype() const { return prototype_; } + + private: + const proto2::Message* prototype_; + }; + + static void SetStartSubMessageHandler( + const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + upb::Handlers::StartFieldHandler* handler, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStartSubMessageHandler( + f, handler, + new SubMessageHandlerData(m, proto2_f, r), + &upb::DeletePointer); + } + + static void SetRequiredMessageHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h); + } else { + h->SetStartSubMessageHandler( + f, &StartRequiredSubMessage, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + } + + static void* StartRequiredSubMessage(void *m, void *fval) { + const FieldOffset* offset = static_cast(fval); + offset->SetHasbit(m); + return offset->GetFieldPointer(m); + } + + static void SetMessageHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h); + } else { + 
SetStartSubMessageHandler(proto2_f, m, r, &StartSubMessage, f, h); + } + } + + static void SetWeakMessageHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h); + } else { + SetStartSubMessageHandler(proto2_f, m, r, &StartWeakSubMessage, f, h); + } + } + + static void* StartSubMessage(void *m, void *fval) { + const SubMessageHandlerData* info = + static_cast(fval); + info->SetHasbit(m); + proto2::Message **subm = info->GetFieldPointer(m); + if (*subm == info->prototype()) *subm = (*subm)->New(); + return *subm; + } + + static void* StartWeakSubMessage(void *m, void *fval) { + const SubMessageHandlerData* info = + static_cast(fval); + info->SetHasbit(m); + proto2::Message **subm = info->GetFieldPointer(m); + if (*subm == NULL) { + *subm = info->prototype()->New(); + } + return *subm; + } + + class RepeatedMessageTypeHandler { + public: + typedef void Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + (void)t; + assert(false); + } + }; + + // Closure is a RepeatedPtrField*, but we access it through + // its base class RepeatedPtrFieldBase*. 
+ static void* StartRepeatedSubMessage(void* _r, void *fval) { + const SubMessageHandlerData* info = + static_cast(fval); + proto2::internal::RepeatedPtrFieldBase *r = + static_cast(_r); + void *submsg = r->AddFromCleared(); + if (!submsg) { + submsg = info->prototype()->New(); + r->AddAllocated(submsg); + } + return submsg; + } +}; + +bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const upb::FieldDef* upb_f, upb::Handlers* h) { + return P2R_Handlers::TrySet(proto2_f, m, upb_f, h); +} + +const proto2::Message* GetProto1WeakPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f) { + return P2R_Handlers::GetWeakPrototype(m, f); +} + +const proto2::Message* GetProto1FieldPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f) { + return P2R_Handlers::GetFieldPrototype(m, f); +} + +} // namespace google +} // namespace upb diff --git a/upb/google/proto1.h b/upb/google/proto1.h new file mode 100644 index 0000000..f35fb13 --- /dev/null +++ b/upb/google/proto1.h @@ -0,0 +1,53 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// Support for registering field handlers that can write into a legacy proto1 +// message. This functionality is only needed inside Google. +// +// This is a low-level interface; the high-level interface in google.h is +// more user-friendly. + +#ifndef UPB_GOOGLE_PROTO1_H_ +#define UPB_GOOGLE_PROTO1_H_ + +namespace proto2 { +class FieldDescriptor; +class Message; +} + +namespace upb { +class FieldDef; +class Handlers; +} + +namespace upb { +namespace google { + +// Sets field handlers in the given Handlers object for writing to a single +// field (as described by "proto2_f" and "upb_f") into a message constructed +// by the same factory as "prototype." 
Returns true if this was successful +// (this will fail if "prototype" is not a proto1 message, or if we can't +// handle it for some reason). +bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); + +// Returns a prototype for the given field in "m", if it is weak. The returned +// message could be the linked-in message type or OpaqueMessage, if the weak +// message is *not* linked in. Otherwise returns NULL. +const proto2::Message* GetProto1WeakPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f); + +// Returns a prototype for the given non-weak field in "m". +const proto2::Message* GetProto1FieldPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_PROTO1_H_ diff --git a/upb/google/proto2.cc b/upb/google/proto2.cc new file mode 100644 index 0000000..264530c --- /dev/null +++ b/upb/google/proto2.cc @@ -0,0 +1,632 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// Note that we have received an exception from c-style-arbiters regarding +// dynamic_cast<> in this file: +// https://groups.google.com/a/google.com/d/msg/c-style/7Zp_XCX0e7s/I6dpzno4l-MJ +// +// IMPORTANT NOTE! This file is compiled TWICE, once with UPB_GOOGLE3 defined +// and once without! This allows us to provide functionality against proto2 +// and protobuf opensource both in a single binary without the two conflicting. +// However we must be careful not to violate the ODR.
+ +#include "upb/google/proto2.h" + +#include "upb/google/proto1.h" +#include "upb/bytestream.h" +#include "upb/def.h" +#include "upb/handlers.h" + +namespace upb { +namespace proto2_bridge_google3 { class FieldAccessor; } +namespace proto2_bridge_opensource { class FieldAccessor; } +} // namespace upb + +// BEGIN DOUBLE COMPILATION TRICKERY. ////////////////////////////////////////// + +#ifdef UPB_GOOGLE3 + +// TODO(haberman): friend upb so that this isn't required. +#define protected public +#include "net/proto2/public/repeated_field.h" +#undef protected + +#define private public +#include "net/proto2/public/generated_message_reflection.h" +#undef private + +#include "net/proto2/proto/descriptor.pb.h" +#include "net/proto2/public/descriptor.h" +#include "net/proto2/public/lazy_field.h" +#include "net/proto2/public/message.h" +#include "net/proto2/public/string_piece_field_support.h" +#include "upb/google/cord.h" + +namespace goog = ::proto2; +namespace me = ::upb::proto2_bridge_google3; + +#else + +// TODO(haberman): friend upb so that this isn't required. +#define protected public +#include "google/protobuf/repeated_field.h" +#undef protected + +#define private public +#include "google/protobuf/generated_message_reflection.h" +#undef private + +#include "google/protobuf/descriptor.h" +#include "google/protobuf/descriptor.pb.h" +#include "google/protobuf/message.h" + +namespace goog = ::google::protobuf; +namespace me = ::upb::proto2_bridge_opensource; + +#endif // ifdef UPB_GOOGLE3 + +// END DOUBLE COMPILATION TRICKERY. //////////////////////////////////////////// + +// Have to define this manually since older versions of proto2 didn't define +// an enum value for STRING. +#define UPB_CTYPE_STRING 0 + +template static T* GetPointer(void *message, size_t offset) { + return reinterpret_cast(static_cast(message) + offset); +} + +// This class contains handlers that can write into a proto2 class whose +// reflection class is GeneratedMessageReflection. 
(Despite the name, even +// DynamicMessage uses GeneratedMessageReflection, so this covers all proto2 +// messages generated by the compiler.) To do this it must break the +// encapsulation of GeneratedMessageReflection and therefore depends on +// internal interfaces that are not guaranteed to be stable. This class will +// need to be updated if any non-backward-compatible changes are made to +// GeneratedMessageReflection. +// +// TODO(haberman): change class name? In retrospect, "FieldAccessor" isn't the +// best (something more specific like GeneratedMessageReflectionHandlers or +// GMR_Handlers would be better) but we're depending on a "friend" declaration +// in proto2 that already specifies "FieldAccessor." No versions of proto2 have +// been released that include the "friend FieldAccessor" declaration, so there's +// still time to change this. On the other hand, perhaps it's simpler to just +// rely on "#define private public" since it may be a long time before new +// versions of proto2 open source are pervasive enough that we can remove this +// anyway. +class me::FieldAccessor { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const goog::FieldDescriptor* proto2_f, + const goog::Message& m, + const upb::FieldDef* upb_f, upb::Handlers* h) { + const goog::Reflection* base_r = m.GetReflection(); + // See file comment re: dynamic_cast. + const goog::internal::GeneratedMessageReflection* r = + dynamic_cast(base_r); + if (!r) return false; + // Extensions not supported yet. 
+ if (proto2_f->is_extension()) return false; + + switch (proto2_f->cpp_type()) { +#define PRIMITIVE_TYPE(cpptype, cident) \ + case goog::FieldDescriptor::cpptype: \ + SetPrimitiveHandlers(proto2_f, r, upb_f, h); return true; + PRIMITIVE_TYPE(CPPTYPE_INT32, int32_t); + PRIMITIVE_TYPE(CPPTYPE_INT64, int64_t); + PRIMITIVE_TYPE(CPPTYPE_UINT32, uint32_t); + PRIMITIVE_TYPE(CPPTYPE_UINT64, uint64_t); + PRIMITIVE_TYPE(CPPTYPE_DOUBLE, double); + PRIMITIVE_TYPE(CPPTYPE_FLOAT, float); + PRIMITIVE_TYPE(CPPTYPE_BOOL, bool); +#undef PRIMITIVE_TYPE + case goog::FieldDescriptor::CPPTYPE_ENUM: + SetEnumHandlers(proto2_f, r, upb_f, h); + return true; + case goog::FieldDescriptor::CPPTYPE_STRING: { + // Old versions of the open-source protobuf release erroneously default + // to Cord even though that has never been supported in the open-source + // release. + int32_t ctype = proto2_f->options().has_ctype() ? + proto2_f->options().ctype() : UPB_CTYPE_STRING; + switch (ctype) { +#ifdef UPB_GOOGLE3 + case goog::FieldOptions::STRING: + SetStringHandlers(proto2_f, m, r, upb_f, h); + return true; + case goog::FieldOptions::CORD: + SetCordHandlers(proto2_f, r, upb_f, h); + return true; + case goog::FieldOptions::STRING_PIECE: + SetStringPieceHandlers(proto2_f, r, upb_f, h); + return true; +#else + case UPB_CTYPE_STRING: + SetStringHandlers(proto2_f, m, r, upb_f, h); + return true; +#endif + default: + return false; + } + } + case goog::FieldDescriptor::CPPTYPE_MESSAGE: +#ifdef UPB_GOOGLE3 + if (proto2_f->options().lazy()) { + return false; // Not yet implemented. + } else { + SetSubMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + } +#else + SetSubMessageHandlers(proto2_f, m, r, upb_f, h); + return true; +#endif + default: + return false; + } + } + + static const goog::Message* GetFieldPrototype( + const goog::Message& m, + const goog::FieldDescriptor* f) { + // We assume that all submessages (and extensions) will be constructed + // using the same MessageFactory as this message. 
This doesn't cover the + // case of CodedInputStream::SetExtensionRegistry(). + // See file comment re: dynamic_cast. + const goog::internal::GeneratedMessageReflection* r = + dynamic_cast( + m.GetReflection()); + if (!r) return NULL; + return r->message_factory_->GetPrototype(f->message_type()); + } + + private: + static upb_selector_t GetSelector(const upb::FieldDef* f, + upb::Handlers::Type type) { + upb::Handlers::Selector selector; + bool ok = upb::Handlers::GetSelector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; + } + + static int64_t GetHasbit( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) { + // proto2 does not store hasbits for repeated fields. + assert(!f->is_repeated()); + return (r->has_bits_offset_ * 8) + f->index(); + } + + static uint16_t GetOffset( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) { + return r->offsets_[f->index()]; + } + + class FieldOffset { + public: + FieldOffset( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) + : offset_(GetOffset(f, r)), + is_repeated_(f->is_repeated()) { + if (!is_repeated_) { + int64_t hasbit = GetHasbit(f, r); + hasbyte_ = hasbit / 8; + mask_ = 1 << (hasbit % 8); + } + } + + template T* GetFieldPointer(void *message) const { + return GetPointer(message, offset_); + } + + void SetHasbit(void* m) const { + assert(!is_repeated_); + uint8_t* byte = GetPointer(m, hasbyte_); + *byte |= mask_; + } + + private: + const size_t offset_; + bool is_repeated_; + + // Only for non-repeated fields. 
+ int32_t hasbyte_; + int8_t mask_; + }; + + // StartSequence ///////////////////////////////////////////////////////////// + + static void SetStartSequenceHandler( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(f->IsSequence()); + h->SetStartSequenceHandler( + f, &PushOffset, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + + static void* PushOffset(void *m, void *fval) { + const FieldOffset* offset = static_cast(fval); + return offset->GetFieldPointer(m); + } + + // Primitive Value (numeric, bool) /////////////////////////////////////////// + + template static void SetPrimitiveHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetValueHandler(f, &AppendPrimitive, NULL, NULL); + } else { + upb::SetStoreValueHandler( + f, GetOffset(proto2_f, r), GetHasbit(proto2_f, r), h); + } + } + + template + static bool AppendPrimitive(void *_r, void *fval, T val) { + UPB_UNUSED(fval); + goog::RepeatedField* r = static_cast*>(_r); + r->Add(val); + return true; + } + + // Enum ////////////////////////////////////////////////////////////////////// + + class EnumHandlerData : public FieldOffset { + public: + EnumHandlerData( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f) + : FieldOffset(proto2_f, r), + field_number_(f->number()), + unknown_fields_offset_(r->unknown_fields_offset_), + enum_(upb_downcast_enumdef(f->subdef())) { + } + + bool IsValidValue(int32_t val) const { + return enum_->FindValueByNumber(val) != NULL; + } + + int32_t field_number() const { return field_number_; } + + goog::UnknownFieldSet* mutable_unknown_fields(goog::Message* m) const { + return GetPointer(m, unknown_fields_offset_); + } + + private: 
+ int32_t field_number_; + size_t unknown_fields_offset_; + const upb::EnumDef* enum_; + }; + + static void SetEnumHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + EnumHandlerData* data = new EnumHandlerData(proto2_f, r, f); + if (f->IsSequence()) { + h->SetInt32Handler( + f, &AppendEnum, data, &upb::DeletePointer); + } else { + h->SetInt32Handler( + f, &SetEnum, data, &upb::DeletePointer); + } + } + + static bool SetEnum(void *_m, void *fval, int32_t val) { + goog::Message* m = static_cast(_m); + const EnumHandlerData* data = static_cast(fval); + if (data->IsValidValue(val)) { + int32_t* message_val = data->GetFieldPointer(m); + *message_val = val; + data->SetHasbit(m); + } else { + data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + } + return true; + } + + static bool AppendEnum(void *_m, void *fval, int32_t val) { + // Closure is the enclosing message. We can't use the RepeatedField<> as + // the closure because we need to go back to the message for unrecognized + // enum values, which go into the unknown field set. + goog::Message* m = static_cast(_m); + const EnumHandlerData* data = static_cast(fval); + if (data->IsValidValue(val)) { + goog::RepeatedField* r = + data->GetFieldPointer >(m); + r->Add(val); + } else { + data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + } + return true; + } + + // String //////////////////////////////////////////////////////////////////// + + // For scalar (non-repeated) string fields. + template + class StringHandlerData : public FieldOffset { + public: + StringHandlerData(const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const goog::Message& prototype) + : FieldOffset(proto2_f, r) { + // "prototype" isn't guaranteed to be empty, so we create a copy to get + // the default string instance. 
+ goog::Message* empty = prototype.New(); + prototype_ = &r->GetStringReference(*empty, proto2_f, NULL); + delete empty; + } + + const T* prototype() const { return prototype_; } + + T** GetStringPointer(void *message) const { + return GetFieldPointer(message); + } + + private: + const T* prototype_; + }; + + template static void SetStringHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::Message& m, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + h->SetStringHandler(f, &OnStringBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedString, NULL, NULL); + } else { + StringHandlerData* data = new StringHandlerData(proto2_f, r, m); + h->SetStartStringHandler( + f, &StartString, data, &upb::DeletePointer >); + } + } + + // This needs to be templated because google3 string is not std::string. + template static void* StartString( + void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + const StringHandlerData* data = + static_cast*>(fval); + T** str = data->GetStringPointer(m); + data->SetHasbit(m); + // If it points to the default instance, we must create a new instance. + if (*str == data->prototype()) *str = new T(); + (*str)->clear(); + // reserve() here appears to hurt performance rather than help. + return *str; + } + + template static size_t OnStringBuf( + void *_str, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); + T* str = static_cast(_str); + str->append(buf, n); + return n; + } + + + template + static void* StartRepeatedString(void *_r, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + goog::RepeatedPtrField* r = static_cast*>(_r); + T* str = r->Add(); + str->clear(); + // reserve() here appears to hurt performance rather than help. 
+ return str; + } + + // SubMessage //////////////////////////////////////////////////////////////// + + class SubMessageHandlerData : public FieldOffset { + public: + SubMessageHandlerData( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r, + const goog::Message* prototype) + : FieldOffset(f, r), + prototype_(prototype) { + } + + const goog::Message* prototype() const { return prototype_; } + + private: + const goog::Message* const prototype_; + }; + + static void SetSubMessageHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::Message& m, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + SubMessageHandlerData* data = + new SubMessageHandlerData(proto2_f, r, GetFieldPrototype(m, proto2_f)); + upb::Handlers::Free* free = &upb::DeletePointer; + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartSubMessageHandler(f, &StartRepeatedSubMessage, data, free); + } else { + h->SetStartSubMessageHandler(f, &StartSubMessage, data, free); + } + } + + static void* StartSubMessage(void *m, void *fval) { + const SubMessageHandlerData* data = + static_cast(fval); + data->SetHasbit(m); + goog::Message **subm = data->GetFieldPointer(m); + if (*subm == NULL || *subm == data->prototype()) { + *subm = data->prototype()->New(); + } + return *subm; + } + + class RepeatedMessageTypeHandler { + public: + typedef void Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + (void)t; + assert(false); + } + }; + + // Closure is a RepeatedPtrField*, but we access it through + // its base class RepeatedPtrFieldBase*. 
+ static void* StartRepeatedSubMessage(void* _r, void *fval) { + const SubMessageHandlerData* data = + static_cast(fval); + goog::internal::RepeatedPtrFieldBase *r = + static_cast(_r); + void *submsg = r->AddFromCleared(); + if (!submsg) { + submsg = data->prototype()->New(); + r->AddAllocated(submsg); + } + return submsg; + } + + // TODO(haberman): handle Extensions, Unknown Fields. + +#ifdef UPB_GOOGLE3 + // Handlers for types/features only included in internal proto2 release: + // Cord, StringPiece, LazyField, and MessageSet. + // TODO(haberman): LazyField, MessageSet. + + // Cord ////////////////////////////////////////////////////////////////////// + + static void SetCordHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, &OnCordBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedCord, NULL, NULL); + } else { + h->SetStartStringHandler( + f, &StartCord, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + } + + static void* StartCord(void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + const FieldOffset* offset = static_cast(fval); + offset->SetHasbit(m); + Cord* field = offset->GetFieldPointer(m); + field->Clear(); + return field; + } + + static size_t OnCordBuf(void *_c, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); + Cord* c = static_cast(_c); + c->Append(StringPiece(buf, n)); + return n; + } + + static void* StartRepeatedCord(void *_r, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + proto2::RepeatedField* r = + static_cast*>(_r); + return r->Add(); + } + + // StringPiece /////////////////////////////////////////////////////////////// + + static void SetStringPieceHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::internal::GeneratedMessageReflection* r, + const 
upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, &OnStringPieceBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedStringPiece, NULL, NULL); + } else { + h->SetStartStringHandler( + f, &StartStringPiece, new FieldOffset(proto2_f, r), + &upb::DeletePointer); + } + } + + static size_t OnStringPieceBuf(void *_f, void *fval, + const char *buf, size_t len) { + UPB_UNUSED(fval); + // TODO(haberman): alias if possible and enabled on the input stream. + // TODO(haberman): add a method to StringPieceField that lets us avoid + // this copy/malloc/free. + proto2::internal::StringPieceField* field = + static_cast(_f); + size_t new_len = field->size() + len; + char *data = new char[new_len]; + memcpy(data, field->data(), field->size()); + memcpy(data + field->size(), buf, len); + field->CopyFrom(StringPiece(data, new_len)); + delete[] data; + return len; + } + + static void* StartStringPiece(void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + const FieldOffset* offset = static_cast(fval); + offset->SetHasbit(m); + proto2::internal::StringPieceField* field = + offset->GetFieldPointer(m); + field->Clear(); + return field; + } + + static void* StartRepeatedStringPiece(void* _r, void *fval, + size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + typedef proto2::RepeatedPtrField + RepeatedStringPiece; + RepeatedStringPiece* r = static_cast(_r); + proto2::internal::StringPieceField* field = r->Add(); + field->Clear(); + return field; + } + +#endif // UPB_GOOGLE3 +}; + +namespace upb { +namespace google { + +bool TrySetWriteHandlers(const goog::FieldDescriptor* proto2_f, + const goog::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h) { + return me::FieldAccessor::TrySet(proto2_f, prototype, upb_f, h); +} + +const goog::Message* GetFieldPrototype( + const goog::Message& m, + const goog::FieldDescriptor* f) { + return 
me::FieldAccessor::GetFieldPrototype(m, f); +} + +} // namespace google +} // namespace upb diff --git a/upb/google/proto2.h b/upb/google/proto2.h new file mode 100644 index 0000000..f2662ea --- /dev/null +++ b/upb/google/proto2.h @@ -0,0 +1,62 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// +// Support for registering field handlers that can write into a proto2 +// message that uses GeneratedMessageReflection (which includes all messages +// generated by the proto2 compiler as well as DynamicMessage). +// +// This is a low-level interface; the high-level interface in google.h is +// more user-friendly. + +#ifndef UPB_GOOGLE_PROTO2_H_ +#define UPB_GOOGLE_PROTO2_H_ + +namespace proto2 { +class FieldDescriptor; +class Message; +} + +namespace google { +namespace protobuf { +class FieldDescriptor; +class Message; +} +} + +namespace upb { +class FieldDef; +class Handlers; +} + +namespace upb { +namespace google { + +// Sets field handlers in the given Handlers object for writing to a single +// field (as described by "proto2_f" and "upb_f") into a message constructed +// by the same factory as "prototype." Returns true if this was successful +// (this will fail if "prototype" does not use GeneratedMessageReflection, or +// if we can't handle it for some reason). +bool TrySetWriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); +bool TrySetWriteHandlers(const ::google::protobuf::FieldDescriptor* proto2_f, + const ::google::protobuf::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); + +// Returns a prototype for the message type of the given submessage field in +// "m", looked up in the MessageFactory that "m" itself uses. Returns NULL if +// "m" does not use GeneratedMessageReflection.
+const proto2::Message* GetFieldPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f); +const ::google::protobuf::Message* GetFieldPrototype( + const ::google::protobuf::Message& m, + const ::google::protobuf::FieldDescriptor* f); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_PROTO2_H_ diff --git a/upb/handlers.c b/upb/handlers.c index 8350f64..8263c9a 100644 --- a/upb/handlers.c +++ b/upb/handlers.c @@ -1,292 +1,385 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2011 Google Inc. See LICENSE for details. + * Copyright (c) 2011-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman */ -#include #include "upb/handlers.h" +#include +#include + +// Defined for the sole purpose of having a unique pointer value for +// UPB_NO_CLOSURE. +char _upb_noclosure; + +typedef struct { + upb_func *handler; + + // Could put either or both of these in a separate table to save memory when + // they are sparse. + void *data; + upb_handlerfree *cleanup; + + // TODO(haberman): this is wasteful; only the first "fieldhandler" of a + // submessage field needs this. To reduce memory footprint we should either: + // - put the subhandlers in a separate "fieldhandler", stored as part of + // a union with one of the above fields. + // - count selector offsets by individual pointers instead of by whole + // fieldhandlers. 
+ const upb_handlers *subhandlers; +} fieldhandler; + +static const fieldhandler *getfh( + const upb_handlers *h, upb_selector_t selector) { + assert(selector < upb_handlers_msgdef(h)->selector_count); + fieldhandler* fhbase = (void*)&h->fh_base; + return &fhbase[selector]; +} -/* upb_mhandlers **************************************************************/ +static fieldhandler *getfh_mutable(upb_handlers *h, upb_selector_t selector) { + return (fieldhandler*)getfh(h, selector); +} -static upb_mhandlers *upb_mhandlers_new(void) { - upb_mhandlers *m = malloc(sizeof(*m)); - upb_inttable_init(&m->fieldtab); - m->startmsg = NULL; - m->endmsg = NULL; - m->is_group = false; -#ifdef UPB_USE_JIT_X64 - m->tablearray = NULL; -#endif - return m; +bool upb_handlers_isfrozen(const upb_handlers *h) { + return upb_refcounted_isfrozen(upb_upcast(h)); } -static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, - bool repeated) { - const upb_value *v = upb_inttable_lookup(&m->fieldtab, n); - // TODO: design/refine the API for changing the set of fields or modifying - // existing handlers. - if (v) return NULL; - upb_fhandlers new_f = {type, repeated, 0, - n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL, -#ifdef UPB_USE_JIT_X64 - 0, 0, 0, -#endif - }; - upb_fhandlers *ptr = malloc(sizeof(*ptr)); - memcpy(ptr, &new_f, sizeof(upb_fhandlers)); - upb_inttable_insert(&m->fieldtab, n, upb_value_ptr(ptr)); - return ptr; +uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { + return upb_fielddef_isseq(f) ? 
2 : 0; } -upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, bool repeated) { - assert(type != UPB_TYPE(MESSAGE)); - assert(type != UPB_TYPE(GROUP)); - return _upb_mhandlers_newfhandlers(m, n, type, repeated); +uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { + uint32_t ret = 1; + if (upb_fielddef_isstring(f)) ret += 2; // STARTSTR/ENDSTR + if (upb_fielddef_isseq(f)) ret += 2; // STARTSEQ/ENDSEQ + if (upb_fielddef_issubmsg(f)) ret += 2; // STARTSUBMSG/ENDSUBMSG + return ret; } -upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, - bool repeated, - upb_mhandlers *subm) { - assert(type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)); - assert(subm); - upb_fhandlers *f = _upb_mhandlers_newfhandlers(m, n, type, repeated); - if (!f) return NULL; - f->submsg = subm; - if (type == UPB_TYPE(GROUP)) - _upb_mhandlers_newfhandlers(subm, n, UPB_TYPE_ENDGROUP, false); - return f; +upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_SINT32: + case UPB_TYPE_SFIXED32: + case UPB_TYPE_ENUM: + return UPB_HANDLER_INT32; + case UPB_TYPE_INT64: + case UPB_TYPE_SINT64: + case UPB_TYPE_SFIXED64: + return UPB_HANDLER_INT64; + case UPB_TYPE_UINT32: + case UPB_TYPE_FIXED32: + return UPB_HANDLER_UINT32; + case UPB_TYPE_UINT64: + case UPB_TYPE_FIXED64: + return UPB_HANDLER_UINT64; + case UPB_TYPE_FLOAT: + return UPB_HANDLER_FLOAT; + case UPB_TYPE_DOUBLE: + return UPB_HANDLER_DOUBLE; + case UPB_TYPE_BOOL: + return UPB_HANDLER_BOOL; + default: assert(false); return -1; // Invalid input. + } } -upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n) { - const upb_value *v = upb_inttable_lookup(&m->fieldtab, n); - return v ? 
upb_value_getptr(*v) : NULL; +bool upb_getselector( + const upb_fielddef *f, upb_handlertype_t type, upb_selector_t *s) { + // If the type checks in this function are a hot-spot, we can introduce a + // separate function that calculates the selector assuming that the type + // is correct (may even want to make it inline for the upb_sink fast-path. + switch (type) { + case UPB_HANDLER_INT32: + case UPB_HANDLER_INT64: + case UPB_HANDLER_UINT32: + case UPB_HANDLER_UINT64: + case UPB_HANDLER_FLOAT: + case UPB_HANDLER_DOUBLE: + case UPB_HANDLER_BOOL: + if (!upb_fielddef_isprimitive(f) || + upb_handlers_getprimitivehandlertype(f) != type) + return false; + *s = f->selector_base; + break; + case UPB_HANDLER_STARTSTR: + if (!upb_fielddef_isstring(f)) return false; + *s = f->selector_base; + break; + case UPB_HANDLER_STRING: + if (!upb_fielddef_isstring(f)) return false; + *s = f->selector_base + 1; + break; + case UPB_HANDLER_ENDSTR: + if (!upb_fielddef_isstring(f)) return false; + *s = f->selector_base + 2; + break; + case UPB_HANDLER_STARTSEQ: + if (!upb_fielddef_isseq(f)) return false; + *s = f->selector_base - 2; + break; + case UPB_HANDLER_ENDSEQ: + if (!upb_fielddef_isseq(f)) return false; + *s = f->selector_base - 1; + break; + case UPB_HANDLER_STARTSUBMSG: + if (!upb_fielddef_issubmsg(f)) return false; + *s = f->selector_base + 1; + break; + case UPB_HANDLER_ENDSUBMSG: + if (!upb_fielddef_issubmsg(f)) return false; + *s = f->selector_base + 2; + break; + } + assert(*s < upb_fielddef_msgdef(f)->selector_count); + return true; } +void upb_handlers_ref(const upb_handlers *h, const void *owner) { + upb_refcounted_ref(upb_upcast(h), owner); +} -/* upb_handlers ***************************************************************/ +void upb_handlers_unref(const upb_handlers *h, const void *owner) { + upb_refcounted_unref(upb_upcast(h), owner); +} -upb_handlers *upb_handlers_new() { - upb_handlers *h = malloc(sizeof(*h)); - h->refcount = 1; - h->msgs_len = 0; - h->msgs_size = 
4; - h->msgs = malloc(h->msgs_size * sizeof(*h->msgs)); - h->should_jit = true; - return h; +void upb_handlers_donateref( + const upb_handlers *h, const void *from, const void *to) { + upb_refcounted_donateref(upb_upcast(h), from, to); } -void upb_handlers_ref(upb_handlers *h) { h->refcount++; } - -void upb_handlers_unref(upb_handlers *h) { - if (--h->refcount == 0) { - for (int i = 0; i < h->msgs_len; i++) { - upb_mhandlers *mh = h->msgs[i]; - upb_inttable_iter j; - upb_inttable_begin(&j, &mh->fieldtab); - for(; !upb_inttable_done(&j); upb_inttable_next(&j)) { - free(upb_value_getptr(upb_inttable_iter_value(&j))); - } - upb_inttable_uninit(&mh->fieldtab); -#ifdef UPB_USE_JIT_X64 - free(mh->tablearray); -#endif - free(mh); - } - free(h->msgs); - free(h); - } +void upb_handlers_checkref(const upb_handlers *h, const void *owner) { + upb_refcounted_checkref(upb_upcast(h), owner); +} + +static void do_cleanup(upb_handlers* h, const upb_fielddef *f, + upb_handlertype_t type) { + upb_selector_t selector; + if (!upb_getselector(f, type, &selector)) return; + fieldhandler *fh = getfh_mutable(h, selector); + if (fh->cleanup) fh->cleanup(fh->data); + fh->cleanup = NULL; + fh->data = NULL; } -upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) { - if (h->msgs_len == h->msgs_size) { - h->msgs_size *= 2; - h->msgs = realloc(h->msgs, h->msgs_size * sizeof(*h->msgs)); +static void freehandlers(upb_refcounted *r) { + upb_handlers *h = (upb_handlers*)r; + upb_msg_iter i; + for(upb_msg_begin(&i, h->msg); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + for (upb_handlertype_t type = 0; type < UPB_HANDLER_MAX; type++) + do_cleanup(h, f, type); } - upb_mhandlers *mh = upb_mhandlers_new(); - h->msgs[h->msgs_len++] = mh; - return mh; + upb_msgdef_unref(h->msg, h); + free(h); } -static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, const upb_msgdef *m, - upb_onmsgreg *msgreg_cb, - upb_onfieldreg *fieldreg_cb, - void *closure, upb_strtable *mtab) { 
- upb_mhandlers *mh = upb_handlers_newmhandlers(h); - upb_strtable_insert(mtab, upb_def_fullname(UPB_UPCAST(m)), upb_value_ptr(mh)); - if (msgreg_cb) msgreg_cb(closure, mh, m); +static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit, + void *closure) { + const upb_handlers *h = (const upb_handlers*)r; upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + for(upb_msg_begin(&i, h->msg); !upb_msg_done(&i); upb_msg_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); - upb_fhandlers *fh; - if (upb_issubmsg(f)) { - upb_mhandlers *sub_mh; - const upb_value *subm_ent; - // The table lookup is necessary to break the DFS for type cycles. - const char *subname = upb_def_fullname(upb_fielddef_subdef(f)); - if ((subm_ent = upb_strtable_lookup(mtab, subname)) != NULL) { - sub_mh = upb_value_getptr(*subm_ent); - } else { - sub_mh = upb_regmsg_dfs( - h, upb_downcast_msgdef_const(upb_fielddef_subdef(f)), - msgreg_cb, fieldreg_cb, closure, mtab); - } - fh = upb_mhandlers_newfhandlers_subm( - mh, f->number, f->type, upb_isseq(f), sub_mh); - } else { - fh = upb_mhandlers_newfhandlers(mh, f->number, f->type, upb_isseq(f)); - } - if (fieldreg_cb) fieldreg_cb(closure, fh, f); + if (!upb_fielddef_issubmsg(f)) continue; + const upb_handlers *sub = upb_handlers_getsubhandlers(h, f); + if (sub) visit(r, upb_upcast(sub), closure); } - return mh; } -upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m, - upb_onmsgreg *msgreg_cb, - upb_onfieldreg *fieldreg_cb, - void *closure) { - upb_strtable mtab; - upb_strtable_init(&mtab); - upb_mhandlers *ret = - upb_regmsg_dfs(h, m, msgreg_cb, fieldreg_cb, closure, &mtab); - upb_strtable_uninit(&mtab); - return ret; +upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) { + assert(upb_msgdef_isfrozen(md)); + static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers}; + size_t fhandlers_size = sizeof(fieldhandler) * md->selector_count; + 
upb_handlers *h = calloc(sizeof(*h) - sizeof(void*) + fhandlers_size, 1); + if (!h) return NULL; + h->msg = md; + upb_msgdef_ref(h->msg, h); + if (!upb_refcounted_init(upb_upcast(h), &vtbl, owner)) goto oom; + + // calloc() above initialized all handlers to NULL. + return h; + +oom: + freehandlers(upb_upcast(h)); + return NULL; } +bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) { + // TODO: verify we have a transitive closure. + return upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s); +} + +const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; } -/* upb_dispatcher *************************************************************/ - -void upb_dispatcher_init(upb_dispatcher *d, upb_status *status, - upb_exit_handler UPB_NORETURN *exit, - void *srcclosure) { - d->stack[0].f = NULL; // Should never be read. - d->limit = &d->stack[UPB_MAX_NESTING]; - d->exitjmp = exit; - d->srcclosure = srcclosure; - d->top_is_implicit = false; - d->msgent = NULL; - d->top = NULL; - d->toplevel_msgent = NULL; - d->status = status; +void upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handler *handler) { + assert(!upb_handlers_isfrozen(h)); + h->startmsg = handler; } -upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure, - upb_mhandlers *top) { - d->msgent = top; - d->toplevel_msgent = top; - d->top = d->stack; - d->top->closure = closure; - d->top->is_sequence = false; - d->top->is_packed = false; - return d->top; +upb_startmsg_handler *upb_handlers_getstartmsg(const upb_handlers *h) { + return h->startmsg; } -void upb_dispatcher_uninit(upb_dispatcher *d) { - (void)d; +void upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handler *handler) { + assert(!upb_handlers_isfrozen(h)); + h->endmsg = handler; } -void upb_dispatch_startmsg(upb_dispatcher *d) { - upb_flow_t flow = UPB_CONTINUE; - if (d->msgent->startmsg) d->msgent->startmsg(d->top->closure); - if (flow != UPB_CONTINUE) 
_upb_dispatcher_abortjmp(d); +upb_endmsg_handler *upb_handlers_getendmsg(const upb_handlers *h) { + return h->endmsg; } -void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) { - assert(d->top == d->stack); - if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status); - // TODO: should we avoid this copy by passing client's status obj to cbs? - upb_status_copy(status, d->status); +// For now we stuff the subhandlers pointer into the fieldhandlers* +// corresponding to the UPB_HANDLER_STARTSUBMSG handler. +static const upb_handlers **subhandlersptr(upb_handlers *h, + const upb_fielddef *f) { + assert(upb_fielddef_issubmsg(f)); + upb_selector_t selector; + bool ok = upb_getselector(f, UPB_HANDLER_STARTSUBMSG, &selector); + UPB_ASSERT_VAR(ok, ok); + return &getfh_mutable(h, selector)->subhandlers; } -upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, - upb_fhandlers *f) { - if (d->top + 1 >= d->limit) { - upb_status_seterrliteral(d->status, "Nesting too deep."); - _upb_dispatcher_abortjmp(d); +bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f, + const upb_handlers *sub) { + assert(!upb_handlers_isfrozen(h)); + if (!upb_fielddef_issubmsg(f)) return false; + if (sub != NULL && + upb_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) { + return false; } + const upb_handlers **stored = subhandlersptr(h, f); + const upb_handlers *old = *stored; + if (old) upb_unref2(old, h); + *stored = sub; + if (sub) upb_ref2(sub, h); + return true; +} - upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure); - if (f->startseq) sflow = f->startseq(d->top->closure, f->fval); - _upb_dispatcher_sethas(d->top->closure, f->hasbit); - if (sflow.flow != UPB_CONTINUE) { - _upb_dispatcher_abortjmp(d); - } +const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h, + const upb_fielddef *f) { + const upb_handlers **stored = subhandlersptr((upb_handlers*)h, f); + return *stored; +} - ++d->top; - d->top->f = f; - 
d->top->is_sequence = true; - d->top->is_packed = false; - d->top->closure = sflow.closure; - return d->top; +#define SETTER(name, handlerctype, handlertype) \ + bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \ + handlerctype val, void *data, \ + upb_handlerfree *cleanup) { \ + assert(!upb_handlers_isfrozen(h)); \ + if (upb_handlers_msgdef(h) != upb_fielddef_msgdef(f)) return false; \ + upb_selector_t selector; \ + bool ok = upb_getselector(f, handlertype, &selector); \ + if (!ok) return false; \ + do_cleanup(h, f, handlertype); \ + fieldhandler *fh = getfh_mutable(h, selector); \ + fh->handler = (upb_func*)val; \ + fh->data = (upb_func*)data; \ + fh->cleanup = (upb_func*)cleanup; \ + return true; \ + } \ + +SETTER(int32, upb_int32_handler*, UPB_HANDLER_INT32); +SETTER(int64, upb_int64_handler*, UPB_HANDLER_INT64); +SETTER(uint32, upb_uint32_handler*, UPB_HANDLER_UINT32); +SETTER(uint64, upb_uint64_handler*, UPB_HANDLER_UINT64); +SETTER(float, upb_float_handler*, UPB_HANDLER_FLOAT); +SETTER(double, upb_double_handler*, UPB_HANDLER_DOUBLE); +SETTER(bool, upb_bool_handler*, UPB_HANDLER_BOOL); +SETTER(startstr, upb_startstr_handler*, UPB_HANDLER_STARTSTR); +SETTER(string, upb_string_handler*, UPB_HANDLER_STRING); +SETTER(endstr, upb_endfield_handler*, UPB_HANDLER_ENDSTR); +SETTER(startseq, upb_startfield_handler*, UPB_HANDLER_STARTSEQ); +SETTER(startsubmsg, upb_startfield_handler*, UPB_HANDLER_STARTSUBMSG); +SETTER(endsubmsg, upb_endfield_handler*, UPB_HANDLER_ENDSUBMSG); +SETTER(endseq, upb_endfield_handler*, UPB_HANDLER_ENDSEQ); +#undef SETTER + +upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s) { + return getfh(h, s)->handler; } -upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) { - assert(d->top > d->stack); - assert(d->top->is_sequence); - upb_fhandlers *f = d->top->f; - --d->top; - upb_flow_t flow = UPB_CONTINUE; - if (f->endseq) flow = f->endseq(d->top->closure, f->fval); - if (flow != UPB_CONTINUE) { - 
_upb_dispatcher_abortjmp(d); - } - d->msgent = d->top->f ? d->top->f->submsg : d->toplevel_msgent; - return d->top; +void *upb_handlers_gethandlerdata(const upb_handlers *h, upb_selector_t s) { + return getfh(h, s)->data; } -upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, - upb_fhandlers *f) { - if (d->top + 1 >= d->limit) { - upb_status_seterrliteral(d->status, "Nesting too deep."); - _upb_dispatcher_abortjmp(d); - } +typedef struct { + upb_inttable tab; // maps upb_msgdef* -> upb_handlers*. + upb_handlers_callback *callback; + void *closure; +} dfs_state; - upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure); - if (f->startsubmsg) sflow = f->startsubmsg(d->top->closure, f->fval); - _upb_dispatcher_sethas(d->top->closure, f->hasbit); - if (sflow.flow != UPB_CONTINUE) { - _upb_dispatcher_abortjmp(d); - } +static upb_handlers *newformsg(const upb_msgdef *m, const void *owner, + dfs_state *s) { + upb_handlers *h = upb_handlers_new(m, owner); + if (!h) return NULL; + if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom; - ++d->top; - d->top->f = f; - d->top->is_sequence = false; - d->top->is_packed = false; - d->top->closure = sflow.closure; - d->msgent = f->submsg; - upb_dispatch_startmsg(d); - return d->top; -} + s->callback(s->closure, h); -upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) { - assert(d->top > d->stack); - assert(!d->top->is_sequence); - upb_fhandlers *f = d->top->f; - if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status); - d->msgent = d->top->f->msg; - --d->top; - upb_flow_t flow = UPB_CONTINUE; - if (f->endsubmsg) f->endsubmsg(d->top->closure, f->fval); - if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d); - return d->top; -} + // For each submessage field, get or create a handlers object and set it as + // the subhandlers. 
+ upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + if (!upb_fielddef_issubmsg(f)) continue; -bool upb_dispatcher_stackempty(upb_dispatcher *d) { - return d->top == d->stack; -} -bool upb_dispatcher_islegalend(upb_dispatcher *d) { - if (d->top == d->stack) return true; - if (d->top - 1 == d->stack && - d->top->is_sequence && !d->top->is_packed) return true; - return false; + const upb_msgdef *subdef = upb_downcast_msgdef(upb_fielddef_subdef(f)); + const upb_value *subm_ent = upb_inttable_lookupptr(&s->tab, subdef); + if (subm_ent) { + upb_handlers_setsubhandlers(h, f, upb_value_getptr(*subm_ent)); + } else { + upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s); + if (!sub_mh) goto oom; + upb_handlers_setsubhandlers(h, f, sub_mh); + upb_handlers_unref(sub_mh, &sub_mh); + } + } + return h; + +oom: + upb_handlers_unref(h, owner); + return NULL; } -void _upb_dispatcher_abortjmp(upb_dispatcher *d) { - d->exitjmp(d->srcclosure); - assert(false); // Never returns. 
+const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m, + const void *owner, + upb_handlers_callback *callback, + void *closure) { + dfs_state state; + state.callback = callback; + state.closure = closure; + if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL; + + upb_handlers *ret = newformsg(m, owner, &state); + if (!ret) return NULL; + upb_refcounted *r = upb_upcast(ret); + upb_status status = UPB_STATUS_INIT; + bool ok = upb_refcounted_freeze(&r, 1, &status); + UPB_ASSERT_VAR(ok, ok); + upb_status_uninit(&status); + + upb_inttable_uninit(&state.tab); + return ret; } + +#define STDMSG_WRITER(type, ctype) \ + bool upb_stdmsg_set ## type (void *_m, void *fval, ctype val) { \ + assert(_m != NULL); \ + const upb_stdmsg_fval *f = fval; \ + uint8_t *m = _m; \ + if (f->hasbit > 0) \ + *(uint8_t*)&m[f->hasbit / 8] |= 1 << (f->hasbit % 8); \ + *(ctype*)&m[f->offset] = val; \ + return true; \ + } \ + +STDMSG_WRITER(double, double) +STDMSG_WRITER(float, float) +STDMSG_WRITER(int32, int32_t) +STDMSG_WRITER(int64, int64_t) +STDMSG_WRITER(uint32, uint32_t) +STDMSG_WRITER(uint64, uint64_t) +STDMSG_WRITER(bool, bool) +#undef STDMSG_WRITER diff --git a/upb/handlers.h b/upb/handlers.h index 6d8f9f2..094702e 100644 --- a/upb/handlers.h +++ b/upb/handlers.h @@ -1,399 +1,689 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. + * Copyright (c) 2010-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * upb_handlers is a generic visitor-like interface for iterating over a stream - * of protobuf data. You can register function pointers that will be called - * for each message and/or field as the data is being parsed or iterated over, - * without having to know the source format that we are parsing from. This - * decouples the parsing logic from the processing logic. + * A upb_handlers is like a virtual table for a upb_msgdef. 
Each field of the + * message can have associated functions that will be called when we are + * parsing or visiting a stream of data. This is similar to how handlers work + * in SAX (the Simple API for XML). * - * TODO: should we allow handlers to longjmp()? Would be necessary to eg. let - * a Lua handler "yield" from the current coroutine. I *think* everything - * would "just work" with our current decoder. + * The handlers have no idea where the data is coming from, so a single set of + * handlers could be used with two completely different data sources (for + * example, a parser and a visitor over in-memory objects). This decoupling is + * the most important feature of upb, because it allows parsers and serializers + * to be highly reusable. + * + * This is a mixed C/C++ interface that offers a full API to both languages. + * See the top-level README for more information. */ #ifndef UPB_HANDLERS_H #define UPB_HANDLERS_H -#include "upb/upb.h" #include "upb/def.h" -#include "upb/bytestream.h" #ifdef __cplusplus -extern "C" { +namespace upb { class Handlers; } +typedef upb::Handlers upb_handlers; +#else +struct upb_handlers; +typedef struct upb_handlers upb_handlers; #endif -/* Handlers protocol definition ***********************************************/ - -// A upb_handlers object represents a graph of handlers. Each message can have -// a set of handlers as well as a set of fields which themselves have handlers. -// Fields that represent submessages or groups are linked to other message -// handlers, so the overall set of handlers can form a graph structure (which -// may be cyclic). -// -// The upb_mhandlers (message handlers) object can have the following handlers: -// -// static upb_flow_t startmsg(void *closure) { -// // Called when the message begins. "closure" was supplied by our caller. 
-// return UPB_CONTINUE; -// } -// -// static void endmsg(void *closure, upb_status *status) { -// // Called when processing of this message ends, whether in success or -// // failure. "status" indicates the final status of processing, and can -// / also be modified in-place to update the final status. -// // -// // Since this callback is guaranteed to always be called eventually, it -// // can be used to free any resources that were allocated during processing. -// } -// -// TODO: unknown field handler. -// -// The upb_fhandlers (field handlers) object can have the following handlers: -// -// static upb_flow_t value(void *closure, upb_value fval, upb_value val) { -// // Called when the field's value is encountered. "fval" contains -// // whatever value was bound to this field at registration type -// // (for upb_register_all(), this will be the field's upb_fielddef*). -// return UPB_CONTINUE; -// } -// -// static upb_sflow_t startsubmsg(void *closure, upb_value fval) { -// // Called when a submessage begins. The second element of the return -// // value is the closure for the submessage. -// return UPB_CONTINUE_WITH(closure); -// } -// -// static upb_flow_t endsubmsg(void *closure, upb_value fval) { -// // Called when a submessage ends. -// return UPB_CONTINUE; -// } -// -// static upb_sflow_t startseq(void *closure, upb_value fval) { -// // Called when a sequence (repeated field) begins. The second element -// // of the return value is the closure for the sequence. -// return UPB_CONTINUE_WITH(closure); -// } -// -// static upb_flow_t endseq(void *closure, upb_value fval) { -// // Called when a sequence ends. -// return UPB_CONTINUE; -// } -// -// All handlers except the endmsg handler return a value from this enum, to -// control whether parsing will continue or not. +// All the different types of handlers that can be registered. +// Only needed for the advanced functions in upb::Handlers. typedef enum { - // Data source should continue calling callbacks. 
- UPB_CONTINUE = 0, + UPB_HANDLER_INT32, + UPB_HANDLER_INT64, + UPB_HANDLER_UINT32, + UPB_HANDLER_UINT64, + UPB_HANDLER_FLOAT, + UPB_HANDLER_DOUBLE, + UPB_HANDLER_BOOL, + UPB_HANDLER_STARTSTR, + UPB_HANDLER_STRING, + UPB_HANDLER_ENDSTR, + UPB_HANDLER_STARTSUBMSG, + UPB_HANDLER_ENDSUBMSG, + UPB_HANDLER_STARTSEQ, + UPB_HANDLER_ENDSEQ, +} upb_handlertype_t; + +#define UPB_HANDLER_MAX (UPB_HANDLER_ENDSEQ+1) + +#define UPB_BREAK NULL + +// A convenient definition for when no closure is needed. +extern char _upb_noclosure; +#define UPB_NO_CLOSURE &_upb_noclosure + +// A selector refers to a specific field handler in the Handlers object +// (for example: the STARTSUBMSG handler for field "field15"). +typedef uint32_t upb_selector_t; - // Halt processing permanently (in a non-resumable way). The endmsg handlers - // for any currently open messages will be called which can supply a more - // specific status message. No further input data will be consumed. - UPB_BREAK = -1, +#ifdef __cplusplus - // Skips to the end of the current submessage (or if we are at the top - // level, skips to the end of the entire message). In other words, it is - // like a UPB_BREAK that applies only to the current level. +// A upb::Handlers object represents the set of handlers associated with a +// message in the graph of messages. You can think of it as a big virtual +// table with functions corresponding to all the events that can fire while +// parsing or visiting a message of a specific type. +// +// Any handlers that are not set behave as if they had successfully consumed +// the value. For start* handlers that return a void* closure, an unset handler +// will propagate the existing closure. 
+class upb::Handlers { + public: + typedef upb_selector_t Selector; + typedef upb_handlertype_t Type; + + typedef bool StartMessageHandler(void* closure); + typedef void EndMessageHandler(void* closure, Status* status); + typedef void* StartFieldHandler(void* closure, void* data); + typedef bool EndFieldHandler(void *closure, void *data); + typedef void* StartStringHandler(void *c, void *d, size_t size_hint); + typedef size_t StringHandler(void *c, void *d, const char *buf, size_t len); + + template struct Value { + typedef bool Handler(void* closure, void* data, T val); + }; + + typedef Value::Handler Int32Handler; + typedef Value::Handler Int64Handler; + typedef Value::Handler Uint32Handler; + typedef Value::Handler Uint64Handler; + typedef Value::Handler FloatHandler; + typedef Value::Handler DoubleHandler; + typedef Value::Handler BoolHandler; + + // Any function pointer can be converted to this and converted back to its + // correct type. + typedef void GenericFunction(); + + // For freeing handler data. + typedef void Free(void *data); + + typedef void HandlersCallback(void *closure, upb_handlers *h); + + // Returns a new handlers object for the given frozen msgdef. A single ref + // will belong to the given owner. + // Returns NULL if memory allocation failed. + static Handlers* New(const MessageDef* m, const void *owner); + + // Convenience function for registering a graph of handlers that mirrors the + // graph of msgdefs for some message. For "m" and all its children a new set + // of handlers will be created and the given callback will be invoked, + // allowing the client to register handlers for this message. Note that any + // subhandlers set by the callback will be overwritten. + static const Handlers* NewFrozen(const MessageDef *m, const void *owner, + HandlersCallback *callback, void *closure); + + // Functionality from upb::RefCounted. 
+ bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + // Freezes the given set of handlers. You may not freeze a handler without + // also freezing any handlers they point to. In the future we may want to + // require that all fields of the submessage have had subhandlers set for + // them. + static bool Freeze(Handlers*const* handlers, int n, Status* s); + + // Returns the msgdef associated with this handlers object. + const MessageDef* message_def() const; + + // Sets the startmsg handler for the message, which is defined as follows: + // + // bool startmsg(void *closure) { + // // Called when the message begins. Returns true if processing should + // // continue. + // return true; + // } + void SetStartMessageHandler(StartMessageHandler *handler); + StartMessageHandler *GetStartMessageHandler() const; + + // Sets the endmsg handler for the message, which is defined as follows: + // + // void endmsg(void *closure, upb_status *status) { + // // Called when processing of this message ends, whether in success or + // // failure. "status" indicates the final status of processing, and + // // can also be modified in-place to update the final status. + // } + void SetEndMessageHandler(EndMessageHandler *handler); + EndMessageHandler *GetEndMessageHandler() const; + + // Sets the value handler for the given field, which is defined as follows + // (this is for an int32 field; other field types will pass their native + // C/C++ type for "val"): + // + // bool value(void *closure, void *d, int32_t val) { + // // Called when the field's value is encountered. "d" contains + // // whatever data was bound to this field when it was registered. + // // Returns true if processing should continue. + // return true; + // } + // + // The value type must exactly match f->type(). 
+ // For example, SetInt32Handler() may only be used for fields of type + // UPB_TYPE_INT32, UPB_TYPE_SINT32, UPB_TYPE_SFIXED32, and UPB_TYPE_ENUM. + // + // "d" is the data that will be bound to this callback and passed to it. + // If "fr" is non-NULL it will be run when the data is no longer needed. + // + // Returns "false" if "f" does not belong to this message or has the wrong + // type for this handler. + bool SetInt32Handler (const FieldDef* f, Int32Handler* h, void* d, Free* fr); + bool SetInt64Handler (const FieldDef* f, Int64Handler* h, void* d, Free* fr); + bool SetUint32Handler(const FieldDef* f, Uint32Handler* h, void* d, Free* fr); + bool SetUint64Handler(const FieldDef* f, Uint64Handler* h, void* d, Free* fr); + bool SetFloatHandler (const FieldDef* f, FloatHandler* h, void* d, Free* fr); + bool SetDoubleHandler(const FieldDef* f, DoubleHandler* h, void* d, Free* fr); + bool SetBoolHandler (const FieldDef* f, BoolHandler* h, void* d, Free* fr); + + // Sets handlers for a string field, which are defined as follows: + // + // void* startstr(void *closure, void *data, size_t size_hint) { + // // Called when a string value begins. The return value indicates the + // // closure for the string. "size_hint" indicates the size of the + // // string if it is known, however if the string is length-delimited + // // and the end-of-string is not available size_hint will be zero. + // // This case is indistinguishable from the case where the size is + // // known to be zero. + // // + // // TODO(haberman): is it important to distinguish these cases? + // // If we had ssize_t as a type we could make -1 "unknown", but + // // ssize_t is POSIX (not ANSI) and therefore less portable. + // // In practice I suspect it won't be important to distinguish. + // return closure; + // } // - // If you UPB_SKIPSUBMSG from a startmsg handler, the endmsg handler will - // be called to perform cleanup and return a status. 
Returning - // UPB_SKIPSUBMSG from a startsubmsg handler will *not* call the startmsg, - // endmsg, or endsubmsg handlers. + // size_t str(void *closure, void *data, const char *str, size_t len) { + // // Called for each buffer of string data; the multiple physical buffers + // // are all part of the same logical string. The return value indicates + // // how many bytes were consumed. If this number is less than "len", + // // this will also indicate that processing should be halted for now, + // // like returning false or UPB_BREAK from any other callback. If + // // this number is greater than "len", the excess bytes will be skipped over + // // and not passed to the callback. + // return len; + // } // - // If UPB_SKIPSUBMSG is called from the top-level message, no further input - // data will be consumed. - UPB_SKIPSUBMSG = -2, + // bool endstr(void *closure, void *data) { + // // Called when a string value ends. + // return true; + // } + bool SetStartStringHandler(const FieldDef* f, StartStringHandler* h, + void* d, Free* fr); + bool SetStringHandler(const FieldDef* f, StringHandler* h, void* d, Free* fr); + bool SetEndStringHandler(const FieldDef* f, EndFieldHandler* h, + void* d, Free* fr); + + // A setter that is templated on the type of the value. + template bool SetValueHandler( + const FieldDef* f, typename Value::Handler* h, void* d, Free* fr); + + // Sets the startseq handler, which is defined as follows: + // + // void *startseq(void *closure, void *data) { + // // Called when a sequence (repeated field) begins. The returned + // // pointer indicates the closure for the sequence (or UPB_BREAK + // // to interrupt processing). + // return closure; + // } + // + // Returns "false" if "f" does not belong to this message or is not a + // repeated field. + // + // "data" is the data that will be bound to this callback and passed to it. + // If "cleanup" is non-NULL it will be run when the data is no longer needed. 
+ bool SetStartSequenceHandler(const FieldDef* f, StartFieldHandler *handler, + void* data, Free* cleanup); - // TODO: Add UPB_SUSPEND, for resumable producers/consumers. -} upb_flow_t; + // Sets the startsubmsg handler for the given field, which is defined as + // follows: + // + // void *startsubmsg(void *closure, void *data) { + // // Called when a submessage begins. The returned pointer indicates the + // // closure for the submessage (or UPB_BREAK to interrupt processing). + // return closure; + // } + // + // "data" is the data that will be bound to this callback and passed to it. + // If "cleanup" is non-NULL it will be run when the data is no longer needed. + // + // Returns "false" if "f" does not belong to this message or is not a + // submessage/group field. + bool SetStartSubMessageHandler(const FieldDef* f, StartFieldHandler *handler, + void* data, Free* cleanup); -// The startsubmsg handler needs to also pass a closure to the submsg. -typedef struct { - upb_flow_t flow; - void *closure; -} upb_sflow_t; + // Sets the endsubmsg handler for the given field, which is defined as + // follows: + // + // bool endsubmsg(void *closure, void *data) { + // // Called when a submessage ends. Returns true to continue processing. + // return true; + // } + // + // "data" is the data that will be bound to this callback and passed to it. + // If "cleanup" is non-NULL it will be run when the data is no longer needed. + // + // Returns "false" if "f" does not belong to this message or is not a + // submessage/group field. 
+ bool SetEndSubMessageHandler(const FieldDef* f, EndFieldHandler *handler, + void* data, Free* cleanup); -INLINE upb_sflow_t UPB_SFLOW(upb_flow_t flow, void *closure) { - upb_sflow_t ret = {flow, closure}; - return ret; -} -#define UPB_CONTINUE_WITH(c) UPB_SFLOW(UPB_CONTINUE, c) -#define UPB_SBREAK UPB_SFLOW(UPB_BREAK, NULL) + // Sets the endseq handler for the given field, which is defined as + // follows: + // + // bool endseq(void *closure, void *data) { + // // Called when a sequence ends. Returns true to continue processing. + // return true; + // } + // + // "data" is the data that will be bound to this callback and passed to it. + // If "cleanup" is non-NULL it will be run when the data is no longer needed. + // + // Returns "false" if "f" does not belong to this message or is not a + // repeated field. + bool SetEndSequenceHandler(const FieldDef* f, EndFieldHandler *handler, + void* data, Free* cleanup); + + // Sets or gets the object that specifies handlers for the given field, which + // must be a submessage or group. Returns NULL if no handlers are set. + bool SetSubHandlers(const FieldDef* f, const Handlers* sub); + const Handlers* GetSubHandlers(const FieldDef* f) const; + + // NOTE: The remaining functions in this class are mostly of interest to + // byte-code/JIT compilers (or upb internals); most users will not need them. + // These functions also require more care, since passing a selector that + // does not match the type of these handlers yields undefined behavior. + + // A selector refers to a specific field handler in the Handlers object + // (for example: the STARTSUBMSG handler for field "field15"). + // On success, returns true and stores the selector in "s". + // If the FieldDef or Type are invalid, returns false. + // The returned selector is ONLY valid for Handlers whose MessageDef + // contains this FieldDef. + static bool GetSelector(const FieldDef* f, Type type, Selector* s); + + // Returns the function pointer for this handler. 
It is the client's + // responsibility to cast to the correct function type before calling it. + GenericFunction* GetHandler(Selector selector); + + // Returns the handler data that was registered with this handler. + void* GetHandlerData(Selector selector); + + // Gets the byte offset from a Handlers* where the given handler can be found. + // Useful for JITs that want to read the pointer in their fast path. + static size_t GetHandlerOffset(Selector selector); + + // Could add any of the following functions as-needed, with some minor + // implementation changes: + // + // const FieldDef* GetFieldDef(Selector selector); + // static bool IsSequence(Selector selector); + // Selector GetEndSelector(Selector start_selector); -// Typedefs for all of the handler functions defined above. -typedef upb_flow_t (upb_startmsg_handler)(void *c); -typedef void (upb_endmsg_handler)(void *c, upb_status *status); -typedef upb_flow_t (upb_value_handler)(void *c, upb_value fval, upb_value val); -typedef upb_sflow_t (upb_startfield_handler)(void *closure, upb_value fval); -typedef upb_flow_t (upb_endfield_handler)(void *closure, upb_value fval); + private: + UPB_DISALLOW_POD_OPS(Handlers); +#else +struct upb_handlers { +#endif + upb_refcounted base; + const upb_msgdef *msg; + bool (*startmsg)(void*); + void (*endmsg)(void*, upb_status*); + void *fh_base[1]; // Start of dynamically-sized field handler array. +}; -/* upb_fhandlers **************************************************************/ +// Native C API. 
+#ifdef __cplusplus +extern "C" { +#endif +typedef bool upb_startmsg_handler(void *c); +typedef void upb_endmsg_handler(void *c, upb_status *status); +typedef void* upb_startfield_handler(void *closure, void *d); +typedef bool upb_endfield_handler(void *closure, void *d); +typedef void upb_handlers_callback(void *closure, upb_handlers *h); +typedef void upb_handlerfree(void *d); +typedef void upb_func(); + +typedef bool upb_int32_handler(void *c, void *d, int32_t val); +typedef bool upb_int64_handler(void *c, void *d, int64_t val); +typedef bool upb_uint32_handler(void *c, void *d, uint32_t val); +typedef bool upb_uint64_handler(void *c, void *d, uint64_t val); +typedef bool upb_float_handler(void *c, void *d, float val); +typedef bool upb_double_handler(void *c, void *d, double val); +typedef bool upb_bool_handler(void *c, void *d, bool val); +typedef void* upb_startstr_handler(void *closure, void *d, size_t size_hint); +typedef size_t upb_string_handler(void *c, void *d, const char *buf, size_t n); + +upb_handlers *upb_handlers_new(const upb_msgdef *m, const void *owner); +const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m, + const void *owner, + upb_handlers_callback *callback, + void *closure); + +// From upb_refcounted. 
+void upb_handlers_unref(const upb_handlers *h, const void *owner); +bool upb_handlers_isfrozen(const upb_handlers *h); +void upb_handlers_ref(const upb_handlers *h, const void *owner); +void upb_handlers_donateref( + const upb_handlers *h, const void *from, const void *to); +void upb_handlers_checkref(const upb_handlers *h, const void *owner); + +bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s); +const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h); +void upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handler *handler); +upb_startmsg_handler *upb_handlers_getstartmsg(const upb_handlers *h); +void upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handler *handler); +upb_endmsg_handler *upb_handlers_getendmsg(const upb_handlers *h); +bool upb_handlers_setint32( + upb_handlers *h, const upb_fielddef *f, upb_int32_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setint64( + upb_handlers *h, const upb_fielddef *f, upb_int64_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setuint32( + upb_handlers *h, const upb_fielddef *f, upb_uint32_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setuint64( + upb_handlers *h, const upb_fielddef *f, upb_uint64_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setfloat( + upb_handlers *h, const upb_fielddef *f, upb_float_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setdouble( + upb_handlers *h, const upb_fielddef *f, upb_double_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setbool( + upb_handlers *h, const upb_fielddef *f, upb_bool_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setstartstr( + upb_handlers *h, const upb_fielddef *f, upb_startstr_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setstring( + upb_handlers *h, const upb_fielddef *f, upb_string_handler *handler, + void *d, upb_handlerfree *fr); +bool 
upb_handlers_setendstr( + upb_handlers *h, const upb_fielddef *f, upb_endfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setstartseq( + upb_handlers *h, const upb_fielddef *f, upb_startfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setstartsubmsg( + upb_handlers *h, const upb_fielddef *f, upb_startfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setendsubmsg( + upb_handlers *h, const upb_fielddef *f, upb_endfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setendseq( + upb_handlers *h, const upb_fielddef *f, upb_endfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setsubhandlers( + upb_handlers *h, const upb_fielddef *f, const upb_handlers *sub); +const upb_handlers *upb_handlers_getsubhandlers( + const upb_handlers *h, const upb_fielddef *f); +upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f); +bool upb_getselector( + const upb_fielddef *f, upb_handlertype_t type, upb_selector_t *s); +upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s); +void *upb_handlers_gethandlerdata(const upb_handlers *h, upb_selector_t s); +size_t upb_gethandleroffset(upb_selector_t s); + +// Internal-only. +uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f); +uint32_t upb_handlers_selectorcount(const upb_fielddef *f); +#ifdef __cplusplus +} // extern "C" +#endif -// A upb_fhandlers object represents the set of handlers associated with one -// specific message field. +// Convenience versions of the above that first look up the field by name. 
+#define DEFINE_NAME_SETTER(slot, type) \ + INLINE bool upb_handlers_set ## slot ## _n( \ + upb_handlers *h, const char *name, type val, \ + void *d, upb_handlerfree *fr) { \ + const upb_fielddef *f = upb_msgdef_ntof(upb_handlers_msgdef(h), name); \ + return f && upb_handlers_set ## slot(h, f, val, d, fr); \ + } +DEFINE_NAME_SETTER(int32, upb_int32_handler*); +DEFINE_NAME_SETTER(int64, upb_int64_handler*); +DEFINE_NAME_SETTER(uint32, upb_uint32_handler*); +DEFINE_NAME_SETTER(uint64, upb_uint64_handler*); +DEFINE_NAME_SETTER(float, upb_float_handler*); +DEFINE_NAME_SETTER(double, upb_double_handler*); +DEFINE_NAME_SETTER(bool, upb_bool_handler*); +DEFINE_NAME_SETTER(startstr, upb_startstr_handler*); +DEFINE_NAME_SETTER(string, upb_string_handler*); +DEFINE_NAME_SETTER(endstr, upb_endfield_handler*); +DEFINE_NAME_SETTER(startseq, upb_startfield_handler*); +DEFINE_NAME_SETTER(startsubmsg, upb_startfield_handler*); +DEFINE_NAME_SETTER(endsubmsg, upb_endfield_handler*); +DEFINE_NAME_SETTER(endseq, upb_endfield_handler*); +#undef DEFINE_NAME_SETTER + +// Value writers for every in-memory type: write the data to a known offset +// from the closure "c." These depend on the fval being a pointer to a +// structure that is (or begins with) the upb_stdmsg_fval type. // -// TODO: remove upb_decoder-specific fields from this, and instead have -// upb_decoderplan make a deep copy of the whole graph with its own fields -// added. -struct _upb_decoder; -struct _upb_mhandlers; -typedef struct _upb_fieldent { - upb_fieldtype_t type; - bool repeated; - uint32_t refcount; - uint32_t number; +// TODO(haberman): These are hacky; remove them and replace with an API that +// lets you set a simple "writer" handler in a way that can generate +// specialized code right then. 
+ +typedef struct upb_stdmsg_fval { +#ifdef __cplusplus + upb_stdmsg_fval(size_t offset_, int32_t hasbit_) + : offset(offset_), + hasbit(hasbit_) { + } +#endif + size_t offset; int32_t hasbit; - struct _upb_mhandlers *msg; - struct _upb_mhandlers *submsg; // Set iff upb_issubmsgtype(type) == true. - upb_value fval; - upb_value_handler *value; - upb_startfield_handler *startsubmsg; - upb_endfield_handler *endsubmsg; - upb_startfield_handler *startseq; - upb_endfield_handler *endseq; -#ifdef UPB_USE_JIT_X64 - uint32_t jit_pclabel; - uint32_t jit_pclabel_notypecheck; - uint32_t jit_submsg_done_pclabel; +} upb_stdmsg_fval; + +#ifdef __cplusplus +extern "C" { #endif -} upb_fhandlers; - -// fhandlers are created as part of a upb_handlers instance, but can be ref'd -// and unref'd to prolong the life of the handlers. -void upb_fhandlers_ref(upb_fhandlers *m); -void upb_fhandlers_unref(upb_fhandlers *m); - -// upb_fhandlers accessors -#define UPB_FHANDLERS_ACCESSORS(name, type) \ - INLINE void upb_fhandlers_set ## name(upb_fhandlers *f, type v){f->name = v;} \ - INLINE type upb_fhandlers_get ## name(const upb_fhandlers *f) { return f->name; } -// TODO(haberman): need a way of keeping the fval alive even if a plan outlasts -// the handlers. -UPB_FHANDLERS_ACCESSORS(fval, upb_value) -UPB_FHANDLERS_ACCESSORS(value, upb_value_handler*) -UPB_FHANDLERS_ACCESSORS(startsubmsg, upb_startfield_handler*) -UPB_FHANDLERS_ACCESSORS(endsubmsg, upb_endfield_handler*) -UPB_FHANDLERS_ACCESSORS(startseq, upb_startfield_handler*) -UPB_FHANDLERS_ACCESSORS(endseq, upb_endfield_handler*) -UPB_FHANDLERS_ACCESSORS(msg, struct _upb_mhandlers*) -UPB_FHANDLERS_ACCESSORS(submsg, struct _upb_mhandlers*) -// If set to >= 0, the hasbit will automatically be set when the corresponding -// field is parsed (when a JIT is enabled, this can be significantly more -// efficient than setting the hasbit yourself inside the callback). 
For values -// it is undefined whether the hasbit is set before or after the callback is -// called. For seq and submsg, the hasbit is set *after* the start handler is -// called, but before any of the handlers for the submsg or sequence. -UPB_FHANDLERS_ACCESSORS(hasbit, int32_t) - - -/* upb_mhandlers **************************************************************/ - -// A upb_mhandlers object represents the set of handlers associated with a -// message in the graph of messages. - -typedef struct _upb_mhandlers { - uint32_t refcount; - upb_startmsg_handler *startmsg; - upb_endmsg_handler *endmsg; - upb_inttable fieldtab; // Maps field number -> upb_fhandlers. - bool is_group; -#ifdef UPB_USE_JIT_X64 - // Used inside the JIT to track labels (jmp targets) in the generated code. - uint32_t jit_startmsg_pclabel; // Starting a parse of this (sub-)message. - uint32_t jit_afterstartmsg_pclabel; // After calling the startmsg handler. - uint32_t jit_endofbuf_pclabel; // ptr hitend, but delim_end or jit_end? - uint32_t jit_endofmsg_pclabel; // Done parsing this (sub-)message. - uint32_t jit_dyndispatch_pclabel; // Dispatch by table lookup. - uint32_t jit_unknownfield_pclabel; // Parsed an unknown field. - uint32_t max_field_number; - // Currently keyed on field number. Could also try keying it - // on encoded or decoded tag, or on encoded field number. - void **tablearray; - // Pointer to the JIT code for parsing this message. 
- void *jit_func; +bool upb_stdmsg_setint32(void *c, void *d, int32_t val); +bool upb_stdmsg_setint64(void *c, void *d, int64_t val); +bool upb_stdmsg_setuint32(void *c, void *d, uint32_t val); +bool upb_stdmsg_setuint64(void *c, void *d, uint64_t val); +bool upb_stdmsg_setfloat(void *c, void *d, float val); +bool upb_stdmsg_setdouble(void *c, void *d, double val); +bool upb_stdmsg_setbool(void *c, void *d, bool val); +#ifdef __cplusplus +} // extern "C" #endif -} upb_mhandlers; - -// mhandlers are created as part of a upb_handlers instance, but can be ref'd -// and unref'd to prolong the life of the handlers. -void upb_mhandlers_ref(upb_mhandlers *m); -void upb_mhandlers_unref(upb_mhandlers *m); - -// Creates a new field with the given name and number. There must not be an -// existing field with either this name or number or abort() will be called. -// TODO: this should take a name also. -upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, bool repeated); -// Like the previous but for MESSAGE or GROUP fields. For GROUP fields, the -// given submessage must not have any fields with this field number. -upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, - bool repeated, - upb_mhandlers *subm); - -// upb_mhandlers accessors. -#define UPB_MHANDLERS_ACCESSORS(name, type) \ - INLINE void upb_mhandlers_set ## name(upb_mhandlers *m, type v){m->name = v;} \ - INLINE type upb_mhandlers_get ## name(upb_mhandlers *m) { return m->name; } -UPB_MHANDLERS_ACCESSORS(startmsg, upb_startmsg_handler*); -UPB_MHANDLERS_ACCESSORS(endmsg, upb_endmsg_handler*); - -// Returns fhandlers for the given field, or NULL if none. -upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n); - - -/* upb_handlers ***************************************************************/ - -struct _upb_handlers { - uint32_t refcount; - upb_mhandlers **msgs; // Array of msgdefs, [0]=toplevel. 
- int msgs_len, msgs_size; - bool should_jit; -}; -typedef struct _upb_handlers upb_handlers; - -upb_handlers *upb_handlers_new(void); -void upb_handlers_ref(upb_handlers *h); -void upb_handlers_unref(upb_handlers *h); - -// Appends a new message to the graph of handlers and returns it. This message -// can be obtained later at index upb_handlers_msgcount()-1. All handlers will -// be initialized to no-op handlers. -upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h); -upb_mhandlers *upb_handlers_getmhandlers(upb_handlers *h, int index); - -// Convenience function for registering handlers for all messages and -// fields in a msgdef and all its children. For every registered message -// "msgreg_cb" will be called with the newly-created mhandlers, and likewise -// with "fieldreg_cb" -// -// See upb_handlers_reghandlerset() below for an example. -typedef void upb_onmsgreg( - void *closure, upb_mhandlers *mh, const upb_msgdef *m); -typedef void upb_onfieldreg( - void *closure, upb_fhandlers *fh, const upb_fielddef *f); -upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m, - upb_onmsgreg *msgreg_cb, - upb_onfieldreg *fieldreg_cb, - void *closure); - -// Convenience function for registering a set of handlers for all messages and -// fields in a msgdef and its children, with the fval bound to the upb_fielddef. -// Any of the handlers may be NULL, in which case no callback will be set and -// the nop callback will be used. 
-typedef struct { - upb_startmsg_handler *startmsg; - upb_endmsg_handler *endmsg; - upb_value_handler *value; - upb_startfield_handler *startsubmsg; - upb_endfield_handler *endsubmsg; - upb_startfield_handler *startseq; - upb_endfield_handler *endseq; -} upb_handlerset; - -INLINE void upb_onmreg_hset(void *c, upb_mhandlers *mh, const upb_msgdef *m) { - (void)m; - upb_handlerset *hs = (upb_handlerset*)c; - if (hs->startmsg) upb_mhandlers_setstartmsg(mh, hs->startmsg); - if (hs->endmsg) upb_mhandlers_setendmsg(mh, hs->endmsg); -} -INLINE void upb_onfreg_hset(void *c, upb_fhandlers *fh, const upb_fielddef *f) { - upb_handlerset *hs = (upb_handlerset*)c; - if (hs->value) upb_fhandlers_setvalue(fh, hs->value); - if (hs->startsubmsg) upb_fhandlers_setstartsubmsg(fh, hs->startsubmsg); - if (hs->endsubmsg) upb_fhandlers_setendsubmsg(fh, hs->endsubmsg); - if (hs->startseq) upb_fhandlers_setstartseq(fh, hs->startseq); - if (hs->endseq) upb_fhandlers_setendseq(fh, hs->endseq); - upb_value val; - upb_value_setfielddef(&val, f); - upb_fhandlers_setfval(fh, val); -} -INLINE upb_mhandlers *upb_handlers_reghandlerset( - upb_handlers *h, const upb_msgdef *m, upb_handlerset *hs) { - return upb_handlers_regmsgdef(h, m, &upb_onmreg_hset, &upb_onfreg_hset, hs); -} - - -/* upb_dispatcher *************************************************************/ - -// WARNING: upb_dispatcher should be considered INTERNAL-ONLY. The interface -// between it and upb_decoder is somewhat tightly coupled and may change. -// -// upb_dispatcher can be used by sources of data to invoke the appropriate -// handlers on a upb_handlers object. Besides maintaining the runtime stack of -// closures and handlers, the dispatcher checks the return status of user -// callbacks and properly handles statuses other than UPB_CONTINUE, invoking -// "skip" or "exit" handlers on the underlying data source as appropriate. 
- -typedef struct { - upb_fhandlers *f; - void *closure; - uint64_t end_ofs; - bool is_sequence; // frame represents seq or submsg? (f might be both). - bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX - // (strings aren't pushed). -} upb_dispatcher_frame; - -typedef void upb_exit_handler(void *); - -typedef struct { - upb_dispatcher_frame *top, *limit; - - // Msg and dispatch table for the current level. - upb_mhandlers *msgent; - upb_mhandlers *toplevel_msgent; - upb_exit_handler UPB_NORETURN *exitjmp; - void *srcclosure; - bool top_is_implicit; - - // Stack. - upb_status *status; - upb_dispatcher_frame stack[UPB_MAX_NESTING]; -} upb_dispatcher; - -// Caller retains ownership of the status object. -void upb_dispatcher_init(upb_dispatcher *d, upb_status *status, - upb_exit_handler UPB_NORETURN *exit, void *closure); -upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure, - upb_mhandlers *top_msg); -void upb_dispatcher_uninit(upb_dispatcher *d); - -// Tests whether the message could legally end here (either the stack is empty -// or the only open stack frame is implicit). -bool upb_dispatcher_islegalend(upb_dispatcher *d); - -// Unwinds one or more stack frames based on the given flow constant that was -// just returned from a handler. Calls end handlers as appropriate. -void _upb_dispatcher_abortjmp(upb_dispatcher *d) UPB_NORETURN; - -INLINE void _upb_dispatcher_sethas(void *_p, int32_t hasbit) { - char *p = (char*)_p; - if (hasbit >= 0) p[(uint32_t)hasbit / 8] |= (1 << ((uint32_t)hasbit % 8)); -} - -// Dispatch functions -- call the user handler and handle errors. 
-INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f, - upb_value val) { - upb_flow_t flow = UPB_CONTINUE; - if (f->value) flow = f->value(d->top->closure, f->fval, val); - _upb_dispatcher_sethas(d->top->closure, f->hasbit); - if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d); -} -void upb_dispatch_startmsg(upb_dispatcher *d); -void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status); -upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, - upb_fhandlers *f); -upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d); -upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, - upb_fhandlers *f); -upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d); #ifdef __cplusplus -} /* extern "C" */ + +namespace upb { + +// C++ Wrappers +inline Handlers* Handlers::New(const MessageDef* m, const void *owner) { + return upb_handlers_new(m, owner); +} +inline const Handlers* Handlers::NewFrozen( + const MessageDef *m, const void *owner, + upb_handlers_callback *callback, void *closure) { + return upb_handlers_newfrozen(m, owner, callback, closure); +} +inline bool Handlers::IsFrozen() const { + return upb_handlers_isfrozen(this); +} +inline void Handlers::Ref(const void* owner) const { + upb_handlers_ref(this, owner); +} +inline void Handlers::Unref(const void* owner) const { + upb_handlers_unref(this, owner); +} +inline void Handlers::DonateRef(const void *from, const void *to) const { + upb_handlers_donateref(this, from, to); +} +inline void Handlers::CheckRef(const void *owner) const { + upb_handlers_checkref(this, owner); +} +inline bool Handlers::Freeze(Handlers*const* handlers, int n, Status* s) { + return upb_handlers_freeze(handlers, n, s); +} +inline const MessageDef* Handlers::message_def() const { + return upb_handlers_msgdef(this); +} +inline void Handlers::SetStartMessageHandler( + Handlers::StartMessageHandler *handler) { + upb_handlers_setstartmsg(this, handler); +} +inline void 
Handlers::SetEndMessageHandler( + Handlers::EndMessageHandler *handler) { + upb_handlers_setendmsg(this, handler); +} +inline bool Handlers::SetInt32Handler( + const FieldDef *f, Handlers::Int32Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setint32(this, f, handler, d, fr); +} +inline bool Handlers::SetInt64Handler( + const FieldDef *f, Handlers::Int64Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setint64(this, f, handler, d, fr); +} +inline bool Handlers::SetUint32Handler( + const FieldDef *f, Handlers::Uint32Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setuint32(this, f, handler, d, fr); +} +inline bool Handlers::SetUint64Handler( + const FieldDef *f, Handlers::Uint64Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setuint64(this, f, handler, d, fr); +} +inline bool Handlers::SetFloatHandler( + const FieldDef *f, Handlers::FloatHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setfloat(this, f, handler, d, fr); +} +inline bool Handlers::SetDoubleHandler( + const FieldDef *f, Handlers::DoubleHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setdouble(this, f, handler, d, fr); +} +inline bool Handlers::SetBoolHandler( + const FieldDef *f, Handlers::BoolHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setbool(this, f, handler, d, fr); +} +inline bool Handlers::SetStartStringHandler( + const FieldDef* f, Handlers::StartStringHandler* handler, + void* d, Handlers::Free* fr) { + return upb_handlers_setstartstr(this, f, handler, d, fr); +} +inline bool Handlers::SetEndStringHandler( + const FieldDef* f, Handlers::EndFieldHandler* handler, + void* d, Handlers::Free* fr) { + return upb_handlers_setendstr(this, f, handler, d, fr); +} +inline bool Handlers::SetStringHandler( + const FieldDef *f, Handlers::StringHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setstring(this, f, 
handler, d, fr); +} +inline bool Handlers::SetStartSequenceHandler( + const FieldDef* f, Handlers::StartFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setstartseq(this, f, handler, d, fr); +} +inline bool Handlers::SetStartSubMessageHandler( + const FieldDef* f, Handlers::StartFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setstartsubmsg(this, f, handler, d, fr); +} +inline bool Handlers::SetEndSubMessageHandler( + const FieldDef* f, Handlers::EndFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setendsubmsg(this, f, handler, d, fr); +} +inline bool Handlers::SetEndSequenceHandler( + const FieldDef* f, Handlers::EndFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setendseq(this, f, handler, d, fr); +} +inline bool Handlers::SetSubHandlers( + const FieldDef* f, const Handlers* sub) { + return upb_handlers_setsubhandlers(this, f, sub); +} +inline Handlers::StartMessageHandler *Handlers::GetStartMessageHandler() const { + return upb_handlers_getstartmsg(this); +} +inline Handlers::EndMessageHandler *Handlers::GetEndMessageHandler() const { + return upb_handlers_getendmsg(this); +} +inline const Handlers* Handlers::GetSubHandlers( + const FieldDef* f) const { + return upb_handlers_getsubhandlers(this, f); +} +inline bool Handlers::GetSelector( + const FieldDef* f, Handlers::Type type, Handlers::Selector* s) { + return upb_getselector(f, type, s); +} +inline Handlers::GenericFunction* Handlers::GetHandler( + Handlers::Selector selector) { + return upb_handlers_gethandler(this, selector); +} +inline void* Handlers::GetHandlerData(Handlers::Selector selector) { + return upb_handlers_gethandlerdata(this, selector); +} +inline size_t Handlers::GetHandlerOffset(Handlers::Selector selector) { + return upb_gethandleroffset(selector); +} + +#define SET_VALUE_HANDLER(type, ctype) \ + template<> \ + inline bool Handlers::SetValueHandler( \ + const FieldDef* f, \ 
+ typename Handlers::Value::Handler* handler, \ + void* data, Handlers::Free* cleanup) { \ + return upb_handlers_set ## type(this, f, handler, data, cleanup); \ + } +SET_VALUE_HANDLER(double, double); +SET_VALUE_HANDLER(float, float); +SET_VALUE_HANDLER(uint64, uint64_t); +SET_VALUE_HANDLER(uint32, uint32_t); +SET_VALUE_HANDLER(int64, int64_t); +SET_VALUE_HANDLER(int32, int32_t); +SET_VALUE_HANDLER(bool, bool); +#undef SET_VALUE_HANDLER + +template <class T> void DeletePointer(void *p) { delete static_cast<T*>(p); } + +template <class T> +void SetStoreValueHandler( + const FieldDef* f, size_t offset, int32_t hasbit, Handlers* h); + +// A handy templated function that will register a value-storing handler for a +// given C++ type. +#define SET_STORE_VALUE_HANDLER(type, ctype) \ + template <> \ + inline void SetStoreValueHandler<ctype>(const FieldDef* f, size_t offset, \ + int32_t hasbit, Handlers* h) { \ + h->SetValueHandler<ctype>( \ + f, upb_stdmsg_set ## type, new upb_stdmsg_fval(offset, hasbit), \ + &upb::DeletePointer<upb_stdmsg_fval>); \ + } + +SET_STORE_VALUE_HANDLER(double, double); +SET_STORE_VALUE_HANDLER(float, float); +SET_STORE_VALUE_HANDLER(uint64, uint64_t); +SET_STORE_VALUE_HANDLER(uint32, uint32_t); +SET_STORE_VALUE_HANDLER(int64, int64_t); +SET_STORE_VALUE_HANDLER(int32, int32_t); +SET_STORE_VALUE_HANDLER(bool, bool); +#undef SET_STORE_VALUE_HANDLER + +} // namespace upb #endif #endif diff --git a/upb/msg.c b/upb/msg.c deleted file mode 100644 index c671b7b..0000000 --- a/upb/msg.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Google Inc. See LICENSE for details. - * Author: Josh Haberman - * - */ - -#include "upb/upb.h" -#include "upb/msg.h" - -#define UPB_ACCESSOR(type, ctype) \ - upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval, \ - upb_value val) { \ - assert(_m != NULL); \ - const upb_fielddef *f = upb_value_getfielddef(fval); \ - uint8_t *m = _m; \ - /* Hasbit is set automatically by the handlers. 
*/ \ - *(ctype*)&m[f->offset] = upb_value_get ## type(val); \ - return UPB_CONTINUE; \ - } \ - -UPB_ACCESSOR(double, double) -UPB_ACCESSOR(float, float) -UPB_ACCESSOR(int32, int32_t) -UPB_ACCESSOR(int64, int64_t) -UPB_ACCESSOR(uint32, uint32_t) -UPB_ACCESSOR(uint64, uint64_t) -UPB_ACCESSOR(bool, bool) -UPB_ACCESSOR(ptr, void*) -#undef UPB_ACCESSORS - -static void upb_accessors_onfreg(void *c, upb_fhandlers *fh, - const upb_fielddef *f) { - (void)c; - if (f->accessor) { - upb_fhandlers_setfval(fh, f->fval); - if (upb_isseq(f)) { - upb_fhandlers_setstartseq(fh, f->accessor->startseq); - upb_fhandlers_setvalue(fh, f->accessor->append); - upb_fhandlers_setstartsubmsg(fh, f->accessor->appendsubmsg); - } else { - upb_fhandlers_setvalue(fh, f->accessor->set); - upb_fhandlers_setstartsubmsg(fh, f->accessor->startsubmsg); - upb_fhandlers_sethasbit(fh, f->hasbit); - } - } -} - -upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, const upb_msgdef *m) { - return upb_handlers_regmsgdef(h, m, NULL, &upb_accessors_onfreg, NULL); -} diff --git a/upb/msg.h b/upb/msg.h deleted file mode 100644 index 7aaaf2a..0000000 --- a/upb/msg.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. - * Author: Josh Haberman - * - * Routines for reading and writing message data to an in-memory structure, - * similar to a C struct. - * - * upb does not define one single message object that everyone must use. - * Rather it defines an abstract interface for reading and writing members - * of a message object, and all of the parsers and serializers use this - * abstract interface. This allows upb's parsers and serializers to be used - * regardless of what memory management scheme or synchronization model the - * application is using. - * - * A standard set of accessors is provided for doing simple reads and writes at - * a known offset into the message. 
These accessors should be used when - * possible, because they are specially optimized -- for example, the JIT can - * recognize them and emit specialized code instead of having to call the - * function at all. The application can substitute its own accessors when the - * standard accessors are not suitable. - */ - -#ifndef UPB_MSG_H -#define UPB_MSG_H - -#include -#include "upb/def.h" -#include "upb/handlers.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -/* upb_accessor ***************************************************************/ - -// A upb_accessor is a table of function pointers for doing reads and writes -// for one specific upb_fielddef. Each field has a separate accessor, which -// lives in the fielddef. - -typedef bool upb_has_reader(const void *m, upb_value fval); -typedef upb_value upb_value_reader(const void *m, upb_value fval); - -typedef const void *upb_seqbegin_handler(const void *s); -typedef const void *upb_seqnext_handler(const void *s, const void *iter); -typedef upb_value upb_seqget_handler(const void *iter); -INLINE bool upb_seq_done(const void *iter) { return iter == NULL; } - -typedef struct _upb_accessor_vtbl { - // Writers. These take an fval as a parameter because the callbacks are used - // as upb_handlers, but the fval is always the fielddef for that field. - upb_startfield_handler *startsubmsg; // Non-repeated submsg fields. - upb_value_handler *set; // Non-repeated scalar fields. - upb_startfield_handler *startseq; // Repeated fields only. - upb_startfield_handler *appendsubmsg; // Repeated submsg fields. - upb_value_handler *append; // Repeated scalar fields. - - // TODO: expect to also need endsubmsg and endseq. - - // Readers. 
- upb_has_reader *has; - upb_value_reader *getseq; - upb_value_reader *get; - upb_seqbegin_handler *seqbegin; - upb_seqnext_handler *seqnext; - upb_seqget_handler *seqget; -} upb_accessor_vtbl; - -// Registers handlers for writing into a message of the given type using -// whatever accessors it has defined. -upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, const upb_msgdef *m); - -INLINE void upb_msg_clearbit(void *msg, const upb_fielddef *f) { - ((char*)msg)[f->hasbit / 8] &= ~(1 << (f->hasbit % 8)); -} - -/* upb_msg/upb_seq ************************************************************/ - -// These accessor functions are simply convenience methods for reading or -// writing to a message through its accessors. - -INLINE bool upb_msg_has(const void *m, const upb_fielddef *f) { - return f->accessor && f->accessor->has(m, f->fval); -} - -// May only be called for fields that have accessors. -INLINE upb_value upb_msg_get(const void *m, const upb_fielddef *f) { - assert(f->accessor && !upb_isseq(f)); - return f->accessor->get(m, f->fval); -} - -// May only be called for fields that have accessors. 
-INLINE upb_value upb_msg_getseq(const void *m, const upb_fielddef *f) { - assert(f->accessor && upb_isseq(f)); - return f->accessor->getseq(m, f->fval); -} - -INLINE void upb_msg_set(void *m, const upb_fielddef *f, upb_value val) { - assert(f->accessor); - f->accessor->set(m, f->fval, val); -} - -INLINE const void *upb_seq_begin(const void *s, const upb_fielddef *f) { - assert(f->accessor); - return f->accessor->seqbegin(s); -} -INLINE const void *upb_seq_next(const void *s, const void *iter, - const upb_fielddef *f) { - assert(f->accessor); - assert(!upb_seq_done(iter)); - return f->accessor->seqnext(s, iter); -} -INLINE upb_value upb_seq_get(const void *iter, const upb_fielddef *f) { - assert(f->accessor); - assert(!upb_seq_done(iter)); - return f->accessor->seqget(iter); -} - -INLINE bool upb_msg_has_named(const void *m, const upb_msgdef *md, - const char *field_name) { - const upb_fielddef *f = upb_msgdef_ntof(md, field_name); - return f && upb_msg_has(m, f); -} - -INLINE bool upb_msg_get_named(const void *m, const upb_msgdef *md, - const char *field_name, upb_value *val) { - const upb_fielddef *f = upb_msgdef_ntof(md, field_name); - if (!f) return false; - *val = upb_msg_get(m, f); - return true; -} - -// Value writers for every in-memory type: write the data to a known offset -// from the closure "c." -// -// TODO(haberman): instead of having standard writer functions, should we have -// a bool in the accessor that says "write raw value to the field's offset"? 
-upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setuint32(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setdouble(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setptr(void *c, upb_value fval, upb_value val); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 30f7c65..065c495 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -5,17 +5,13 @@ * Author: Josh Haberman */ +#include #include #include #include "upb/bytestream.h" -#include "upb/msg.h" #include "upb/pb/decoder.h" #include "upb/pb/varint.h" -#ifndef UINT32_MAX -#define UINT32_MAX 0xffffffff -#endif - typedef struct { uint8_t native_wire_type; bool is_numeric; @@ -62,11 +58,12 @@ static const upb_decoder_typeinfo upb_decoder_types[] = { #include "upb/pb/decoder_x64.h" #endif -upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) { +upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit) { + UPB_UNUSED(allowjit); upb_decoderplan *p = malloc(sizeof(*p)); + assert(upb_handlers_isfrozen(h)); p->handlers = h; - upb_handlers_ref(h); - h->should_jit = allowjit; + upb_handlers_ref(h, p); #ifdef UPB_USE_JIT_X64 p->jit_code = NULL; if (allowjit) upb_decoderplan_makejit(p); @@ -76,7 +73,7 @@ upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) { void upb_decoderplan_unref(upb_decoderplan *p) { // TODO: make truly refcounted. 
- upb_handlers_unref(p->handlers); + upb_handlers_unref(p->handlers, p); #ifdef UPB_USE_JIT_X64 if (p->jit_code) upb_decoderplan_freejit(p); #endif @@ -100,8 +97,8 @@ bool upb_decoderplan_hasjitcode(upb_decoderplan *p) { // configuration. But emperically on a Core i7, performance increases 30-50% // with these annotations. Every instance where these appear, gcc 4.2.1 made // the wrong decision and degraded performance in benchmarks. -#define FORCEINLINE static __attribute__((__always_inline__)) -#define NOINLINE static __attribute__((__noinline__)) +#define FORCEINLINE static inline __attribute__((always_inline)) +#define NOINLINE static __attribute__((noinline)) UPB_NORETURN static void upb_decoder_exitjmp(upb_decoder *d) { // Resumable decoder would back out to completed_ptr (and possibly get a @@ -141,14 +138,23 @@ uint64_t upb_decoder_bufendofs(upb_decoder *d) { return d->bufstart_ofs + (d->end - d->buf); } +static bool upb_decoder_islegalend(upb_decoder *d) { + if (d->top == d->stack) return true; + if (d->top - 1 == d->stack && + d->top->is_sequence && !d->top->is_packed) return true; + return false; +} + +// Calculates derived values that we cache for speed. These reflect a +// combination of the current buffer and the stack, so must be called whenever +// either is updated. static void upb_decoder_setmsgend(upb_decoder *d) { - upb_dispatcher_frame *f = d->dispatcher.top; + upb_decoder_frame *f = d->top; size_t delimlen = f->end_ofs - d->bufstart_ofs; size_t buflen = d->end - d->buf; d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ? d->buf + delimlen : NULL; // NULL if not in this buf. 
d->top_is_packed = f->is_packed; - d->dispatch_table = &d->dispatcher.msgent->fieldtab; } static void upb_decoder_skiptonewbuf(upb_decoder *d, uint64_t ofs) { @@ -201,11 +207,11 @@ static void upb_pullbuf(upb_decoder *d) { if (!upb_trypullbuf(d)) upb_decoder_abortjmp(d, "Unexpected EOF"); } -void upb_decoder_checkpoint(upb_decoder *d) { +static void upb_decoder_checkpoint(upb_decoder *d) { upb_byteregion_discard(d->input, upb_decoder_offset(d)); } -void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) { +static void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) { if (ofs <= upb_decoder_bufendofs(d)) { upb_decoder_advance(d, ofs - upb_decoder_offset(d)); } else { @@ -214,7 +220,7 @@ void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) { upb_decoder_checkpoint(d); } -void upb_decoder_discard(upb_decoder *d, size_t bytes) { +static void upb_decoder_discard(upb_decoder *d, size_t bytes) { upb_decoder_discardto(d, upb_decoder_offset(d) + bytes); } @@ -259,7 +265,7 @@ done: // Returns true on success or false if we've hit a valid EOF. FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) { if (upb_decoder_bufleft(d) == 0 && - upb_dispatcher_islegalend(&d->dispatcher) && + upb_decoder_islegalend(d) && !upb_trypullbuf(d)) { return false; } @@ -319,21 +325,45 @@ FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) { return u64; // TODO: proper byte swapping for big-endian machines. } -INLINE upb_byteregion *upb_decode_string(upb_decoder *d) { - uint32_t strlen = upb_decode_varint32(d); - uint64_t offset = upb_decoder_offset(d); - if (offset + strlen > upb_byteregion_endofs(d->input)) - upb_decoder_abortjmp(d, "Unexpected EOF"); - upb_byteregion_reset(&d->str_byteregion, d->input, offset, strlen); - // Could make it an option on the callback whether we fetchall() first or not. 
- if (upb_byteregion_fetchall(&d->str_byteregion) != UPB_BYTE_OK) - upb_decoder_abortjmp(d, "Couldn't fetchall() on string."); - upb_decoder_discardto(d, offset + strlen); - return &d->str_byteregion; +INLINE void upb_push_msg(upb_decoder *d, const upb_fielddef *f, uint64_t end) { + upb_decoder_frame *fr = d->top + 1; + if (!upb_sink_startsubmsg(&d->sink, f) || fr > d->limit) { + upb_decoder_abortjmp(d, "Nesting too deep."); + } + fr->f = f; + fr->is_sequence = false; + fr->is_packed = false; + fr->end_ofs = end; + fr->group_fieldnum = end == UPB_NONDELIMITED ? + (int32_t)upb_fielddef_number(f) : -1; + d->top = fr; + upb_decoder_setmsgend(d); } -INLINE void upb_push_msg(upb_decoder *d, upb_fhandlers *f, uint64_t end) { - upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end; +INLINE void upb_push_seq(upb_decoder *d, const upb_fielddef *f, bool packed, + uint64_t end_ofs) { + upb_decoder_frame *fr = d->top + 1; + if (!upb_sink_startseq(&d->sink, f) || fr > d->limit) { + upb_decoder_abortjmp(d, "Nesting too deep."); + } + fr->f = f; + fr->is_sequence = true; + fr->group_fieldnum = -1; + fr->is_packed = packed; + fr->end_ofs = end_ofs; + d->top = fr; + upb_decoder_setmsgend(d); +} + +INLINE void upb_pop_submsg(upb_decoder *d) { + upb_sink_endsubmsg(&d->sink, d->top->f); + d->top--; + upb_decoder_setmsgend(d); +} + +INLINE void upb_pop_seq(upb_decoder *d) { + upb_sink_endseq(&d->sink, d->top->f); + d->top--; upb_decoder_setmsgend(d); } @@ -344,13 +374,14 @@ INLINE void upb_push_msg(upb_decoder *d, upb_fhandlers *f, uint64_t end) { // properly sign-extended. We could detect this and error about the data loss, // but proto2 does not do this, so we pass. 
-#define T(type, wt, valtype, convfunc) \ - INLINE void upb_decode_ ## type(upb_decoder *d, upb_fhandlers *f) { \ - upb_value val; \ - upb_value_set ## valtype(&val, (convfunc)(upb_decode_ ## wt(d))); \ - upb_dispatch_value(&d->dispatcher, f, val); \ +#define T(type, wt, name, convfunc) \ + INLINE void upb_decode_ ## type(upb_decoder *d, const upb_fielddef *f) { \ + upb_sink_put ## name(&d->sink, f, (convfunc)(upb_decode_ ## wt(d))); \ } \ +static double upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; } +static float upb_asfloat(uint32_t n) { float f; memcpy(&f, &n, 4); return f; } + T(INT32, varint, int32, int32_t) T(INT64, varint, int64, int64_t) T(UINT32, varint, uint32, uint32_t) @@ -361,43 +392,44 @@ T(SFIXED32, fixed32, int32, int32_t) T(SFIXED64, fixed64, int64, int64_t) T(BOOL, varint, bool, bool) T(ENUM, varint, int32, int32_t) +T(DOUBLE, fixed64, double, upb_asdouble) +T(FLOAT, fixed32, float, upb_asfloat) T(SINT32, varint, int32, upb_zzdec_32) T(SINT64, varint, int64, upb_zzdec_64) -T(STRING, string, byteregion, upb_byteregion*) - #undef T -INLINE void upb_decode_DOUBLE(upb_decoder *d, upb_fhandlers *f) { - upb_value val; - double dbl; - uint64_t wireval = upb_decode_fixed64(d); - memcpy(&dbl, &wireval, 8); - upb_value_setdouble(&val, dbl); - upb_dispatch_value(&d->dispatcher, f, val); -} - -INLINE void upb_decode_FLOAT(upb_decoder *d, upb_fhandlers *f) { - upb_value val; - float flt; - uint64_t wireval = upb_decode_fixed32(d); - memcpy(&flt, &wireval, 4); - upb_value_setfloat(&val, flt); - upb_dispatch_value(&d->dispatcher, f, val); -} - -static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) { +static void upb_decode_GROUP(upb_decoder *d, const upb_fielddef *f) { upb_push_msg(d, f, UPB_NONDELIMITED); } -static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) { - (void)f; - upb_dispatch_endsubmsg(&d->dispatcher); - upb_decoder_setmsgend(d); -} -static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) { + +static void 
upb_decode_MESSAGE(upb_decoder *d, const upb_fielddef *f) { uint32_t len = upb_decode_varint32(d); upb_push_msg(d, f, upb_decoder_offset(d) + len); } +static void upb_decode_STRING(upb_decoder *d, const upb_fielddef *f) { + uint32_t strlen = upb_decode_varint32(d); + uint64_t offset = upb_decoder_offset(d); + uint64_t end = offset + strlen; + if (end > upb_byteregion_endofs(d->input)) + upb_decoder_abortjmp(d, "Unexpected EOF"); + upb_sink_startstr(&d->sink, f, strlen); + while (strlen > 0) { + if (upb_byteregion_available(d->input, offset) == 0) + upb_pullbuf(d); + size_t len; + const char *ptr = upb_byteregion_getptr(d->input, offset, &len); + len = UPB_MIN(len, strlen); + len = upb_sink_putstring(&d->sink, f, ptr, len); + if (len > strlen) + upb_decoder_abortjmp(d, "Skipped too many bytes."); + offset += len; + strlen -= len; + upb_decoder_discardto(d, offset); + } + upb_sink_endstr(&d->sink, f); +} + /* The main decoding loop *****************************************************/ @@ -410,33 +442,33 @@ static void upb_decoder_checkdelim(upb_decoder *d) { // handler). while (d->delim_end != NULL && d->ptr >= d->delim_end) { if (d->ptr > d->delim_end) upb_decoder_abortjmp(d, "Bad submessage end"); - if (d->dispatcher.top->is_sequence) { - upb_dispatch_endseq(&d->dispatcher); + if (d->top->is_sequence) { + upb_pop_seq(d); } else { - upb_dispatch_endsubmsg(&d->dispatcher); + upb_pop_submsg(d); } - upb_decoder_setmsgend(d); } } -INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { +INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) { while (1) { uint32_t tag; if (!upb_trydecode_varint32(d, &tag)) return NULL; uint8_t wire_type = tag & 0x7; - uint32_t fieldnum = tag >> 3; - const upb_value *val = upb_inttable_lookup32(d->dispatch_table, fieldnum); - upb_fhandlers *f = val ? 
upb_value_getptr(*val) : NULL; - bool is_packed = false; + uint32_t fieldnum = tag >> 3; const upb_fielddef *f = NULL; + const upb_handlers *h = upb_sink_tophandlers(&d->sink); + f = upb_msgdef_itof(upb_handlers_msgdef(h), fieldnum); + bool packed = false; if (f) { // Wire type check. - if (wire_type == upb_decoder_types[f->type].native_wire_type) { + upb_fieldtype_t type = upb_fielddef_type(f); + if (wire_type == upb_decoder_types[type].native_wire_type) { // Wire type is ok. } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED && - upb_decoder_types[f->type].is_numeric)) { + upb_decoder_types[type].is_numeric)) { // Wire type is ok (and packed). - is_packed = true; + packed = true; } else { f = NULL; } @@ -445,29 +477,24 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { // There are no explicit "startseq" or "endseq" markers in protobuf // streams, so we have to infer them by noticing when a repeated field // starts or ends. - upb_dispatcher_frame *fr = d->dispatcher.top; + upb_decoder_frame *fr = d->top; if (fr->is_sequence && fr->f != f) { - upb_dispatch_endseq(&d->dispatcher); - upb_decoder_setmsgend(d); - fr = d->dispatcher.top; + upb_pop_seq(d); + fr = d->top; } - if (f && f->repeated && !fr->is_sequence) { - upb_dispatcher_frame *fr2 = upb_dispatch_startseq(&d->dispatcher, f); - if (is_packed) { - // Packed primitive field. + + if (f && upb_fielddef_isseq(f) && !fr->is_sequence) { + if (packed) { uint32_t len = upb_decode_varint32(d); - fr2->end_ofs = upb_decoder_offset(d) + len; - fr2->is_packed = true; + upb_push_seq(d, f, true, upb_decoder_offset(d) + len); } else { - // Non-packed field -- this tag pertains to only a single message. - fr2->end_ofs = fr->end_ofs; + upb_push_seq(d, f, false, fr->end_ofs); } - upb_decoder_setmsgend(d); } if (f) return f; - // Unknown field. + // Unknown field or ENDGROUP. 
if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER) upb_decoder_abortjmp(d, "Invalid field number"); switch (wire_type) { @@ -479,7 +506,12 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { case UPB_WIRE_TYPE_START_GROUP: upb_decoder_abortjmp(d, "Can't handle unknown groups yet"); case UPB_WIRE_TYPE_END_GROUP: - upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag"); + if (fieldnum != fr->group_fieldnum) + upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag"); + upb_sink_endsubmsg(&d->sink, fr->f); + d->top--; + upb_decoder_setmsgend(d); + break; default: upb_decoder_abortjmp(d, "Invalid wire type"); } @@ -495,30 +527,30 @@ upb_success_t upb_decoder_decode(upb_decoder *d) { assert(!upb_ok(&d->status)); return UPB_ERROR; } - upb_dispatch_startmsg(&d->dispatcher); + upb_sink_startmsg(&d->sink); // Prime the buf so we can hit the JIT immediately. upb_trypullbuf(d); - upb_fhandlers *f = d->dispatcher.top->f; + const upb_fielddef *f = d->top->f; while(1) { - upb_decoder_checkdelim(d); #ifdef UPB_USE_JIT_X64 upb_decoder_enterjit(d); upb_decoder_checkpoint(d); + upb_decoder_setmsgend(d); #endif + upb_decoder_checkdelim(d); if (!d->top_is_packed) f = upb_decode_tag(d); if (!f) { // Sucessful EOF. We may need to dispatch a top-level implicit frame. 
- if (d->dispatcher.top->is_sequence) { - assert(d->dispatcher.top == d->dispatcher.stack + 1); - upb_dispatch_endseq(&d->dispatcher); + if (d->top->is_sequence) { + assert(d->sink.top == d->sink.stack + 1); + upb_pop_seq(d); } - assert(d->dispatcher.top == d->dispatcher.stack); - upb_dispatch_endmsg(&d->dispatcher, &d->status); + assert(d->top == d->stack); + upb_sink_endmsg(&d->sink, &d->status); return UPB_OK; } - switch (f->type) { - case UPB_TYPE_ENDGROUP: upb_endgroup(d, f); break; + switch (upb_fielddef_type(f)) { case UPB_TYPE(DOUBLE): upb_decode_DOUBLE(d, f); break; case UPB_TYPE(FLOAT): upb_decode_FLOAT(d, f); break; case UPB_TYPE(INT64): upb_decode_INT64(d, f); break; @@ -545,28 +577,29 @@ upb_success_t upb_decoder_decode(upb_decoder *d) { void upb_decoder_init(upb_decoder *d) { upb_status_init(&d->status); - upb_dispatcher_init(&d->dispatcher, &d->status, &upb_decoder_exitjmp2, d); d->plan = NULL; d->input = NULL; + d->limit = &d->stack[UPB_MAX_NESTING]; } -void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset) { - assert(msg_offset >= 0); - assert(msg_offset < p->handlers->msgs_len); +void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p) { d->plan = p; - d->msg_offset = msg_offset; d->input = NULL; + upb_sink_init(&d->sink, p->handlers); } void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, - void *closure) { + void *c) { assert(d->plan); - upb_dispatcher_frame *f = - upb_dispatcher_reset(&d->dispatcher, closure, d->plan->handlers->msgs[0]); upb_status_clear(&d->status); - f->end_ofs = UPB_NONDELIMITED; + upb_sink_reset(&d->sink, c); d->input = input; - d->str_byteregion.bytesrc = input->bytesrc; + + d->top = d->stack; + d->top->is_sequence = false; + d->top->is_packed = false; + d->top->group_fieldnum = UINT32_MAX; + d->top->end_ofs = UPB_NONDELIMITED; // Protect against assert in skiptonewbuf(). 
d->bufstart_ofs = 0; @@ -576,6 +609,5 @@ void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, } void upb_decoder_uninit(upb_decoder *d) { - upb_dispatcher_uninit(&d->dispatcher); upb_status_uninit(&d->status); } diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index df65468..690ebb9 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -13,9 +13,8 @@ #define UPB_DECODER_H_ #include -#include -#include -#include "upb/handlers.h" +#include "upb/bytestream.h" +#include "upb/sink.h" #ifdef __cplusplus extern "C" { @@ -34,9 +33,12 @@ extern "C" { struct _upb_decoderplan; typedef struct _upb_decoderplan upb_decoderplan; -// TODO: add parameter for a list of other decoder plans that we can share -// generated code with. -upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit); +// TODO(haberman): +// - add support for letting any message in the plan be at the top level. +// - make this object a handlers instead (when bytesrc/bytesink are merged +// into handlers). +// - add support for sharing code with previously-built plans/handlers. +upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit); void upb_decoderplan_unref(upb_decoderplan *p); // Returns true if the plan contains JIT-ted code. This may not be the same as @@ -49,15 +51,28 @@ bool upb_decoderplan_hasjitcode(upb_decoderplan *p); struct dasm_State; +typedef struct { + const upb_fielddef *f; + uint64_t end_ofs; + uint32_t group_fieldnum; // UINT32_MAX for non-groups. + bool is_sequence; // frame represents seq or submsg? (f might be both). + bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX + // (strings aren't pushed). +} upb_decoder_frame; + typedef struct _upb_decoder { upb_decoderplan *plan; - int msg_offset; // Which message from the plan is top-level. upb_byteregion *input; // Input data (serialized), not owned. - upb_dispatcher dispatcher; // Dispatcher to which we push parsed data. upb_status status; // Where we store errors that occur. 
- upb_byteregion str_byteregion; // For passing string data to callbacks. - upb_inttable *dispatch_table; + // Where we push parsed data. + // TODO(haberman): make this a pointer and make upb_decoder_resetinput() take + // one of these instead of a void*. + upb_sink sink; + + // Our internal stack. + upb_decoder_frame *top, *limit; + upb_decoder_frame stack[UPB_MAX_NESTING]; // Current input buffer and its stream offset. const char *buf, *ptr, *end; @@ -70,7 +85,11 @@ typedef struct _upb_decoder { #ifdef UPB_USE_JIT_X64 // For JIT, which doesn't do bounds checks in the middle of parsing a field. - const char *jit_end, *effective_end; // == MIN(jit_end, submsg_end) + const char *jit_end, *effective_end; // == MIN(jit_end, delim_end) + + // Used momentarily by the generated code to store a value while a user + // function is called. + uint32_t tmp_len; #endif // For exiting the decoder on error. @@ -88,7 +107,7 @@ void upb_decoder_uninit(upb_decoder *d); // must live until the decoder is destroyed or reset to a different plan. // // Must be called before upb_decoder_resetinput() or upb_decoder_decode(). -void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset); +void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p); // Resets the input of an already-allocated decoder. This puts it in a state // where it has not seen any data, and expects the next data to be from the @@ -111,7 +130,8 @@ INLINE const upb_status *upb_decoder_status(upb_decoder *d) { // Implementation details struct _upb_decoderplan { - upb_handlers *handlers; // owns reference. + // The top-level handlers that this plan calls into. We own a ref. + const upb_handlers *handlers; #ifdef UPB_USE_JIT_X64 // JIT-generated machine code (else NULL). @@ -119,8 +139,23 @@ struct _upb_decoderplan { size_t jit_size; char *debug_info; + // For storing upb_jitmsginfo, which contains per-msg runtime data needed + // by the JIT. + // Maps upb_handlers* -> upb_jitmsginfo. 
+ upb_inttable msginfo; + + // The following members are used only while the JIT is being built. + // This pointer is allocated by dasm_init() and freed by dasm_free(). struct dasm_State *dynasm; + + // For storing pclabel bases while we are building the JIT. + // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base + upb_inttable pclabels; + + // This is not the same as len(pclabels) because the table only contains base + // offsets for each def, but each def can have many pclabels. + uint32_t pclabel_count; #endif }; diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc index f58e403..cd09cfe 100644 --- a/upb/pb/decoder_x64.dasc +++ b/upb/pb/decoder_x64.dasc @@ -12,6 +12,7 @@ |// function) we must respect alignment rules. All x86-64 systems require |// 16-byte stack alignment. +#include #include #include "dynasm/dasm_x86.h" @@ -28,6 +29,44 @@ #define MAP_32BIT 0 #endif +// These are used to track jump targets for messages and fields. +enum { + STARTMSG = 0, + AFTER_STARTMSG = 1, + ENDOFBUF = 2, + ENDOFMSG = 3, + DYNDISPATCH = 4, + TOTAL_MSG_PCLABELS = 5, +}; + +enum { + FIELD = 0, + FIELD_NO_TYPECHECK = 1, + TOTAL_FIELD_PCLABELS = 2, +}; + +typedef struct { + uint32_t max_field_number; + // Currently keyed on field number. Could also try keying it + // on encoded or decoded tag, or on encoded field number. + void **tablearray; + // Pointer to the JIT code for parsing this message. + void *jit_func; +} upb_jitmsginfo; + +static uint32_t upb_getpclabel(upb_decoderplan *plan, const void *obj, int n) { + const upb_value *v = upb_inttable_lookupptr(&plan->pclabels, obj); + assert(v); + return upb_value_getuint32(*v) + n; +} + +static upb_jitmsginfo *upb_getmsginfo(upb_decoderplan *plan, + const upb_handlers *h) { + const upb_value *v = upb_inttable_lookupptr(&plan->msginfo, h); + assert(v); + return upb_value_getptr(*v); +} + // To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code // at runtime. 
GDB 7.x+ has defined an interface for doing this, and these // structure/function defintions are copied out of gdb/jit.h @@ -66,7 +105,9 @@ typedef struct { gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL}; -void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); } +void __attribute__((noinline)) __jit_debug_register_code() { + __asm__ __volatile__(""); +} void upb_reg_jit_gdb(upb_decoderplan *plan) { // Create debug info. @@ -120,7 +161,8 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } |.define ARG3_32, edx |.define ARG3_64, rdx |.define ARG4_64, rcx -|.define ARG5_32, r8d +|.define XMMARG1, xmm0 + | |// Register allocation / type map. |// ALL of the code in this file uses these register allocations. @@ -128,13 +170,15 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } |// conventions, but of course when calling to user callbacks we must. |.define PTR, rbx // Writing this to DECODER->ptr commits our progress. |.define CLOSURE, r12 -|.type FRAME, upb_dispatcher_frame, r13 -|.type BYTEREGION,upb_byteregion, r14 +|.type SINKFRAME, upb_sink_frame, r13 +|.type FRAME, upb_decoder_frame, r14 |.type DECODER, upb_decoder, r15 -|.type STDARRAY, upb_stdarray | |.macro callp, addr || upb_assert_notnull(addr); +|// TODO(haberman): fix this. I believe the predicate we should actually be +|// testing is whether the jump distance is greater than INT32_MAX, not the +|// absolute address of the target. || if ((uintptr_t)addr < 0xffffffff) { | call &addr || } else { @@ -143,14 +187,22 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } || } |.endmacro | -|// Checks PTR for end-of-buffer. -|.macro check_eob, m +|// Checkpoints our progress by writing PTR to DECODER, and +|// checks for end-of-buffer. 
+|.macro checkpoint, h +| mov DECODER->ptr, PTR | cmp PTR, DECODER->effective_end -|| if (m->is_group) { - | jae ->exit_jit -|| } else { - | jae =>m->jit_endofbuf_pclabel -|| } +| jae =>upb_getpclabel(plan, h, ENDOFBUF) +|.endmacro +| +|.macro check_bool_ret +| test al, al +| jz ->exit_jit +|.endmacro +| +|.macro check_ptr_ret +| test rax, rax +| jz ->exit_jit |.endmacro | |// Decodes varint from [PTR + offset] -> ARG3. @@ -172,8 +224,7 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } | mov ARG1_64, rax | mov ARG2_32, ARG3_32 | callp upb_vdecode_max8_fast -| test rax, rax -| jz ->exit_jit // >10-byte varint. +| check_ptr_ret // Check for unterminated, >10-byte varint. |9: |.endmacro | @@ -187,74 +238,103 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } |// Could specialize this by avoiding the value masking: could just key the |// table on the raw (length-masked) varint to save 3-4 cycles of latency. |// Currently only support tables where all entries are in the array part. -|.macro dyndispatch_, m -|=>m->jit_dyndispatch_pclabel: +|.macro dyndispatch_, h +|=>upb_getpclabel(plan, h, DYNDISPATCH): | decode_loaded_varint, 0 | mov ecx, edx | shr ecx, 3 -| and edx, 0x7 // For the type check that will happen later. -| cmp ecx, m->max_field_number // Bounds-check the field. -| ja ->exit_jit // In the future; could be unknown label -|| if ((uintptr_t)m->tablearray < 0xffffffff) { +| and edx, 0x7 // Note: this value is used in the FIELD pclabel below. +| cmp edx, UPB_WIRE_TYPE_END_GROUP +| je >1 +|| upb_jitmsginfo *mi = upb_getmsginfo(plan, h); +| cmp ecx, mi->max_field_number // Bounds-check the field. +| ja ->exit_jit // In the future; could be unknown label +|| if ((uintptr_t)mi->tablearray < 0xffffffff) { | // TODO: support hybrid array/hash tables. 
-| mov rax, qword [rcx*8 + m->tablearray] +| mov rax, qword [rcx*8 + mi->tablearray] || } else { -| mov64 rax, (uintptr_t)m->tablearray +| mov64 rax, (uintptr_t)mi->tablearray | mov rax, qword [rax + rcx*8] || } | jmp rax // Dispatch: unpredictable jump. +|1: +|// End group. +| cmp ecx, FRAME->group_fieldnum +| jne ->exit_jit // Unexpected END_GROUP tag. +| mov PTR, rax // rax came from decode_loaded_varint +| mov DECODER->ptr, PTR +| jmp =>upb_getpclabel(plan, h, ENDOFMSG) |.endmacro | |.if 1 | // Replicated dispatch: larger code, but better branch prediction. | .define dyndispatch, dyndispatch_ |.else -| .macro dyndispatch, m -| jmp =>m->jit_dyndispatch_pclabel +| // Single dispatch: smaller code, could be faster because of reduced +| // icache usage. We keep this around to allow for easy comparison between +| // the two. +| .macro dyndispatch, h +| jmp =>upb_getpclabel(plan, h, DYNDISPATCH) | .endmacro |.endif | |// Push a stack frame (not the CPU stack, the upb_decoder stack). -|.macro pushframe, f, end_offset_, is_sequence_ -| lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing. -| cmp rax, qword DECODER->dispatcher.limit +|.macro pushframe, h, field, end_offset_, endtype +|// Decoder Frame. +| lea rax, [FRAME + sizeof(upb_decoder_frame)] // rax for short addressing +| cmp rax, DECODER->limit | jae ->exit_jit // Frame stack overflow. 
-| mov64 r8, (uintptr_t)f -| mov qword FRAME:rax->f, r8 +| mov64 r10, (uintptr_t)field +| mov FRAME:rax->f, r10 | mov qword FRAME:rax->end_ofs, end_offset_ -| mov byte FRAME:rax->is_sequence, is_sequence_ -| mov DECODER->dispatcher.top, rax +| mov byte FRAME:rax->is_sequence, (endtype == UPB_HANDLER_ENDSEQ) +| mov byte FRAME:rax->is_packed, 0 +|| if (upb_fielddef_type(field) == UPB_TYPE_GROUP && +|| endtype == UPB_HANDLER_ENDSUBMSG) { +| mov dword FRAME:rax->group_fieldnum, upb_fielddef_number(field) +|| } else { +| mov dword FRAME:rax->group_fieldnum, 0xffffffff +|| } +| mov DECODER->top, rax | mov FRAME, rax +|// Sink Frame. +| lea rcx, [SINKFRAME + sizeof(upb_sink_frame)] // rcx for short addressing +| cmp rcx, DECODER->sink.limit +| jae ->exit_jit // Frame stack overflow. +| mov dword SINKFRAME:rcx->end, getselector(field, endtype) +|| if (upb_fielddef_issubmsg(field)) { +| mov64 r9, (uintptr_t)upb_handlers_getsubhandlers(h, field) +|| } else { +| mov64 r9, (uintptr_t)h +|| } +| mov SINKFRAME:rcx->h, r9 +| mov DECODER->sink.top, rcx +| mov SINKFRAME, rcx |.endmacro | -|.macro popframe, m -| sub FRAME, sizeof(upb_dispatcher_frame) -| mov DECODER->dispatcher.top, FRAME -| setmsgend m -| mov CLOSURE, FRAME->closure +|.macro popframe +| sub FRAME, sizeof(upb_decoder_frame) +| mov DECODER->top, FRAME +| sub SINKFRAME, sizeof(upb_sink_frame) +| mov DECODER->sink.top, SINKFRAME +| setmsgend +| mov CLOSURE, SINKFRAME->closure |.endmacro | -|.macro setmsgend, m -| mov rsi, DECODER->jit_end -|| if (m->is_group) { -| mov64 rax, 0xffffffffffffffff -| mov qword DECODER->delim_end, rax -| mov DECODER->effective_end, rsi -|| } else { -| // Could store a correctly-biased version in the frame, at the cost of -| // a larger stack. 
-| mov eax, dword FRAME->end_ofs -| add rax, qword DECODER->buf -| mov DECODER->delim_end, rax // delim_end = d->buf + f->end_ofs -| cmp rax, rsi -| jb >8 -| mov rax, rsi // effective_end = min(d->delim_end, d->jit_end) +|.macro setmsgend +| mov rsi, DECODER->jit_end +| mov rax, qword FRAME->end_ofs // Will be UINT64_MAX for groups. +| sub rax, qword DECODER->bufstart_ofs +| add rax, qword DECODER->buf // rax = d->buf + f->end_ofs - d->bufstart_ofs +| jc >8 // If the addition overflowed, use jit_end +| cmp rax, rsi +| ja >8 // If jit_end is less, use jit_end +| mov rsi, rax // Use frame end. |8: -| mov DECODER->effective_end, rax -|| } +| mov DECODER->effective_end, rsi |.endmacro | -|// rax contains the tag, compare it against "tag", but since it is a varint +|// rcx contains the tag, compare it against "tag", but since it is a varint |// we must only compare as many bytes as actually have data. |.macro checktag, tag || switch (upb_value_size(tag)) { @@ -279,22 +359,6 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } || } |.endmacro | -|// TODO: optimize for 0 (xor) and 32-bits. -|.macro loadfval, f -||#ifndef NDEBUG -||// Since upb_value carries type information in debug mode -||// only, we need to pass the arguments slightly differently. 
-| mov ARG3_32, f->fval.type -||#endif -|| if (f->fval.val.uint64 == 0) { -| xor ARG2_32, ARG2_32 -|| } else if (f->fval.val.uint64 < 0xffffffff) { -| mov ARG2_32, f->fval.val.uint64 -|| } else { -| mov64 ARG2_64, f->fval.val.uint64 -|| } -|.endmacro -| |.macro sethas, reg, hasbit || if (hasbit >= 0) { | or byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8)) @@ -304,14 +368,37 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } #include #include "upb/pb/varint.h" -#include "upb/msg.h" + +static upb_selector_t getselector(const upb_fielddef *f, + upb_handlertype_t type) { + upb_selector_t selector; + bool ok = upb_getselector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; +} + +static upb_func *gethandler(const upb_handlers *h, const upb_fielddef *f, + upb_handlertype_t type) { + return upb_handlers_gethandler(h, getselector(f, type)); +} + +static uintptr_t gethandlerdata(const upb_handlers *h, const upb_fielddef *f, + upb_handlertype_t type) { + return (uintptr_t)upb_handlers_gethandlerdata(h, getselector(f, type)); +} // Decodes the next val into ARG3, advances PTR. static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, - uint8_t type, size_t tag_size) { + uint8_t type, size_t tag_size, + const upb_handlers *h, + const upb_fielddef *f) { // Decode the value into arg 3 for the callback. 
switch (type) { case UPB_TYPE(DOUBLE): + | movsd XMMARG1, qword [PTR + tag_size] + | add PTR, 8 + tag_size + break; + case UPB_TYPE(FIXED64): case UPB_TYPE(SFIXED64): | mov ARG3_64, qword [PTR + tag_size] @@ -319,6 +406,10 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, break; case UPB_TYPE(FLOAT): + | movss XMMARG1, dword [PTR + tag_size] + | add PTR, 4 + tag_size + break; + case UPB_TYPE(FIXED32): case UPB_TYPE(SFIXED32): | mov ARG3_32, dword [PTR + tag_size] @@ -362,7 +453,7 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, break; case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): + case UPB_TYPE(BYTES): { // We only handle the case where the entire string is in our current // buf, which sidesteps any security problems. The C path has more // robust checks. @@ -372,22 +463,42 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, | sub rdi, rax | cmp ARG3_64, rdi // if (len > d->end - str) | ja ->exit_jit // Can't deliver, whole string not in buf. + | mov PTR, rax + + upb_func *handler = gethandler(h, f, UPB_HANDLER_STARTSTR); + if (handler) { + | mov DECODER->tmp_len, ARG3_64 + | mov ARG1_64, CLOSURE + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSTR) + | callp handler + | check_ptr_ret + | mov ARG1_64, rax // sub-closure + | mov ARG4_64, DECODER->tmp_len + } else { + | mov ARG1_64, CLOSURE + | mov ARG4_64, ARG3_64 + } + + handler = gethandler(h, f, UPB_HANDLER_STRING); + if (handler) { + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STRING) + | mov ARG3_64, PTR + | callp handler + // TODO: properly handle returns other than "n" (the whole string). + | add PTR, rax + } else { + | add PTR, ARG4_64 + } - // Update PTR to point past end of string. - | mov rdi, rax - | add rdi, ARG3_64 - | mov PTR, rdi - - // Populate BYTEREGION appropriately. 
- | sub rax, DECODER->buf - | add rax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs - | mov BYTEREGION->start, rax - | mov BYTEREGION->discard, rax - | add rax, ARG3_64 - | mov BYTEREGION->end, rax - | mov BYTEREGION->fetch, rax // Fast path ensures whole string is loaded - | mov ARG3_64, BYTEREGION + handler = gethandler(h, f, UPB_HANDLER_ENDSTR); + if (handler) { + | mov ARG1_64, CLOSURE + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSTR) + | callp handler + | check_bool_ret + } break; + } // Will dispatch callbacks and call submessage in a second. case UPB_TYPE(MESSAGE): @@ -402,85 +513,85 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, } static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, - upb_fhandlers *f) { + const upb_handlers *h, + const upb_fielddef *f) { // Call callbacks. Specializing the append accessors didn't yield a speed // increase in benchmarks. - if (upb_issubmsgtype(f->type)) { - if (f->type == UPB_TYPE(MESSAGE)) { + if (upb_fielddef_issubmsg(f)) { + if (upb_fielddef_type(f) == UPB_TYPE(MESSAGE)) { | mov rsi, PTR | sub rsi, DECODER->buf | add rsi, ARG3_64 // = (d->ptr - d->buf) + delim_len } else { - assert(f->type == UPB_TYPE(GROUP)); + assert(upb_fielddef_type(f) == UPB_TYPE(GROUP)); | mov rsi, UPB_NONDELIMITED } - | pushframe f, rsi, false + | pushframe h, f, rsi, UPB_HANDLER_ENDSUBMSG // Call startsubmsg handler (if any). 
- if (f->startsubmsg) { + upb_func *startsubmsg = gethandler(h, f, UPB_HANDLER_STARTSUBMSG); + if (startsubmsg) { // upb_sflow_t startsubmsg(void *closure, upb_value fval) | mov ARG1_64, CLOSURE - | loadfval f - | callp f->startsubmsg - | sethas CLOSURE, f->hasbit - | mov CLOSURE, rdx - } else { - | sethas CLOSURE, f->hasbit + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSUBMSG); + | callp startsubmsg + | check_ptr_ret + | mov CLOSURE, rax } - | mov qword FRAME->closure, CLOSURE - // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK - | mov DECODER->ptr, PTR + | mov qword SINKFRAME->closure, CLOSURE - const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f); - | call =>sub_m->jit_startmsg_pclabel; - | popframe upb_fhandlers_getmsg(f) + // TODO: have to decide what to do with NULLs subhandlers (or whether to + // disallow them and require a full handlers tree to match the def tree). + const upb_handlers *sub_h = upb_handlers_getsubhandlers(h, f); + assert(sub_h); + | call =>upb_getpclabel(plan, sub_h, STARTMSG) + | popframe // Call endsubmsg handler (if any). - if (f->endsubmsg) { + upb_func *endsubmsg = gethandler(h, f, UPB_HANDLER_ENDSUBMSG); + if (endsubmsg) { // upb_flow_t endsubmsg(void *closure, upb_value fval); | mov ARG1_64, CLOSURE - | loadfval f - | callp f->endsubmsg + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSUBMSG); + | callp endsubmsg + | check_bool_ret } - // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK - | mov DECODER->ptr, PTR - } else { + } else if (!upb_fielddef_isstring(f)) { | mov ARG1_64, CLOSURE + upb_handlertype_t handlertype = upb_handlers_getprimitivehandlertype(f); + upb_func *handler = gethandler(h, f, handlertype); + const upb_stdmsg_fval *fv = (void*)gethandlerdata(h, f, handlertype); // Test for callbacks we can specialize. // Can't switch() on function pointers. 
- if (f->value == &upb_stdmsg_setint64 || - f->value == &upb_stdmsg_setuint64 || - f->value == &upb_stdmsg_setptr || - f->value == &upb_stdmsg_setdouble) { - const upb_fielddef *fd = upb_value_getfielddef(f->fval); - | mov [ARG1_64 + fd->offset], ARG3_64 - } else if (f->value == &upb_stdmsg_setint32 || - f->value == &upb_stdmsg_setuint32 || - f->value == &upb_stdmsg_setfloat) { - const upb_fielddef *fd = upb_value_getfielddef(f->fval); - | mov [ARG1_64 + fd->offset], ARG3_32 - } else if (f->value == &upb_stdmsg_setbool) { - const upb_fielddef *fd = upb_value_getfielddef(f->fval); - | mov [ARG1_64 + fd->offset], ARG3_8 - } else if (f->value) { + if (handler == (void*)&upb_stdmsg_setint64 || + handler == (void*)&upb_stdmsg_setuint64) { + | mov [ARG1_64 + fv->offset], ARG3_64 + | sethas CLOSURE, fv->hasbit + } else if (handler == (void*)&upb_stdmsg_setdouble) { + | movsd qword [ARG1_64 + fv->offset], XMMARG1 + | sethas CLOSURE, fv->hasbit + } else if (handler == (void*)&upb_stdmsg_setint32 || + handler == (void*)&upb_stdmsg_setuint32) { + | mov [ARG1_64 + fv->offset], ARG3_32 + | sethas CLOSURE, fv->hasbit + } else if (handler == (void*)&upb_stdmsg_setfloat) { + | movss dword [ARG1_64 + fv->offset], XMMARG1 + | sethas CLOSURE, fv->hasbit + } else if (handler == (void*)&upb_stdmsg_setbool) { + | mov [ARG1_64 + fv->offset], ARG3_8 + | sethas CLOSURE, fv->hasbit + } else if (handler) { // Load closure and fval into arg registers. - ||#ifndef NDEBUG - ||// Since upb_value carries type information in debug mode - ||// only, we need to pass the arguments slightly differently. 
- | mov ARG4_64, ARG3_64 - | mov ARG5_32, upb_types[f->type].inmemory_type - ||#endif - | loadfval f - | callp f->value + | mov64 ARG2_64, gethandlerdata(h, f, handlertype); + | callp handler + | check_bool_ret } - | sethas CLOSURE, f->hasbit - // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK - | mov DECODER->ptr, PTR } } -static uint64_t upb_get_encoded_tag(upb_fhandlers *f) { - uint32_t tag = (f->number << 3) | upb_decoder_types[f->type].native_wire_type; +static uint64_t upb_get_encoded_tag(const upb_fielddef *f) { + uint32_t tag = (upb_fielddef_number(f) << 3) | + upb_decoder_types[upb_fielddef_type(f)].native_wire_type; uint64_t encoded_tag = upb_vencode32(tag); // No tag should be greater than 5 bytes. assert(encoded_tag <= 0xffffffffff); @@ -488,118 +599,121 @@ static uint64_t upb_get_encoded_tag(upb_fhandlers *f) { } // PTR should point to the beginning of the tag. -static void upb_decoderplan_jit_field(upb_decoderplan *plan, upb_mhandlers *m, - upb_fhandlers *f, upb_fhandlers *next_f) { +static void upb_decoderplan_jit_field(upb_decoderplan *plan, + const upb_handlers *h, + const upb_fielddef *f, + const upb_fielddef *next_f) { uint64_t tag = upb_get_encoded_tag(f); uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0; + int tag_size = upb_value_size(tag); // PC-label for the dispatch table. // We check the wire type (which must be loaded in edx) because the // table is keyed on field number, not type. - |=>f->jit_pclabel: + |=>upb_getpclabel(plan, f, FIELD): | cmp edx, (tag & 0x7) | jne ->exit_jit // In the future: could be an unknown field or packed. 
- |=>f->jit_pclabel_notypecheck: - if (f->repeated) { + |=>upb_getpclabel(plan, f, FIELD_NO_TYPECHECK): + if (upb_fielddef_isseq(f)) { | mov rsi, FRAME->end_ofs - | pushframe f, rsi, true - if (f->startseq) { + | pushframe h, f, rsi, UPB_HANDLER_ENDSEQ + upb_func *startseq = gethandler(h, f, UPB_HANDLER_STARTSEQ); + if (startseq) { | mov ARG1_64, CLOSURE - | loadfval f - | callp f->startseq - | sethas CLOSURE, f->hasbit - | mov CLOSURE, rdx - } else { - | sethas CLOSURE, f->hasbit + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSEQ); + | callp startseq + | check_ptr_ret + | mov CLOSURE, rax } - | mov qword FRAME->closure, CLOSURE + | mov qword SINKFRAME->closure, CLOSURE } |1: // Label for repeating this field. - int tag_size = upb_value_size(tag); - if (f->type == UPB_TYPE_ENDGROUP) { - | add PTR, tag_size - | jmp =>m->jit_endofmsg_pclabel - return; - } - - upb_decoderplan_jit_decodefield(plan, f->type, tag_size); - upb_decoderplan_jit_callcb(plan, f); + upb_decoderplan_jit_decodefield(plan, upb_fielddef_type(f), tag_size, h, f); + upb_decoderplan_jit_callcb(plan, h, f); // Epilogue: load next tag, check for repeated field. - | check_eob m + | checkpoint h | mov rcx, qword [PTR] - if (f->repeated) { + if (upb_fielddef_isseq(f)) { | checktag tag | je <1 - if (f->endseq) { + upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ); + if (endseq) { | mov ARG1_64, CLOSURE - | loadfval f - | callp f->endseq + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSEQ); + | callp endseq } - | popframe m + | popframe + // Load next tag again (popframe clobbered it). + | mov rcx, qword [PTR] } + if (next_tag != 0) { | checktag next_tag - | je =>next_f->jit_pclabel_notypecheck + | je =>upb_getpclabel(plan, next_f, FIELD_NO_TYPECHECK) } // Fall back to dynamic dispatch. - | dyndispatch m - |1: + | dyndispatch h } static int upb_compare_uint32(const void *a, const void *b) { - // TODO: always put ENDGROUP at the end. 
return *(uint32_t*)a - *(uint32_t*)b; } -static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) { - |=>m->jit_afterstartmsg_pclabel: +static void upb_decoderplan_jit_msg(upb_decoderplan *plan, + const upb_handlers *h) { + |=>upb_getpclabel(plan, h, AFTER_STARTMSG): // There was a call to get here, so we need to align the stack. | sub rsp, 8 | jmp >1 - |=>m->jit_startmsg_pclabel: + |=>upb_getpclabel(plan, h, STARTMSG): // There was a call to get here, so we need to align the stack. | sub rsp, 8 // Call startmsg handler (if any): - if (m->startmsg) { + upb_startmsg_handler *startmsg = upb_handlers_getstartmsg(h); + if (startmsg) { // upb_flow_t startmsg(void *closure); - | mov ARG1_64, FRAME->closure - | callp m->startmsg - // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK + | mov ARG1_64, SINKFRAME->closure + | callp startmsg + | check_bool_ret } |1: - | setmsgend m - | check_eob m + | setmsgend + | checkpoint h | mov ecx, dword [PTR] - | dyndispatch_ m + | dyndispatch_ h // --------- New code section (does not fall through) ------------------------ // Emit code for parsing each field (dynamic dispatch contains pointers to // all of these). - // Create an ordering over the fields (inttable ordering is undefined). - int num_keys = upb_inttable_count(&m->fieldtab); + // Create an ordering over the fields in field number order. + // Parsing will theoretically be fastest if we emit code in the same + // order as field numbers are seen on-the-wire because of an optimization + // in the generated code that skips dynamic dispatch if the next field is + // as expected. 
+ const upb_msgdef *md = upb_handlers_msgdef(h); + int num_keys = upb_msgdef_numfields(md); uint32_t *keys = malloc(num_keys * sizeof(*keys)); int idx = 0; - upb_inttable_iter i; - upb_inttable_begin(&i, &m->fieldtab); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - keys[idx++] = upb_inttable_iter_key(&i); + upb_msg_iter i; + for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { + keys[idx++] = upb_fielddef_number(upb_msg_iter_field(&i)); } qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32); for(int i = 0; i < num_keys; i++) { - upb_fhandlers *f = upb_mhandlers_lookup(m, keys[i]); - upb_fhandlers *next_f = - (i + 1 < num_keys) ? upb_mhandlers_lookup(m, keys[i + 1]) : NULL; - upb_decoderplan_jit_field(plan, m, f, next_f); + const upb_fielddef *f = upb_msgdef_itof(md, keys[i]); + const upb_fielddef *next_f = + (i + 1 < num_keys) ? upb_msgdef_itof(md, keys[i + 1]) : NULL; + upb_decoderplan_jit_field(plan, h, f, next_f); } free(keys); @@ -607,27 +721,19 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) { // --------- New code section (does not fall through) ------------------------ // End-of-buf / end-of-message. - if (!m->is_group) { - // This case doesn't exist for groups, because there eob really means - // eob, so that case just exits the jit directly. - |=>m->jit_endofbuf_pclabel: - | cmp PTR, DECODER->delim_end - | jb ->exit_jit // We are at eob, but not end-of-submsg. - } + // We hit a buffer limit; either we hit jit_end or end-of-submessage. 
+ |=>upb_getpclabel(plan, h, ENDOFBUF): + | cmp PTR, DECODER->jit_end + | jae ->exit_jit - |=>m->jit_endofmsg_pclabel: + |=>upb_getpclabel(plan, h, ENDOFMSG): // We are at end-of-submsg: call endmsg handler (if any): - if (m->endmsg) { + upb_endmsg_handler *endmsg = upb_handlers_getendmsg(h); + if (endmsg) { // void endmsg(void *closure, upb_status *status) { - | mov ARG1_64, FRAME->closure - | lea ARG2_64, DECODER->dispatcher.status - | callp m->endmsg - } - - if (m->is_group) { - // Advance past the "end group" tag. - // TODO: Handle UPB_BREAK - | mov DECODER->ptr, PTR + | mov ARG1_64, SINKFRAME->closure + | lea ARG2_64, DECODER->sink.status + | callp endmsg } // Counter previous alignment. @@ -657,9 +763,9 @@ static void upb_decoderplan_jit(upb_decoderplan *plan) { // Align stack. | sub rsp, 8 | mov DECODER, ARG1_64 - | mov FRAME, DECODER:ARG1_64->dispatcher.top - | lea BYTEREGION, DECODER:ARG1_64->str_byteregion - | mov CLOSURE, FRAME->closure + | mov FRAME, DECODER:ARG1_64->top + | mov SINKFRAME, DECODER:ARG1_64->sink.top + | mov CLOSURE, SINKFRAME->closure | mov PTR, DECODER->ptr // TODO: push return addresses for re-entry (will be necessary for multiple @@ -680,54 +786,65 @@ static void upb_decoderplan_jit(upb_decoderplan *plan) { | leave | ret - upb_handlers *h = plan->handlers; - for (int i = 0; i < h->msgs_len; i++) - upb_decoderplan_jit_msg(plan, h->msgs[i]); -} - -static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f, - uint32_t *pclabel_count) { - f->jit_pclabel = (*pclabel_count)++; - f->jit_pclabel_notypecheck = (*pclabel_count)++; -} - -static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m, - uint32_t *pclabel_count) { - m->jit_startmsg_pclabel = (*pclabel_count)++; - m->jit_afterstartmsg_pclabel = (*pclabel_count)++; - m->jit_endofbuf_pclabel = (*pclabel_count)++; - m->jit_endofmsg_pclabel = (*pclabel_count)++; - m->jit_dyndispatch_pclabel = (*pclabel_count)++; - m->jit_unknownfield_pclabel = (*pclabel_count)++; - 
m->max_field_number = 0; upb_inttable_iter i; - upb_inttable_begin(&i, &m->fieldtab); + upb_inttable_begin(&i, &plan->msginfo); for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - uint32_t key = upb_inttable_iter_key(&i); - m->max_field_number = UPB_MAX(m->max_field_number, key); - upb_fhandlers *f = upb_value_getptr(upb_inttable_iter_value(&i)); - upb_decoderplan_jit_assignfieldlabs(f, pclabel_count); + const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i); + upb_decoderplan_jit_msg(plan, h); + } +} + +static void upb_decoderplan_jit_assignpclabels(upb_decoderplan *plan, + const upb_handlers *h) { + // Limit the DFS. + if (upb_inttable_lookupptr(&plan->pclabels, h)) return; + + upb_inttable_insertptr(&plan->pclabels, h, + upb_value_uint32(plan->pclabel_count)); + plan->pclabel_count += TOTAL_MSG_PCLABELS; + + upb_jitmsginfo *info = malloc(sizeof(*info)); + info->max_field_number = 0; + upb_inttable_insertptr(&plan->msginfo, h, upb_value_ptr(info)); + + upb_msg_iter i; + upb_msg_begin(&i, upb_handlers_msgdef(h)); + for(; !upb_msg_done(&i); upb_msg_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + info->max_field_number = + UPB_MAX(info->max_field_number, upb_fielddef_number(f)); + upb_inttable_insertptr(&plan->pclabels, f, + upb_value_uint32(plan->pclabel_count)); + plan->pclabel_count += TOTAL_FIELD_PCLABELS; + + // Discover the whole graph of handlers depth-first. We will probably + // revise this later to be more explicit about the list of handlers that + // the plan should include. + if (upb_fielddef_issubmsg(f)) { + const upb_handlers *subh = upb_handlers_getsubhandlers(h, f); + if (subh) upb_decoderplan_jit_assignpclabels(plan, subh); + } } // TODO: support large field numbers by either using a hash table or // generating code for a binary search. For now large field numbers // will just fall back to the table decoder. 
- m->max_field_number = UPB_MIN(m->max_field_number, 16000); - m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*)); + info->max_field_number = UPB_MIN(info->max_field_number, 16000); + info->tablearray = malloc((info->max_field_number + 1) * sizeof(void*)); } static void upb_decoderplan_makejit(upb_decoderplan *plan) { + upb_inttable_init(&plan->msginfo, UPB_CTYPE_PTR); plan->debug_info = NULL; // Assign pclabels. - uint32_t pclabel_count = 0; - upb_handlers *h = plan->handlers; - for (int i = 0; i < h->msgs_len; i++) - upb_decoderplan_jit_assignmsglabs(h->msgs[i], &pclabel_count); + plan->pclabel_count = 0; + upb_inttable_init(&plan->pclabels, UPB_CTYPE_UINT32); + upb_decoderplan_jit_assignpclabels(plan, plan->handlers); void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals)); dasm_init(plan, 1); dasm_setupglobal(plan, globals, UPB_JIT_GLOBAL__MAX); - dasm_growpc(plan, pclabel_count); + dasm_growpc(plan, plan->pclabel_count); dasm_setup(plan, upb_jit_actionlist); upb_decoderplan_jit(plan); @@ -744,38 +861,53 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) { dasm_encode(plan, plan->jit_code); // Create dispatch tables. - for (int i = 0; i < h->msgs_len; i++) { - upb_mhandlers *m = h->msgs[i]; + upb_inttable_iter i; + upb_inttable_begin(&i, &plan->msginfo); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i); + upb_jitmsginfo *mi = upb_getmsginfo(plan, h); // We jump to after the startmsg handler since it is called before entering // the JIT (either by upb_decoder or by a previous call to the JIT). 
- m->jit_func = - plan->jit_code + dasm_getpclabel(plan, m->jit_afterstartmsg_pclabel); - for (uint32_t j = 0; j <= m->max_field_number; j++) { - upb_fhandlers *f = upb_mhandlers_lookup(m, j); + mi->jit_func = plan->jit_code + + dasm_getpclabel(plan, upb_getpclabel(plan, h, AFTER_STARTMSG)); + for (uint32_t j = 0; j <= mi->max_field_number; j++) { + const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), j); if (f) { - m->tablearray[j] = - plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel); + mi->tablearray[j] = plan->jit_code + + dasm_getpclabel(plan, upb_getpclabel(plan, f, FIELD)); } else { // TODO: extend the JIT to handle unknown fields. // For the moment we exit the JIT for any unknown field. - m->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit]; + mi->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit]; } } } + upb_inttable_uninit(&plan->pclabels); + dasm_free(plan); free(globals); mprotect(plan->jit_code, plan->jit_size, PROT_EXEC | PROT_READ); +#ifndef NDEBUG // View with: objdump -M intel -D -b binary -mi386 -Mx86-64 /tmp/machine-code // Or: ndisasm -b 64 /tmp/machine-code FILE *f = fopen("/tmp/machine-code", "wb"); /* Best-effort debug dump: skip it when the file cannot be opened. */ if (f) { fwrite(plan->jit_code, plan->jit_size, 1, f); fclose(f); } +#endif } static void upb_decoderplan_freejit(upb_decoderplan *plan) { + upb_inttable_iter i; + upb_inttable_begin(&i, &plan->msginfo); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + upb_jitmsginfo *mi = upb_value_getptr(upb_inttable_iter_value(&i)); + free(mi->tablearray); + free(mi); + } + upb_inttable_uninit(&plan->msginfo); munmap(plan->jit_code, plan->jit_size); free(plan->debug_info); // TODO: unregister @@ -783,7 +915,7 @@ static void upb_decoderplan_freejit(upb_decoderplan *plan) { static void upb_decoder_enterjit(upb_decoder *d) { if (d->plan->jit_code && - d->dispatcher.top == d->dispatcher.stack && + d->sink.top == d->sink.stack && d->ptr && d->ptr < d->jit_end) { #ifndef NDEBUG register uint64_t rbx asm ("rbx") = 11; @@ -795,7 +927,9 @@ static void 
upb_decoder_enterjit(upb_decoder *d) { // Decodes as many fields as possible, updating d->ptr appropriately, // before falling through to the slow(er) path. void (*upb_jit_decode)(upb_decoder *d, void*) = (void*)d->plan->jit_code; - upb_jit_decode(d, d->plan->handlers->msgs[d->msg_offset]->jit_func); + upb_jitmsginfo *mi = upb_getmsginfo(d->plan, d->plan->handlers); + assert(mi); + upb_jit_decode(d, mi->jit_func); assert(d->ptr <= d->end); // Test that callee-save registers were properly restored. diff --git a/upb/pb/glue.c b/upb/pb/glue.c index 40b901d..4e69c0c 100644 --- a/upb/pb/glue.c +++ b/upb/pb/glue.c @@ -5,10 +5,14 @@ * Author: Josh Haberman */ +#include "upb/pb/glue.h" + +#include +#include +#include #include "upb/bytestream.h" #include "upb/descriptor/reader.h" #include "upb/pb/decoder.h" -#include "upb/pb/glue.h" upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, void *owner, upb_status *status) { @@ -16,16 +20,14 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, upb_stringsrc_init(&strsrc); upb_stringsrc_reset(&strsrc, str, len); - upb_handlers *h = upb_handlers_new(); - upb_descreader_reghandlers(h); - + const upb_handlers *h = upb_descreader_newhandlers(&h); upb_decoderplan *p = upb_decoderplan_new(h, false); upb_decoder d; upb_decoder_init(&d); - upb_handlers_unref(h); + upb_handlers_unref(h, &h); upb_descreader r; upb_descreader_init(&r); - upb_decoder_resetplan(&d, p, 0); + upb_decoder_resetplan(&d, p); upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), &r); upb_success_t ret = upb_decoder_decode(&d); diff --git a/upb/pb/glue.h b/upb/pb/glue.h index 6179d8d..4bbc975 100644 --- a/upb/pb/glue.h +++ b/upb/pb/glue.h @@ -27,8 +27,7 @@ #define UPB_GLUE_H #include -#include "upb/upb.h" -#include "upb/def.h" +#include "upb/symtab.h" #ifdef __cplusplus extern "C" { @@ -55,6 +54,29 @@ char *upb_readfile(const char *filename, size_t *len); #ifdef __cplusplus } /* extern "C" */ + +namespace upb { 
+ +// All routines that load descriptors expect the descriptor to be a +// FileDescriptorSet. +inline bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname, + Status* status) { + return upb_load_descriptor_file_into_symtab(s, fname, status); +} + +inline bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str, + size_t len, Status* status) { + return upb_load_descriptor_into_symtab(s, str, len, status); +} + +// Templated so it can accept both string and std::string. +template +bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) { + return upb_load_descriptor_into_symtab(s, desc.c_str(), desc.size(), status); +} + +} // namespace upb + #endif #endif diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c index 2fe3452..3770afc 100644 --- a/upb/pb/textprinter.c +++ b/upb/pb/textprinter.c @@ -5,11 +5,14 @@ * Author: Josh Haberman */ +#include "upb/pb/textprinter.h" + #include #include #include +#include #include -#include "upb/pb/textprinter.h" +#include struct _upb_textprinter { upb_bytesink *sink; @@ -20,7 +23,7 @@ struct _upb_textprinter { #define CHECK(x) if ((x) < 0) goto err; -static int upb_textprinter_indent(upb_textprinter *p) { +static int indent(upb_textprinter *p) { if (!p->single_line) CHECK(upb_bytesink_putrepeated(p->sink, ' ', p->indent_depth*2)); return 0; @@ -28,37 +31,32 @@ err: return -1; } -static int upb_textprinter_endfield(upb_textprinter *p) { +static int endfield(upb_textprinter *p) { CHECK(upb_bytesink_putc(p->sink, p->single_line ? ' ' : '\n')); return 0; err: return -1; } -static int upb_textprinter_putescaped(upb_textprinter *p, - const upb_byteregion *bytes, - bool preserve_utf8) { +static int putescaped(upb_textprinter *p, const char *buf, size_t len, + bool preserve_utf8) { // Based on CEscapeInternal() from Google's protobuf release. - // TODO; we could read directly from a bytesrc's buffer instead. - // TODO; we could write byteregions to the sink when possible. 
- char dstbuf[512], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf); - char *buf = malloc(upb_byteregion_len(bytes)), *src = buf; - char *end = src + upb_byteregion_len(bytes); - upb_byteregion_copyall(bytes, buf); + char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf); + const char *end = buf + len; // I think hex is prettier and more useful, but proto2 uses octal; should // investigate whether it can parse hex also. const bool use_hex = false; bool last_hex_escape = false; // true if last output char was \xNN - for (; src < end; src++) { + for (; buf < end; buf++) { if (dstend - dst < 4) { CHECK(upb_bytesink_write(p->sink, dstbuf, dst - dstbuf)); dst = dstbuf; } bool is_hex_escape = false; - switch (*src) { + switch (*buf) { case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break; case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break; case '\t': *(dst++) = '\\'; *(dst++) = 't'; break; @@ -66,123 +64,123 @@ static int upb_textprinter_putescaped(upb_textprinter *p, case '\'': *(dst++) = '\\'; *(dst++) = '\''; break; case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break; default: - // Note that if we emit \xNN and the src character after that is a hex + // Note that if we emit \xNN and the buf character after that is a hex // digit then that digit must be escaped too to prevent it being // interpreted as part of the character code by C. - if ((!preserve_utf8 || (uint8_t)*src < 0x80) && - (!isprint(*src) || (last_hex_escape && isxdigit(*src)))) { - sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*src); + if ((!preserve_utf8 || (uint8_t)*buf < 0x80) && + (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) { + sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf); is_hex_escape = use_hex; dst += 4; } else { - *(dst++) = *src; break; + *(dst++) = *buf; break; } } last_hex_escape = is_hex_escape; } // Flush remaining data. 
CHECK(upb_bytesink_write(p->sink, dstbuf, dst - dstbuf)); /* pending bytes live in [dstbuf, dst), so write from dstbuf */ - free(buf); return 0; err: - free(buf); return -1; } -#define TYPE(member, fmt) \ - static upb_flow_t upb_textprinter_put ## member(void *_p, upb_value fval, \ - upb_value val) { \ +#define TYPE(name, ctype, fmt) \ + static bool put ## name(void *_p, void *fval, ctype val) { \ upb_textprinter *p = _p; \ - const upb_fielddef *f = upb_value_getfielddef(fval); \ - uint64_t start_ofs = upb_bytesink_getoffset(p->sink); \ - CHECK(upb_textprinter_indent(p)); \ + const upb_fielddef *f = fval; \ + CHECK(indent(p)); \ CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f))); \ CHECK(upb_bytesink_writestr(p->sink, ": ")); \ - CHECK(upb_bytesink_printf(p->sink, fmt, upb_value_get ## member(val))); \ - CHECK(upb_textprinter_endfield(p)); \ - return UPB_CONTINUE; \ + CHECK(upb_bytesink_printf(p->sink, fmt, val)); \ + CHECK(endfield(p)); \ + return true; \ err: \ - upb_bytesink_rewind(p->sink, start_ofs); \ - return UPB_BREAK; \ + return false; \ } #define STRINGIFY_HELPER(x) #x #define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x) -TYPE(double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g") -TYPE(float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g") -TYPE(int64, "%" PRId64) -TYPE(uint64, "%" PRIu64) -TYPE(int32, "%" PRId32) -TYPE(uint32, "%" PRIu32); -TYPE(bool, "%hhu"); +TYPE(int32, int32_t, "%" PRId32) +TYPE(int64, int64_t, "%" PRId64) +TYPE(uint32, uint32_t, "%" PRIu32); +TYPE(uint64, uint64_t, "%" PRIu64) +TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g") +TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g") +TYPE(bool, bool, "%hhu"); // Output a symbolic value from the enum if found, else just print as int32. 
-static upb_flow_t upb_textprinter_putenum(void *_p, upb_value fval, - upb_value val) { +static bool putenum(void *_p, void *fval, int32_t val) { upb_textprinter *p = _p; - uint64_t start_ofs = upb_bytesink_getoffset(p->sink); - const upb_fielddef *f = upb_value_getfielddef(fval); - const upb_enumdef *enum_def = - upb_downcast_enumdef_const(upb_fielddef_subdef(f)); - const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val)); + const upb_fielddef *f = fval; + const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f)); + const char *label = upb_enumdef_iton(enum_def, val); if (label) { CHECK(upb_bytesink_writestr(p->sink, label)); } else { - CHECK(upb_textprinter_putint32(_p, fval, val)); + CHECK(putint32(_p, fval, val)); } - return UPB_CONTINUE; + return true; err: - upb_bytesink_rewind(p->sink, start_ofs); - return UPB_BREAK; + return false; } -static upb_flow_t upb_textprinter_putstr(void *_p, upb_value fval, - upb_value val) { +static void *startstr(void *_p, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); upb_textprinter *p = _p; - uint64_t start_ofs = upb_bytesink_getoffset(p->sink); - const upb_fielddef *f = upb_value_getfielddef(fval); - CHECK(upb_bytesink_putc(p->sink, '"')); - CHECK(upb_textprinter_putescaped(p, upb_value_getbyteregion(val), - f->type == UPB_TYPE(STRING))); CHECK(upb_bytesink_putc(p->sink, '"')); - return UPB_CONTINUE; + return p; err: - upb_bytesink_rewind(p->sink, start_ofs); return UPB_BREAK; } -static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) { +static bool endstr(void *_p, void *fval) { + UPB_UNUSED(fval); + upb_textprinter *p = _p; + CHECK(upb_bytesink_putc(p->sink, '"')); + return true; +err: + return false; +} + +static size_t putstr(void *_p, void *fval, const char *buf, size_t len) { upb_textprinter *p = _p; - uint64_t start_ofs = upb_bytesink_getoffset(p->sink); - const upb_fielddef *f = upb_value_getfielddef(fval); - 
CHECK(upb_textprinter_indent(p)); + const upb_fielddef *f = fval; + CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE(STRING))); + return len; +err: + return 0; +} + +static void *startsubmsg(void *_p, void *fval) { + upb_textprinter *p = _p; + const upb_fielddef *f = fval; + CHECK(indent(p)); CHECK(upb_bytesink_printf(p->sink, "%s {", upb_fielddef_name(f))); if (!p->single_line) CHECK(upb_bytesink_putc(p->sink, '\n')); p->indent_depth++; - return UPB_CONTINUE_WITH(_p); + return _p; err: - upb_bytesink_rewind(p->sink, start_ofs); - return UPB_SBREAK; + return UPB_BREAK; } -static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_value fval) { - (void)fval; +static bool endsubmsg(void *_p, void *fval) { + UPB_UNUSED(fval); upb_textprinter *p = _p; - uint64_t start_ofs = upb_bytesink_getoffset(p->sink); p->indent_depth--; - CHECK(upb_textprinter_indent(p)); + CHECK(indent(p)); CHECK(upb_bytesink_putc(p->sink, '}')); - CHECK(upb_textprinter_endfield(p)); - return UPB_CONTINUE; + CHECK(endfield(p)); + return true; err: - upb_bytesink_rewind(p->sink, start_ofs); - return UPB_BREAK; + return false; } -upb_textprinter *upb_textprinter_new(void) { +upb_textprinter *upb_textprinter_new() { upb_textprinter *p = malloc(sizeof(*p)); return p; } @@ -196,22 +194,61 @@ void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, p->indent_depth = 0; } -static void upb_textprinter_onfreg(void *c, upb_fhandlers *fh, const upb_fielddef *f) { +static void onmreg(void *c, upb_handlers *h) { (void)c; - upb_fhandlers_setstartsubmsg(fh, &upb_textprinter_startsubmsg); - upb_fhandlers_setendsubmsg(fh, &upb_textprinter_endsubmsg); -#define F(type) &upb_textprinter_put ## type - static upb_value_handler *fptrs[] = {NULL, F(double), F(float), F(int64), - F(uint64), F(int32), F(uint64), F(uint32), F(bool), F(str), - NULL, NULL, F(str), F(uint32), F(enum), F(int32), - F(int64), F(int32), F(int64)}; - upb_fhandlers_setvalue(fh, fptrs[f->type]); - upb_value fval; - 
upb_value_setfielddef(&fval, f); - upb_fhandlers_setfval(fh, fval); + const upb_msgdef *m = upb_handlers_msgdef(h); + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + switch (upb_fielddef_type(f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_SINT32: + case UPB_TYPE_SFIXED32: + upb_handlers_setint32(h, f, putint32, f, NULL); + break; + case UPB_TYPE_SINT64: + case UPB_TYPE_SFIXED64: + case UPB_TYPE_INT64: + upb_handlers_setint64(h, f, putint64, f, NULL); + break; + case UPB_TYPE_UINT32: + case UPB_TYPE_FIXED32: + upb_handlers_setuint32(h, f, putuint32, f, NULL); + break; + case UPB_TYPE_UINT64: + case UPB_TYPE_FIXED64: + upb_handlers_setuint64(h, f, putuint64, f, NULL); + break; + case UPB_TYPE_FLOAT: + upb_handlers_setfloat(h, f, putfloat, f, NULL); + break; + case UPB_TYPE_DOUBLE: + upb_handlers_setdouble(h, f, putdouble, f, NULL); + break; + case UPB_TYPE_BOOL: + upb_handlers_setbool(h, f, putbool, f, NULL); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + upb_handlers_setstartstr(h, f, startstr, f, NULL); + upb_handlers_setstring(h, f, putstr, f, NULL); + upb_handlers_setendstr(h, f, endstr, f, NULL); + break; + case UPB_TYPE_GROUP: + case UPB_TYPE_MESSAGE: + upb_handlers_setstartsubmsg(h, f, &startsubmsg, f, NULL); + upb_handlers_setendsubmsg(h, f, &endsubmsg, f, NULL); + break; + case UPB_TYPE_ENUM: + upb_handlers_setint32(h, f, putenum, f, NULL); + break; /* enum fields are handled here; without this break they hit the default assert */ + default: + assert(false); + break; + } + } } -upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, const upb_msgdef *m) { - return upb_handlers_regmsgdef( - h, m, NULL, &upb_textprinter_onfreg, NULL); +const upb_handlers *upb_textprinter_newhandlers(const void *owner, + const upb_msgdef *m) { + return upb_handlers_newfrozen(m, owner, &onmreg, NULL); } diff --git a/upb/pb/textprinter.h b/upb/pb/textprinter.h index 174148e..6d111d2 100644 --- a/upb/pb/textprinter.h +++ b/upb/pb/textprinter.h @@ -18,11 +18,12 @@ extern "C" { 
struct _upb_textprinter; typedef struct _upb_textprinter upb_textprinter; -upb_textprinter *upb_textprinter_new(void); +upb_textprinter *upb_textprinter_new(); void upb_textprinter_free(upb_textprinter *p); void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, bool single_line); -upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, const upb_msgdef *m); +const upb_handlers *upb_textprinter_newhandlers(const void *owner, + const upb_msgdef *m); #ifdef __cplusplus } /* extern "C" */ diff --git a/upb/pb/varint.c b/upb/pb/varint.c index 45caec1..d6d6161 100644 --- a/upb/pb/varint.c +++ b/upb/pb/varint.c @@ -7,16 +7,64 @@ #include "upb/pb/varint.h" +// A basic branch-based decoder, uses 32-bit values to get good performance +// on 32-bit architectures (but performs well on 64-bits also). +// This scheme comes from the original Google Protobuf implementation (proto2). +upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) { + upb_decoderet err = {NULL, 0}; + const char *p = r.p; + uint32_t low = (uint32_t)r.val; + uint32_t high = 0; + uint32_t b; + b = *(p++); low |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7fU) << 28; + high = (b & 0x7fU) >> 4; if (!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7fU) << 3; if (!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done; + return err; + +done: + r.val = ((uint64_t)high << 32) | low; + r.p = p; + return r; +} + +// Like the previous, but uses 64-bit values. 
+upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) { + const char *p = r.p; + uint64_t val = r.val; + uint64_t b; + upb_decoderet err = {NULL, 0}; + b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done; + return err; + +done: + r.val = val; + r.p = p; + return r; +} + // Given an encoded varint v, returns an integer with a single bit set that // indicates the end of the varint. Subtracting one from this value will // yield a mask that leaves only bits that are part of the varint. Returns // 0 if the varint is unterminated. -INLINE uint64_t upb_get_vstopbit(uint64_t v) { +static uint64_t upb_get_vstopbit(uint64_t v) { uint64_t cbits = v | 0x7f7f7f7f7f7f7f7fULL; return ~cbits & (cbits+1); } -INLINE uint64_t upb_get_vmask(uint64_t v) { return upb_get_vstopbit(v) - 1; } +// A branchless decoder. Credit to Pascal Massimino for the bit-twiddling. upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) { uint64_t b; memcpy(&b, r.p, sizeof(b)); @@ -35,14 +83,15 @@ upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) { return my_r; } +// A branchless decoder. Credit to Daniel Wright for the bit-twiddling. 
upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) { uint64_t b; memcpy(&b, r.p, sizeof(b)); uint64_t stop_bit = upb_get_vstopbit(b); b &= (stop_bit - 1); - b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f); - b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff); - b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff); + b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL); + b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL); + b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL); if (stop_bit == 0) { // Error: unterminated varint. upb_decoderet err_r = {(void*)0, 0}; diff --git a/upb/pb/varint.h b/upb/pb/varint.h index c0e0134..c4d67ba 100644 --- a/upb/pb/varint.h +++ b/upb/pb/varint.h @@ -49,71 +49,32 @@ typedef struct { uint64_t val; } upb_decoderet; -// A basic branch-based decoder, uses 32-bit values to get good performance -// on 32-bit architectures (but performs well on 64-bits also). -INLINE upb_decoderet upb_vdecode_branch32(const char *p) { - upb_decoderet r = {NULL, 0}; - uint32_t low, high = 0; - uint32_t b; - b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 28; - high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done; - return r; - -done: - r.val = ((uint64_t)high << 32) | low; - r.p = p; - return r; -} - -// Like the previous, but uses 64-bit values. 
-INLINE upb_decoderet upb_vdecode_branch64(const char *p) { - uint64_t val; - uint64_t b; - upb_decoderet r = {NULL, 0}; - b = *(p++); val = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done; - return r; - -done: - r.val = val; - r.p = p; - return r; -} - -// Decodes a varint of at most 8 bytes without branching (except for error). +// Four functions for decoding a varint of at most eight bytes. They are all +// functionally identical, but are implemented in different ways and likely have +// different performance profiles. We keep them around for performance testing. +// +// Note that these functions may not read byte-by-byte, so they must not be used +// unless there are at least eight bytes left in the buffer! +upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r); +upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r); upb_decoderet upb_vdecode_max8_wright(upb_decoderet r); - -// Another implementation of the previous. upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r); // Template for a function that checks the first two bytes with branching -// and dispatches 2-10 bytes with a separate function. 
-#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ -INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \ - uint8_t *p = (uint8_t*)_p; \ - if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7f}; return r; } \ - upb_decoderet r = {_p + 2, (*p & 0x7f) | ((*(p + 1) & 0x7f) << 7)}; \ - if ((*(p + 1) & 0x80) == 0) return r; \ - return decode_max8_function(r); \ +// and dispatches 2-10 bytes with a separate function. Note that this may read +// up to 10 bytes, so it must not be used unless there are at least ten bytes +// left in the buffer! +#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ +INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \ + uint8_t *p = (uint8_t*)_p; \ + if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; } \ + upb_decoderet r = {_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)}; \ + if ((*(p + 1) & 0x80) == 0) return r; \ + return decode_max8_function(r); \ } +UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32); +UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64); UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright); UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino); #undef UPB_VARINT_DECODER_CHECK2 @@ -121,11 +82,10 @@ UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino); // Our canonical functions for decoding varints, based on the currently // favored best-performing implementations. INLINE upb_decoderet upb_vdecode_fast(const char *p) { - // Use nobranch2 on 64-bit, branch32 on 32-bit. 
if (sizeof(long) == 8) return upb_vdecode_check2_massimino(p); else - return upb_vdecode_branch32(p); + return upb_vdecode_check2_branch32(p); } INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) { @@ -154,9 +114,9 @@ INLINE size_t upb_vencode64(uint64_t val, char *buf) { if (val == 0) { buf[0] = 0; return 1; } size_t i = 0; while (val) { - uint8_t byte = val & 0x7f; + uint8_t byte = val & 0x7fU; val >>= 7; - if (val) byte |= 0x80; + if (val) byte |= 0x80U; buf[i++] = byte; } return i; @@ -169,7 +129,7 @@ INLINE uint64_t upb_vencode32(uint32_t val) { uint64_t ret = 0; assert(bytes <= 5); memcpy(&ret, buf, bytes); - assert(ret <= 0xffffffffff); + assert(ret <= 0xffffffffffU); return ret; } diff --git a/upb/refcount.c b/upb/refcount.c deleted file mode 100644 index d729a2a..0000000 --- a/upb/refcount.c +++ /dev/null @@ -1,236 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2012 Google Inc. See LICENSE for details. - * Author: Josh Haberman - */ - -#include -#include "upb/refcount.h" - -// TODO(haberman): require client to define these if ref debugging is on. 
-#ifndef UPB_LOCK -#define UPB_LOCK -#endif - -#ifndef UPB_UNLOCK -#define UPB_UNLOCK -#endif - -/* arch-specific atomic primitives *******************************************/ - -#ifdef UPB_THREAD_UNSAFE ////////////////////////////////////////////////////// - -INLINE void upb_atomic_inc(uint32_t *a) { (*a)++; } -INLINE bool upb_atomic_dec(uint32_t *a) { return --(*a) == 0; } - -#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 /////////////////// - -INLINE void upb_atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); } -INLINE bool upb_atomic_dec(uint32_t *a) { - return __sync_sub_and_fetch(a, 1) == 0; -} - -#elif defined(WIN32) /////////////////////////////////////////////////////////// - -#include - -INLINE void upb_atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); } -INLINE bool upb_atomic_dec(upb_atomic_t *a) { - return InterlockedDecrement(&a->val) == 0; -} - -#else -#error Atomic primitives not defined for your platform/CPU. \ - Implement them or compile with UPB_THREAD_UNSAFE. -#endif - -// Reserved index values. -#define UPB_INDEX_UNDEFINED UINT16_MAX -#define UPB_INDEX_NOT_IN_STACK (UINT16_MAX - 1) - -static void upb_refcount_merge(upb_refcount *r, upb_refcount *from) { - if (upb_refcount_merged(r, from)) return; - *r->count += *from->count; - free(from->count); - upb_refcount *base = from; - - // Set all refcount pointers in the "from" chain to the merged refcount. - do { from->count = r->count; } while ((from = from->next) != base); - - // Merge the two circularly linked lists by swapping their next pointers. 
- upb_refcount *tmp = r->next; - r->next = base->next; - base->next = tmp; -} - -// Tarjan's algorithm, see: -// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm - -typedef struct { - int index; - upb_refcount **stack; - int stack_len; - upb_getsuccessors *func; -} upb_tarjan_state; - -static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state); - -void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *_state) { - upb_tarjan_state *state = _state; - if (subobj->index == UPB_INDEX_UNDEFINED) { - // Subdef has not yet been visited; recurse on it. - upb_refcount_dofindscc(subobj, state); - obj->lowlink = UPB_MIN(obj->lowlink, subobj->lowlink); - } else if (subobj->index != UPB_INDEX_NOT_IN_STACK) { - // Subdef is in the stack and hence in the current SCC. - obj->lowlink = UPB_MIN(obj->lowlink, subobj->index); - } -} - -static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state) { - obj->index = state->index; - obj->lowlink = state->index; - state->index++; - state->stack[state->stack_len++] = obj; - - state->func(obj, state); // Visit successors. - - if (obj->lowlink == obj->index) { - upb_refcount *scc_obj; - while ((scc_obj = state->stack[--state->stack_len]) != obj) { - upb_refcount_merge(obj, scc_obj); - scc_obj->index = UPB_INDEX_NOT_IN_STACK; - } - obj->index = UPB_INDEX_NOT_IN_STACK; - } -} - -bool upb_refcount_findscc(upb_refcount **refs, int n, upb_getsuccessors *func) { - // TODO(haberman): allocate less memory. We can't use n as a bound because - // it doesn't include fielddefs. Could either use a dynamically-resizing - // array or think of some other way. 
- upb_tarjan_state state = {0, malloc(UINT16_MAX * sizeof(void*)), 0, func}; - if (state.stack == NULL) return false; - for (int i = 0; i < n; i++) - if (refs[i]->index == UPB_INDEX_UNDEFINED) - upb_refcount_dofindscc(refs[i], &state); - free(state.stack); - return true; -} - -#ifdef UPB_DEBUG_REFS -static void upb_refcount_track(const upb_refcount *r, const void *owner) { - // Caller must not already own a ref. - assert(upb_inttable_lookup(r->refs, (uintptr_t)owner) == NULL); - - // If a ref is leaked we want to blame the leak on the whoever leaked the - // ref, not on who originally allocated the refcounted object. We accomplish - // this as follows. When a ref is taken in DEBUG_REFS mode, we malloc() some - // memory and arrange setup pointers like so: - // - // upb_refcount - // +----------+ +---------+ - // | count |<-+ | - // +----------+ +----------+ - // | table |---X-->| malloc'd | - // +----------+ | memory | - // +----------+ - // - // Since the "malloc'd memory" is allocated inside of "ref" and free'd in - // unref, it will cause a leak if not unref'd. And since the leaked memory - // points to the object itself, the object will be considered "indirectly - // lost" by tools like Valgrind and not shown unless requested (which is good - // because the object's creator may not be responsible for the leak). But we - // have to hide the pointer marked "X" above from Valgrind, otherwise the - // malloc'd memory will appear to be indirectly leaked and the object itself - // will still be considered the primary leak. We hide this pointer from - // Valgrind (et all) by doing a bitwise not on it. 
- const upb_refcount **target = malloc(sizeof(void*)); - uintptr_t obfuscated = ~(uintptr_t)target; - *target = r; - upb_inttable_insert(r->refs, (uintptr_t)owner, upb_value_uint64(obfuscated)); -} - -static void upb_refcount_untrack(const upb_refcount *r, const void *owner) { - upb_value v; - bool success = upb_inttable_remove(r->refs, (uintptr_t)owner, &v); - assert(success); - if (success) { - // Must un-obfuscate the pointer (see above). - free((void*)(~upb_value_getuint64(v))); - } -} -#endif - - -/* upb_refcount **************************************************************/ - -bool upb_refcount_init(upb_refcount *r, const void *owner) { - (void)owner; - r->count = malloc(sizeof(uint32_t)); - if (!r->count) return false; - // Initializing this here means upb_refcount_findscc() can only run once for - // each refcount; may need to revise this to be more flexible. - r->index = UPB_INDEX_UNDEFINED; - r->next = r; -#ifdef UPB_DEBUG_REFS - // We don't detect malloc() failures for UPB_DEBUG_REFS. 
- r->refs = malloc(sizeof(*r->refs)); - upb_inttable_init(r->refs); - *r->count = 0; - upb_refcount_ref(r, owner); -#else - *r->count = 1; -#endif - return true; -} - -void upb_refcount_uninit(upb_refcount *r) { - (void)r; -#ifdef UPB_DEBUG_REFS - assert(upb_inttable_count(r->refs) == 0); - upb_inttable_uninit(r->refs); - free(r->refs); -#endif -} - -// Thread-safe operations ////////////////////////////////////////////////////// - -void upb_refcount_ref(const upb_refcount *r, const void *owner) { - (void)owner; - upb_atomic_inc(r->count); -#ifdef UPB_DEBUG_REFS - UPB_LOCK; - upb_refcount_track(r, owner); - UPB_UNLOCK; -#endif -} - -bool upb_refcount_unref(const upb_refcount *r, const void *owner) { - (void)owner; - bool ret = upb_atomic_dec(r->count); -#ifdef UPB_DEBUG_REFS - UPB_LOCK; - upb_refcount_untrack(r, owner); - UPB_UNLOCK; -#endif - if (ret) free(r->count); - return ret; -} - -void upb_refcount_donateref( - const upb_refcount *r, const void *from, const void *to) { - (void)r; (void)from; (void)to; - assert(from != to); -#ifdef UPB_DEBUG_REFS - UPB_LOCK; - upb_refcount_track(r, to); - upb_refcount_untrack(r, from); - UPB_UNLOCK; -#endif -} - -bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2) { - return r->count == r2->count; -} diff --git a/upb/refcount.h b/upb/refcount.h deleted file mode 100644 index 91ad3b8..0000000 --- a/upb/refcount.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Google Inc. See LICENSE for details. - * Author: Josh Haberman - * - * A thread-safe refcount that can optionally track references for debugging - * purposes. It helps avoid circular references by allowing a - * strongly-connected component in the graph to share a refcount. - * - * This interface is internal to upb. 
- */ - -#ifndef UPB_REFCOUNT_H_ -#define UPB_REFCOUNT_H_ - -#include -#include -#include "upb/table.h" - -#ifndef NDEBUG -#define UPB_DEBUG_REFS -#endif - -typedef struct _upb_refcount { - uint32_t *count; - struct _upb_refcount *next; // Circularly-linked list of this SCC. - uint16_t index; // For SCC algorithm. - uint16_t lowlink; // For SCC algorithm. -#ifdef UPB_DEBUG_REFS - // Make this a pointer so that we can modify it inside of const methods - // without ugly casts. - upb_inttable *refs; -#endif -} upb_refcount; - -// NON THREAD SAFE operations ////////////////////////////////////////////////// - -// Initializes the refcount with a single ref for the given owner. Returns -// NULL if memory could not be allocated. -bool upb_refcount_init(upb_refcount *r, const void *owner); - -// Uninitializes the refcount. May only be called after unref() returns true. -void upb_refcount_uninit(upb_refcount *r); - -// Finds strongly-connected components among some set of objects and merges all -// refcounts that share a SCC. The given function will be called when the -// algorithm needs to visit children of a particular object; the function -// should call upb_refcount_visit() once for each child obj. -// -// Returns false if memory allocation failed. -typedef void upb_getsuccessors(upb_refcount *obj, void*); -bool upb_refcount_findscc(upb_refcount **objs, int n, upb_getsuccessors *func); -void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *closure); - -// Thread-safe operations ////////////////////////////////////////////////////// - -// Increases the ref count, the new ref is owned by "owner" which must not -// already own a ref. Circular reference chains are not allowed. -void upb_refcount_ref(const upb_refcount *r, const void *owner); - -// Release a ref owned by owner, returns true if that was the last ref. 
-bool upb_refcount_unref(const upb_refcount *r, const void *owner); - -// Moves an existing ref from ref_donor to new_owner, without changing the -// overall ref count. -void upb_refcount_donateref( - const upb_refcount *r, const void *from, const void *to); - -// Returns true if these two objects share a refcount. -bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2); - -#endif // UPB_REFCOUNT_H_ diff --git a/upb/refcounted.c b/upb/refcounted.c new file mode 100644 index 0000000..54ad735 --- /dev/null +++ b/upb/refcounted.c @@ -0,0 +1,776 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * Our key invariants are: + * 1. reference cycles never span groups + * 2. for ref2(to, from), we increment to's count iff group(from) != group(to) + * + * The previous two are how we avoid leaking cycles. Other important + * invariants are: + * 3. for mutable objects "from" and "to", if there exists a ref2(to, from) + * this implies group(from) == group(to). (In practice, what we implement + * is even stronger; "from" and "to" will share a group if there has *ever* + * been a ref2(to, from), but all that is necessary for correctness is the + * weaker one). + * 4. mutable and immutable objects are never in the same group. 
+ */ + +#include "upb/refcounted.h" + +#include +#include + +uint32_t static_refcount = 1; + +/* arch-specific atomic primitives *******************************************/ + +#ifdef UPB_THREAD_UNSAFE ////////////////////////////////////////////////////// + +static void atomic_inc(uint32_t *a) { (*a)++; } +static bool atomic_dec(uint32_t *a) { return --(*a) == 0; } + +#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 /////////////////// + +static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); } +static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; } + +#elif defined(WIN32) /////////////////////////////////////////////////////////// + +#include + +static void atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); } +static bool atomic_dec(upb_atomic_t *a) { + return InterlockedDecrement(&a->val) == 0; +} + +#else +#error Atomic primitives not defined for your platform/CPU. \ + Implement them or compile with UPB_THREAD_UNSAFE. +#endif + + +/* Reference tracking (debug only) ********************************************/ + +#ifdef UPB_DEBUG_REFS + +#ifdef UPB_THREAD_UNSAFE + +static void upb_lock() {} +static void upb_unlock() {} + +#else + +// User must define functions that lock/unlock a global mutex and link this +// file against them. +void upb_lock(); +void upb_unlock(); + +#endif + +// UPB_DEBUG_REFS mode counts on being able to malloc() memory in some +// code-paths that can normally never fail, like upb_refcounted_ref(). Since +// we have no way to propagate out-of-memory errors back to the user, and since +// these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail. +#define CHECK_OOM(predicate) assert(predicate) + +typedef struct { + const upb_refcounted *obj; // Object we are taking a ref on. + int count; // How many refs there are (duplicates only allowed for ref2).
+ bool is_ref2; +} trackedref; + +trackedref *trackedref_new(const upb_refcounted *obj, bool is_ref2) { + trackedref *ret = malloc(sizeof(*ret)); + CHECK_OOM(ret); + ret->obj = obj; + ret->count = 1; + ret->is_ref2 = is_ref2; + return ret; +} + +// A reversible function for obfuscating a uintptr_t. +// This depends on sizeof(uintptr_t) <= sizeof(uint64_t), so would fail +// on 128-bit machines. +static uintptr_t obfuscate(const void *x) { return ~(uintptr_t)x; } + +static upb_value obfuscate_v(const void *x) { + return upb_value_uint64(obfuscate(x)); +} + +static const void *unobfuscate_v(upb_value x) { + return (void*)~upb_value_getuint64(x); +} + +// +// Stores tracked references according to the following scheme: +// (upb_inttable)reftracks = { +// (void*)owner -> (upb_inttable*) = { +// obfuscate((upb_refcounted*)obj) -> obfuscate((trackedref*)is_ref2) +// } +// } +// +// obfuscate() is a function that hides the link from the heap checker, so +// that it is not followed for the purposes of deciding what has "indirectly +// leaked." Even though we have a pointer to the trackedref*, we want it to +// appear leaked if it is not freed. +// +// This scheme gives us the following desirable properties: +// +// 1. We can easily determine whether an (owner->obj) ref already exists +// and error out if a duplicate ref is taken. +// +// 2. Because the trackedref is allocated with malloc() at the point that +// the ref is taken, that memory will be leaked if the ref is not released. +// Because the malloc'd memory points to the refcounted object, the object +// itself will only be considered "indirectly leaked" by smart memory +// checkers like Valgrind. This will correctly blame the ref leaker +// instead of the innocent code that allocated the object to begin with. +// +// 3. We can easily enumerate all of the ref2 refs for a given owner, which +// allows us to double-check that the object's visit() function is +// correctly implemented. 
+// +static upb_inttable reftracks = UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR); + +static upb_inttable *trygettab(const void *p) { + const upb_value *v = upb_inttable_lookupptr(&reftracks, p); + return v ? upb_value_getptr(*v) : NULL; +} + +// Gets or creates the tracking table for the given owner. +static upb_inttable *gettab(const void *p) { + upb_inttable *tab = trygettab(p); + if (tab == NULL) { + tab = malloc(sizeof(*tab)); + CHECK_OOM(tab); + upb_inttable_init(tab, UPB_CTYPE_UINT64); + upb_inttable_insertptr(&reftracks, p, upb_value_ptr(tab)); + } + return tab; +} + +static void track(const upb_refcounted *r, const void *owner, bool ref2) { + upb_lock(); + upb_inttable *refs = gettab(owner); + const upb_value *v = upb_inttable_lookup(refs, obfuscate(r)); + if (v) { + trackedref *ref = (trackedref*)unobfuscate_v(*v); + // Since we allow multiple ref2's for the same to/from pair without + // allocating separate memory for each one, we lose the fine-grained + // tracking behavior we get with regular refs. Since ref2s only happen + // inside upb, we'll accept this limitation until/unless there is a really + // difficult upb-internal bug that can't be figured out without it. + assert(ref2); + assert(ref->is_ref2); + ref->count++; + } else { + trackedref *ref = trackedref_new(r, ref2); + bool ok = upb_inttable_insert(refs, obfuscate(r), obfuscate_v(ref)); + CHECK_OOM(ok); + } + upb_unlock(); +} + +static void untrack(const upb_refcounted *r, const void *owner, bool ref2) { + upb_lock(); + upb_inttable *refs = gettab(owner); + const upb_value *v = upb_inttable_lookup(refs, obfuscate(r)); + // This assert will fail if an owner attempts to release a ref it didn't have. 
+ assert(v); + trackedref *ref = (trackedref*)unobfuscate_v(*v); + assert(ref->is_ref2 == ref2); + if (--ref->count == 0) { + free(ref); + upb_inttable_remove(refs, obfuscate(r), NULL); + if (upb_inttable_count(refs) == 0) { + upb_inttable_uninit(refs); + free(refs); + upb_inttable_removeptr(&reftracks, owner, NULL); + } + } + upb_unlock(); +} + +static void checkref(const upb_refcounted *r, const void *owner, bool ref2) { + upb_lock(); + upb_inttable *refs = gettab(owner); + const upb_value *v = upb_inttable_lookup(refs, obfuscate(r)); + assert(v); + trackedref *ref = (trackedref*)unobfuscate_v(*v); + assert(ref->obj == r); + assert(ref->is_ref2 == ref2); + upb_unlock(); +} + +// Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that +// originate from the given owner. +static void getref2s(const upb_refcounted *owner, upb_inttable *tab) { + upb_lock(); + upb_inttable *refs = trygettab(owner); + if (refs) { + upb_inttable_iter i; + upb_inttable_begin(&i, refs); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + trackedref *ref = (trackedref*)unobfuscate_v(upb_inttable_iter_value(&i)); + if (ref->is_ref2) { + upb_value count = upb_value_int32(ref->count); + bool ok = upb_inttable_insertptr(tab, ref->obj, count); + CHECK_OOM(ok); + } + } + } + upb_unlock(); +} + +typedef struct { + upb_inttable ref2; + const upb_refcounted *obj; +} check_state; + +static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj, + void *closure) { + check_state *s = closure; + assert(obj == s->obj); + assert(subobj); + upb_inttable *ref2 = &s->ref2; + upb_value v; + bool removed = upb_inttable_removeptr(ref2, subobj, &v); + // The following assertion will fail if the visit() function visits a subobj + // that it did not have a ref2 on, or visits the same subobj too many times. 
+ assert(removed); + int32_t newcount = upb_value_getint32(v) - 1; + if (newcount > 0) { + upb_inttable_insert(ref2, (uintptr_t)subobj, upb_value_int32(newcount)); + } +} + +static void visit(const upb_refcounted *r, upb_refcounted_visit *v, + void *closure) { + // In DEBUG_REFS mode we know what existing ref2 refs there are, so we know + // exactly the set of nodes that visit() should visit. So we verify visit()'s + // correctness here. + check_state state; + state.obj = r; + bool ok = upb_inttable_init(&state.ref2, UPB_CTYPE_INT32); + CHECK_OOM(ok); + getref2s(r, &state.ref2); + + // This should visit any children in the ref2 table. + if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state); + + // This assertion will fail if the visit() function missed any children. + assert(upb_inttable_count(&state.ref2) == 0); + upb_inttable_uninit(&state.ref2); + if (r->vtbl->visit) r->vtbl->visit(r, v, closure); +} + +#else + +static void track(const upb_refcounted *r, const void *owner, bool ref2) { + UPB_UNUSED(r); + UPB_UNUSED(owner); + UPB_UNUSED(ref2); +} + +static void untrack(const upb_refcounted *r, const void *owner, bool ref2) { + UPB_UNUSED(r); + UPB_UNUSED(owner); + UPB_UNUSED(ref2); +} + +static void checkref(const upb_refcounted *r, const void *owner, bool ref2) { + UPB_UNUSED(r); + UPB_UNUSED(owner); + UPB_UNUSED(ref2); +} + +static void visit(const upb_refcounted *r, upb_refcounted_visit *v, + void *closure) { + if (r->vtbl->visit) r->vtbl->visit(r, v, closure); +} + +#endif // UPB_DEBUG_REFS + + +/* freeze() *******************************************************************/ + +// The freeze() operation is by far the most complicated part of this scheme. +// We compute strongly-connected components and then mutate the graph such that +// we preserve the invariants documented at the top of this file. And we must +// handle out-of-memory errors gracefully (without leaving the graph +// inconsistent), which adds to the fun. 
+ +// The state used by the freeze operation (shared across many functions). +typedef struct { + int depth; + int maxdepth; + uint64_t index; + // Maps upb_refcounted* -> attributes (color, etc). attr layout varies by + // color. + upb_inttable objattr; + upb_inttable stack; // stack of upb_refcounted* for Tarjan's algorithm. + upb_inttable groups; // array of uint32_t*, malloc'd refcounts for new groups + upb_status *status; + jmp_buf err; +} tarjan; + +static void release_ref2(const upb_refcounted *obj, + const upb_refcounted *subobj, + void *closure); + +// Node attributes ///////////////////////////////////////////////////////////// + +// After our analysis phase all nodes will be either GRAY or WHITE. + +typedef enum { + BLACK = 0, // Object has not been seen. + GRAY, // Object has been found via a refgroup but may not be reachable. + GREEN, // Object is reachable and is currently on the Tarjan stack. + WHITE, // Object is reachable and has been assigned a group (SCC). +} color_t; + +UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); } +UPB_NORETURN static void oom(tarjan *t) { + upb_status_seterrliteral(t->status, "out of memory"); + err(t); +} + +uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) { + const upb_value *v = upb_inttable_lookupptr(&t->objattr, r); + return v ? upb_value_getuint64(*v) : 0; +} + +uint64_t getattr(const tarjan *t, const upb_refcounted *r) { + const upb_value *v = upb_inttable_lookupptr(&t->objattr, r); + assert(v); + return upb_value_getuint64(*v); +} + +void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) { + upb_inttable_removeptr(&t->objattr, r, NULL); + upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr)); +} + +static color_t color(tarjan *t, const upb_refcounted *r) { + return trygetattr(t, r) & 0x3; // Color is always stored in the low 2 bits. 
+} + +static void set_gray(tarjan *t, const upb_refcounted *r) { + assert(color(t, r) == BLACK); + setattr(t, r, GRAY); +} + +// Pushes an obj onto the Tarjan stack and sets it to GREEN. +static void push(tarjan *t, const upb_refcounted *r) { + assert(color(t, r) == BLACK || color(t, r) == GRAY); + // This defines the attr layout for the GREEN state. "index" and "lowlink" + // get 31 bits, which is plenty (limit of 2B objects frozen at a time). + setattr(t, r, GREEN | (t->index << 2) | (t->index << 33)); + if (++t->index == 0x80000000) { + upb_status_seterrliteral(t->status, "too many objects to freeze"); + err(t); + } + upb_inttable_push(&t->stack, upb_value_ptr((void*)r)); +} + +// Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its +// SCC group. +static upb_refcounted *pop(tarjan *t) { + upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack)); + assert(color(t, r) == GREEN); + // This defines the attr layout for nodes in the WHITE state. + // Top of group stack is [group, NULL]; we point at group. + setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8); + return r; +} + +static void newgroup(tarjan *t) { + uint32_t *group = malloc(sizeof(*group)); + if (!group) oom(t); + // Push group and empty group leader (we'll fill in leader later). 
+ if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) || + !upb_inttable_push(&t->groups, upb_value_ptr(NULL))) { + free(group); + oom(t); + } + *group = 0; +} + +static uint32_t idx(tarjan *t, const upb_refcounted *r) { + assert(color(t, r) == GREEN); + return (getattr(t, r) >> 2) & 0x7FFFFFFF; +} + +static uint32_t lowlink(tarjan *t, const upb_refcounted *r) { + if (color(t, r) == GREEN) { + return getattr(t, r) >> 33; + } else { + return UINT32_MAX; + } +} + +static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) { + assert(color(t, r) == GREEN); + setattr(t, r, ((uint64_t)lowlink << 33) | (getattr(t, r) & 0x1FFFFFFFF)); +} + +uint32_t *group(tarjan *t, upb_refcounted *r) { + assert(color(t, r) == WHITE); + uint64_t groupnum = getattr(t, r) >> 8; + const upb_value *v = upb_inttable_lookup(&t->groups, groupnum); + assert(v); + return upb_value_getptr(*v); +} + +// If the group leader for this object's group has not previously been set, +// the given object is assigned to be its leader. 
+static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) { + assert(color(t, r) == WHITE); + uint64_t leader_slot = (getattr(t, r) >> 8) + 1; + const upb_value *v = upb_inttable_lookup(&t->groups, leader_slot); + assert(v); + if (upb_value_getptr(*v)) { + return upb_value_getptr(*v); + } else { + upb_inttable_remove(&t->groups, leader_slot, NULL); + upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r)); + return r; + } +} + + +// Tarjan's algorithm ////////////////////////////////////////////////////////// + +// See: +// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm +static void do_tarjan(const upb_refcounted *obj, tarjan *t); + +static void tarjan_visit(const upb_refcounted *obj, + const upb_refcounted *subobj, + void *closure) { + tarjan *t = closure; + if (++t->depth > t->maxdepth) { + upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth); + err(t); + } else if (subobj->is_frozen || color(t, subobj) == WHITE) { + // Do nothing: we don't want to visit or color already-frozen nodes, + // and WHITE nodes have already been assigned a SCC. + } else if (color(t, subobj) < GREEN) { + // Subdef has not yet been visited; recurse on it. + do_tarjan(subobj, t); + set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj))); + } else if (color(t, subobj) == GREEN) { + // Subdef is in the stack and hence in the current SCC. + set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj))); + } + --t->depth; +} + +static void do_tarjan(const upb_refcounted *obj, tarjan *t) { + if (color(t, obj) == BLACK) { + // We haven't seen this object's group; mark the whole group GRAY. 
+    const upb_refcounted *o = obj;
+    do { set_gray(t, o); } while ((o = o->next) != obj);
+  }
+
+  push(t, obj);
+  visit(obj, tarjan_visit, t);
+  if (lowlink(t, obj) == idx(t, obj)) {
+    newgroup(t);
+    while (pop(t) != obj)
+      ;
+  }
+}
+
+
+// freeze() ////////////////////////////////////////////////////////////////////
+
+// visit() callback used by pass 2 of freeze().  "_t" is the tarjan state.
+// For the edge r -> subobj, takes a ref on subobj's group when both objects
+// are colored above BLACK and they no longer share a group.
+static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
+                     void *_t) {
+  tarjan *t = _t;
+  assert(color(t, r) > BLACK);
+  if (color(t, subobj) > BLACK && r->group != subobj->group) {
+    // Previously this ref was not reflected in subobj->group because they
+    // were in the same group; now that they are split a ref must be taken.
+    atomic_inc(subobj->group);
+  }
+}
+
+// Freezes the objects reachable from the "n" objects in "roots".
+//
+// Runs do_tarjan() over every root that is not yet colored GREEN or above,
+// then (only if that completed) regroups the WHITE objects, fixes up
+// cross-group refs and collects groups whose count dropped to zero.
+//
+// Returns true on success.  Returns false if any of the three scratch tables
+// could not be initialized or if control came back through setjmp(t.err); in
+// that case the group counters recorded in t.groups are freed and none of the
+// three mutation passes has run.  "s" is stored in t.status (presumably for
+// do_tarjan() to report errors through -- confirm against its definition).
+static bool freeze(upb_refcounted *const*roots, int n, upb_status *s) {
+  // NOTE(review): volatile presumably because "ret" is read after a possible
+  // longjmp() back to the setjmp() below -- confirm.
+  volatile bool ret = false;
+
+  // We run in two passes so that we can allocate all memory before performing
+  // any mutation of the input -- this allows us to leave the input unchanged
+  // in the case of memory allocation failure.
+  tarjan t;
+  t.index = 0;
+  t.depth = 0;
+  t.maxdepth = UPB_MAX_TYPE_DEPTH * 2;  // May want to make this a parameter.
+  t.status = s;
+  // Each failure label below unwinds only the tables initialized so far.
+  if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
+  if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
+  if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
+  if (setjmp(t.err) != 0) goto err4;
+
+
+  for (int i = 0; i < n; i++) {
+    if (color(&t, roots[i]) < GREEN) {
+      do_tarjan(roots[i], &t);
+    }
+  }
+
+  // If we've made it this far, no further errors are possible so it's safe to
+  // mutate the objects without risk of leaving them in an inconsistent state.
+  ret = true;
+
+  // The transformation that follows requires care.  The preconditions are:
+  // - all objects in attr map are WHITE or GRAY, and are in mutable groups
+  //   (groups of all mutable objs)
+  // - no ref2(to, from) refs have incremented count(to) if both "to" and
+  //   "from" are in our attr map (this follows from invariants (2) and (3))
+
+  // Pass 1: we remove WHITE objects from their mutable groups, and add them to
+  // new groups according to the SCC's we computed.  These new groups will
+  // consist of only frozen objects.  None will be immediately collectible,
+  // because WHITE objects are by definition reachable from one of "roots",
+  // which the caller must own refs on.
+  upb_inttable_iter i;
+  upb_inttable_begin(&i, &t.objattr);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&i);
+    // Since removal from a singly-linked list requires access to the object's
+    // predecessor, we consider obj->next instead of obj for moving.  With the
+    // while() loop we guarantee that we will visit every node's predecessor.
+    // Proof:
+    //  1. every node's predecessor is in our attr map.
+    //  2. though the loop body may change a node's predecessor, it will only
+    //     change it to be the node we are currently operating on, so with a
+    //     while() loop we guarantee ourselves the chance to remove each node.
+    while (color(&t, obj->next) == WHITE &&
+           group(&t, obj->next) != obj->next->group) {
+      // Remove from old group.
+      upb_refcounted *move = obj->next;
+      if (obj == move) {
+        // Removing the last object from a group.
+        assert(*obj->group == obj->individual_count);
+        free(obj->group);
+      } else {
+        obj->next = move->next;
+        // This may decrease to zero; we'll collect GRAY objects (if any) that
+        // remain in the group in the third pass.
+        assert(*move->group >= move->individual_count);
+        *move->group -= move->individual_count;
+      }
+
+      // Add to new group.
+      upb_refcounted *leader = groupleader(&t, move);
+      if (move == leader) {
+        // First object added to new group is its leader.
+        move->group = group(&t, move);
+        move->next = move;
+        *move->group = move->individual_count;
+      } else {
+        // Group already has at least one object in it.
+        assert(leader->group == group(&t, move));
+        move->group = group(&t, move);
+        move->next = leader->next;
+        leader->next = move;
+        *move->group += move->individual_count;
+      }
+
+      move->is_frozen = true;
+    }
+  }
+
+  // Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
+  // increment count(to) if group(obj) != group(to) (which could now be the
+  // case if "to" was just frozen).
+  upb_inttable_begin(&i, &t.objattr);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&i);
+    visit(obj, crossref, &t);
+  }
+
+  // Pass 3: GRAY objects are collected if their group's refcount dropped to
+  // zero when we removed its white nodes.  This can happen if they had only
+  // been kept alive by virtue of sharing a group with an object that was just
+  // frozen.
+  //
+  // It is important that we do this last, since the GRAY object's free()
+  // function could call unref2() on just-frozen objects, which will decrement
+  // refs that were added in pass 2.
+  upb_inttable_begin(&i, &t.objattr);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&i);
+    if (obj->group == NULL || *obj->group == 0) {
+      if (obj->group) {
+        // We eagerly free() the group's count (since we can't easily determine
+        // the group's remaining size it's the easiest way to ensure it gets
+        // done).
+        free(obj->group);
+
+        // Visit to release ref2's (done in a separate pass since release_ref2
+        // depends on o->group being unmodified so it can test merged()).
+        upb_refcounted *o = obj;
+        do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);
+
+        // Mark "group" fields as NULL so we know to free the objects later in
+        // this loop, but also don't try to delete the group twice.
+        o = obj;
+        do { o->group = NULL; } while ((o = o->next) != obj);
+      }
+      obj->vtbl->free(obj);
+    }
+  }
+
+err4:
+  // On failure the counters stored in t.groups were never attached to any
+  // object, so they are released here.
+  if (!ret) {
+    upb_inttable_begin(&i, &t.groups);
+    for(; !upb_inttable_done(&i); upb_inttable_next(&i))
+      free(upb_value_getptr(upb_inttable_iter_value(&i)));
+  }
+  upb_inttable_uninit(&t.groups);
+err3:
+  upb_inttable_uninit(&t.stack);
+err2:
+  upb_inttable_uninit(&t.objattr);
+err1:
+  return ret;
+}
+
+
+/* Misc internal functions ***************************************************/
+
+// Returns true if "r" and "r2" point at the same group counter.
+static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
+  return r->group == r2->group;
+}
+
+// Folds the group of "from" into the group of "r": adds from's count to r's,
+// frees from's counter, repoints every object in from's ring at r's counter
+// and splices the two circular lists together.  No-op if already merged.
+static void merge(upb_refcounted *r, upb_refcounted *from) {
+  if (merged(r, from)) return;
+  *r->group += *from->group;
+  free(from->group);
+  upb_refcounted *base = from;
+
+  // Set all refcount pointers in the "from" chain to the merged refcount.
+  //
+  // TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
+  // if the user continuously extends a group by one object.  Prevent this by
+  // using one of the techniques in this paper:
+  //     ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf
+  do { from->group = r->group; } while ((from = from->next) != base);
+
+  // Merge the two circularly linked lists by swapping their next pointers.
+  upb_refcounted *tmp = r->next;
+  r->next = base->next;
+  base->next = tmp;
+}
+
+static void unref(const upb_refcounted *r);
+
+// visit() callback that drops the ref2 "obj" holds on "subobj".  When the two
+// are in different groups, subobj is asserted to be frozen and its group is
+// unref'd; the ref is untracked in either case.  "closure" is unused.
+static void release_ref2(const upb_refcounted *obj,
+                         const upb_refcounted *subobj,
+                         void *closure) {
+  UPB_UNUSED(closure);
+  if (!merged(obj, subobj)) {
+    assert(subobj->is_frozen);
+    unref(subobj);
+  }
+  untrack(subobj, obj, true);
+}
+
+// Decrements r's group counter via atomic_dec(); when that call returns true
+// the counter is freed and the whole group is destroyed.
+static void unref(const upb_refcounted *r) {
+  if (atomic_dec(r->group)) {
+    free(r->group);
+
+    // In two passes, since release_ref2 needs a guarantee that any subobjs
+    // are alive.
+    const upb_refcounted *o = r;
+    do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
+
+    o = r;
+    do {
+      // Read the successor first: free() below destroys "o".
+      const upb_refcounted *next = o->next;
+      assert(o->is_frozen || o->individual_count == 0);
+      o->vtbl->free((upb_refcounted*)o);
+      o = next;
+    } while(o != r);
+  }
+}
+
+
+/* Public interface ***********************************************************/
+
+// Initializes "r" as a mutable, single-object group with one ref owned by
+// "owner".  Returns false (taking no ref) if the group counter cannot be
+// allocated.
+bool upb_refcounted_init(upb_refcounted *r,
+                         const struct upb_refcounted_vtbl *vtbl,
+                         const void *owner) {
+  r->next = r;
+  r->vtbl = vtbl;
+  r->individual_count = 0;
+  r->is_frozen = false;
+  r->group = malloc(sizeof(*r->group));
+  if (!r->group) return false;
+  *r->group = 0;
+  upb_refcounted_ref(r, owner);
+  return true;
+}
+
+// Returns the object's is_frozen flag.
+bool upb_refcounted_isfrozen(const upb_refcounted *r) {
+  return r->is_frozen;
+}
+
+// Takes a ref on "r" for "owner".  individual_count is only maintained while
+// the object is mutable; the group counter is always incremented.
+void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
+  if (!r->is_frozen)
+    ((upb_refcounted*)r)->individual_count++;
+  atomic_inc(r->group);
+  track(r, owner, false);
+}
+
+// Releases the ref "owner" holds on "r"; mirrors upb_refcounted_ref().
+void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
+  if (!r->is_frozen)
+    ((upb_refcounted*)r)->individual_count--;
+  unref(r);
+  untrack(r, owner, false);
+}
+
+// Records a ref from the mutable object "from" to "r".  A frozen "r" gets its
+// group counter incremented; a mutable "r" is merged into one group with
+// "from" instead.
+void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
+  assert(!from->is_frozen);  // Non-const pointer implies this.
+  if (r->is_frozen) {
+    atomic_inc(r->group);
+  } else {
+    merge((upb_refcounted*)r, from);
+  }
+  track(r, from, true);
+}
+
+// Drops a ref taken with upb_refcounted_ref2().  Only a frozen "r" has a
+// count to release; a mutable "r" stays merged with "from".
+void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
+  assert(!from->is_frozen);  // Non-const pointer implies this.
+  if (r->is_frozen) {
+    unref(r);
+  } else {
+    assert(merged(r, from));
+  }
+  untrack(r, from, true);
+}
+
+// Moves a ref on "r" from owner "from" to owner "to".  The new ref is taken
+// before the old one is released; a NULL "from" means only the new ref is
+// taken.
+void upb_refcounted_donateref(
+    const upb_refcounted *r, const void *from, const void *to) {
+  assert(from != to);
+  assert(to != NULL);
+  upb_refcounted_ref(r, to);
+  if (from != NULL)
+    upb_refcounted_unref(r, from);
+}
+
+// Forwards to checkref() for a non-ref2 ref held by "owner".
+void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
+  checkref(r, owner, false);
+}
+
+// Public entry point for freeze(); asserts that every root is still mutable.
+bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s) {
+  for (int i = 0; i < n; i++) {
+    assert(!roots[i]->is_frozen);
+  }
+  return freeze(roots, n, s);
+}
diff --git a/upb/refcounted.h b/upb/refcounted.h
new file mode 100644
index 0000000..19993ca
--- /dev/null
+++ b/upb/refcounted.h
@@ -0,0 +1,180 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009-2012 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman
+ *
+ * A refcounting scheme that supports circular refs.  It accomplishes this by
+ * partitioning the set of objects into groups such that no cycle spans groups;
+ * we can then reference-count the group as a whole and ignore refs within the
+ * group.  When objects are mutable, these groups are computed very
+ * conservatively; we group any objects that have ever had a link between them.
+ * When objects are frozen, we compute strongly-connected components which
+ * allows us to be precise and only group objects that are actually cyclic.
+ *
+ * This is a mixed C/C++ interface that offers a full API to both languages.
+ * See the top-level README for more information.
+ */
+
+#ifndef UPB_REFCOUNTED_H_
+#define UPB_REFCOUNTED_H_
+
+#include "upb/table.h"
+
+// Reference tracking is designed to be used with a tool like Valgrind; when
+// enabled, it will cause reference leaks to show up as actual memory leaks
+// that are attributed to the code that leaked the ref, *not* the code that
+// originally created the object.
+#ifndef NDEBUG
+#define UPB_DEBUG_REFS
+#endif
+
+struct upb_refcounted_vtbl;
+
+#ifdef __cplusplus
+namespace upb { class RefCounted; }
+typedef upb::RefCounted upb_refcounted;
+extern "C" {
+#else
+struct upb_refcounted;
+typedef struct upb_refcounted upb_refcounted;
+#endif
+
+// The C++ class and the C struct share a single member list below, so both
+// languages see the same layout.
+#ifdef __cplusplus
+
+class upb::RefCounted {
+ public:
+  // Returns true if the given object is frozen.
+  bool IsFrozen() const;
+
+  // Increases the ref count, the new ref is owned by "owner" which must not
+  // already own a ref (and should not itself be a refcounted object if the ref
+  // could possibly be circular; see below).
+  // Thread-safe iff "this" is frozen.
+  void Ref(const void *owner) const;
+
+  // Releases a ref that was acquired from upb_refcounted_ref() and collects
+  // any objects it can.
+  void Unref(const void *owner) const;
+
+  // Moves an existing ref from "from" to "to", without changing the overall
+  // ref count.  DonateRef(foo, NULL, owner) is the same as Ref(foo, owner),
+  // but "to" may not be NULL.
+  void DonateRef(const void *from, const void *to) const;
+
+  // Verifies that a ref to the given object is currently held by the given
+  // owner.  Only effective in UPB_DEBUG_REFS builds.
+  void CheckRef(const void *owner) const;
+
+ private:
+  UPB_DISALLOW_POD_OPS(RefCounted);
+#else
+struct upb_refcounted {
+#endif
+  // A single reference count shared by all objects in the group.
+  uint32_t *group;
+
+  // A singly-linked list of all objects in the group.
+  upb_refcounted *next;
+
+  // Table of function pointers for this type.
+  const struct upb_refcounted_vtbl *vtbl;
+
+  // Maintained only when mutable, this tracks the number of refs (but not
+  // ref2's) to this object.  *group should be the sum of all individual_count
+  // in the group.
+  uint32_t individual_count;
+
+  bool is_frozen;
+};
+
+// Native C API.
+bool upb_refcounted_isfrozen(const upb_refcounted *r);
+void upb_refcounted_ref(const upb_refcounted *r, const void *owner);
+void upb_refcounted_unref(const upb_refcounted *r, const void *owner);
+void upb_refcounted_donateref(
+    const upb_refcounted *r, const void *from, const void *to);
+void upb_refcounted_checkref(const upb_refcounted *r, const void *owner);
+
+
+// Internal-to-upb Interface ///////////////////////////////////////////////////
+
+// Callback type invoked once per (r, subobj) edge by a vtbl "visit" function.
+typedef void upb_refcounted_visit(const upb_refcounted *r,
+                                  const upb_refcounted *subobj,
+                                  void *closure);
+
+struct upb_refcounted_vtbl {
+  // Must visit all subobjects that are currently ref'd via upb_refcounted_ref2.
+  // Must be longjmp()-safe.
+  void (*visit)(const upb_refcounted *r, upb_refcounted_visit *visit, void *c);
+
+  // Must free the object and release all references to other objects.
+  void (*free)(upb_refcounted *r);
+};
+
+// Initializes the refcounted with a single ref for the given owner.  Returns
+// false if memory could not be allocated.
+bool upb_refcounted_init(upb_refcounted *r,
+                         const struct upb_refcounted_vtbl *vtbl,
+                         const void *owner);
+
+// Adds a ref from one refcounted object to another ("from" must not already
+// own a ref).  These refs may be circular; cycles will be collected correctly
+// (if conservatively).  These refs do not need to be freed in from's free()
+// function.
+void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from);
+
+// Removes a ref that was acquired from upb_refcounted_ref2(), and collects any
+// object it can.  This is only necessary when "from" no longer points to "r",
+// and not from from's "free" function.
+void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from);
+
+// Convenience wrappers that cast both arguments to upb_refcounted pointers.
+// The arguments are parenthesized so that an expression argument (for example
+// "cond ? a : b") is cast as a whole rather than only its first operand.
+#define upb_ref2(r, from) \
+    upb_refcounted_ref2((const upb_refcounted*)(r), (upb_refcounted*)(from))
+#define upb_unref2(r, from) \
+    upb_refcounted_unref2((const upb_refcounted*)(r), (upb_refcounted*)(from))
+
+// Freezes all mutable objects reachable by ref2() refs from the given roots.
+// This will split refcounting groups into precise SCC groups, so that
+// refcounting of frozen objects can be more aggressive.  If memory allocation
+// fails or if more than 2**31 mutable objects are reachable from "roots",
+// false is returned and the objects are unchanged.
+//
+// After this operation succeeds, the objects are frozen/const, and may not be
+// used through non-const pointers.  In particular, they may not be passed as
+// the second parameter of upb_refcounted_{ref,unref}2().  On the upside, all
+// operations on frozen refcounteds are threadsafe, and objects will be freed
+// at the precise moment that they become unreachable.
+//
+// Caller must own refs on each object in the "roots" list.
+bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s);
+
+// Shared by all compiled-in refcounted objects.
+extern uint32_t static_refcount;
+
+// Static initializer, in member order: group, next, vtbl, individual_count,
+// is_frozen.
+#define UPB_REFCOUNT_INIT {&static_refcount, NULL, NULL, 0, true}
+
+#ifdef __cplusplus
+}  /* extern "C" */
+
+// C++ Wrappers.
+namespace upb { +inline bool RefCounted::IsFrozen() const { + return upb_refcounted_isfrozen(this); +} +inline void RefCounted::Ref(const void *owner) const { + upb_refcounted_ref(this, owner); +} +inline void RefCounted::Unref(const void *owner) const { + upb_refcounted_unref(this, owner); +} +inline void RefCounted::DonateRef(const void *from, const void *to) const { + upb_refcounted_donateref(this, from, to); +} +inline void RefCounted::CheckRef(const void *owner) const { + upb_refcounted_checkref(this, owner); +} +} // namespace upb +#endif + +#endif // UPB_REFCOUNT_H_ diff --git a/upb/sink.c b/upb/sink.c new file mode 100644 index 0000000..d829fa9 --- /dev/null +++ b/upb/sink.c @@ -0,0 +1,205 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2011-2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + */ + +#include "upb/sink.h" + +static bool chkstack(upb_sink *s) { + if (s->top + 1 >= s->limit) { + upb_status_seterrliteral(&s->status, "Nesting too deep."); + return false; + } else { + return true; + } +} + +static upb_selector_t getselector(const upb_fielddef *f, + upb_handlertype_t type) { + upb_selector_t selector; + bool ok = upb_getselector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; +} + +void upb_sink_init(upb_sink *s, const upb_handlers *h) { + s->limit = &s->stack[UPB_MAX_NESTING]; + s->top = NULL; + s->stack[0].h = h; + upb_status_init(&s->status); +} + +void upb_sink_reset(upb_sink *s, void *closure) { + s->top = s->stack; + s->top->closure = closure; +} + +void upb_sink_uninit(upb_sink *s) { + upb_status_uninit(&s->status); +} + +bool upb_sink_startmsg(upb_sink *s) { + const upb_handlers *h = s->top->h; + upb_startmsg_handler *startmsg = upb_handlers_getstartmsg(h); + return startmsg ? 
startmsg(s->top->closure) : true; +} + +void upb_sink_endmsg(upb_sink *s, upb_status *status) { + UPB_UNUSED(status); + assert(s->top == s->stack); + upb_endmsg_handler *endmsg = upb_handlers_getendmsg(s->top->h); + if (endmsg) endmsg(s->top->closure, &s->status); +} + +#define PUTVAL(type, ctype, htype) \ + bool upb_sink_put ## type(upb_sink *s, const upb_fielddef *f, ctype val) { \ + upb_selector_t selector; \ + if (!upb_getselector(f, UPB_HANDLER_ ## htype, &selector)) return false; \ + upb_ ## type ## _handler *handler = (upb_ ## type ## _handler*) \ + upb_handlers_gethandler(s->top->h, selector); \ + if (handler) { \ + void *data = upb_handlers_gethandlerdata(s->top->h, selector); \ + if (!handler(s->top->closure, data, val)) return false; \ + } \ + return true; \ + } + +PUTVAL(int32, int32_t, INT32); +PUTVAL(int64, int64_t, INT64); +PUTVAL(uint32, uint32_t, UINT32); +PUTVAL(uint64, uint64_t, UINT64); +PUTVAL(float, float, FLOAT); +PUTVAL(double, double, DOUBLE); +PUTVAL(bool, bool, BOOL); +#undef PUTVAL + +size_t upb_sink_putstring(upb_sink *s, const upb_fielddef *f, + const char *buf, size_t n) { + upb_selector_t selector; + if (!upb_getselector(f, UPB_HANDLER_STRING, &selector)) return false; + upb_string_handler *handler = (upb_string_handler*) + upb_handlers_gethandler(s->top->h, selector); + if (handler) { + void *data = upb_handlers_gethandlerdata(s->top->h, selector); \ + return handler(s->top->closure, data, buf, n); + } + return n; +} + +bool upb_sink_startseq(upb_sink *s, const upb_fielddef *f) { + assert(upb_fielddef_isseq(f)); + if (!chkstack(s)) return false; + + void *subc = s->top->closure; + const upb_handlers *h = s->top->h; + upb_selector_t selector; + if (!upb_getselector(f, UPB_HANDLER_STARTSEQ, &selector)) return false; + upb_startfield_handler *startseq = + (upb_startfield_handler*)upb_handlers_gethandler(h, selector); + if (startseq) { + subc = startseq(s->top->closure, upb_handlers_gethandlerdata(h, selector)); + if (!subc) return 
false; + } + + ++s->top; + s->top->end = getselector(f, UPB_HANDLER_ENDSEQ); + s->top->h = h; + s->top->closure = subc; + return true; +} + +bool upb_sink_endseq(upb_sink *s, const upb_fielddef *f) { + upb_selector_t selector = s->top->end; + assert(selector == getselector(f, UPB_HANDLER_ENDSEQ)); + --s->top; + + const upb_handlers *h = s->top->h; + upb_endfield_handler *endseq = + (upb_endfield_handler*)upb_handlers_gethandler(h, selector); + return endseq ? + endseq(s->top->closure, upb_handlers_gethandlerdata(h, selector)) : + true; +} + +bool upb_sink_startstr(upb_sink *s, const upb_fielddef *f, size_t size_hint) { + assert(upb_fielddef_isstring(f)); + if (!chkstack(s)) return false; + + void *subc = s->top->closure; + const upb_handlers *h = s->top->h; + upb_selector_t selector; + if (!upb_getselector(f, UPB_HANDLER_STARTSTR, &selector)) return false; + upb_startstr_handler *startstr = + (upb_startstr_handler*)upb_handlers_gethandler(h, selector); + if (startstr) { + subc = startstr( + s->top->closure, upb_handlers_gethandlerdata(h, selector), size_hint); + if (!subc) return false; + } + + ++s->top; + s->top->end = getselector(f, UPB_HANDLER_ENDSTR); + s->top->h = h; + s->top->closure = subc; + return true; +} + +bool upb_sink_endstr(upb_sink *s, const upb_fielddef *f) { + upb_selector_t selector = s->top->end; + assert(selector == getselector(f, UPB_HANDLER_ENDSTR)); + --s->top; + + const upb_handlers *h = s->top->h; + upb_endfield_handler *endstr = + (upb_endfield_handler*)upb_handlers_gethandler(h, selector); + return endstr ? 
+ endstr(s->top->closure, upb_handlers_gethandlerdata(h, selector)) : + true; +} + +bool upb_sink_startsubmsg(upb_sink *s, const upb_fielddef *f) { + assert(upb_fielddef_issubmsg(f)); + if (!chkstack(s)) return false; + + const upb_handlers *h = s->top->h; + upb_selector_t selector; + if (!upb_getselector(f, UPB_HANDLER_STARTSUBMSG, &selector)) return false; + upb_startfield_handler *startsubmsg = + (upb_startfield_handler*)upb_handlers_gethandler(h, selector); + void *subc = s->top->closure; + + if (startsubmsg) { + void *data = upb_handlers_gethandlerdata(h, selector); + subc = startsubmsg(s->top->closure, data); + if (!subc) return false; + } + + ++s->top; + s->top->end = getselector(f, UPB_HANDLER_ENDSUBMSG); + s->top->h = upb_handlers_getsubhandlers(h, f); + s->top->closure = subc; + upb_sink_startmsg(s); + return true; +} + +bool upb_sink_endsubmsg(upb_sink *s, const upb_fielddef *f) { + upb_selector_t selector = s->top->end; + assert(selector == getselector(f, UPB_HANDLER_ENDSUBMSG)); + + upb_endmsg_handler *endmsg = upb_handlers_getendmsg(s->top->h); + if (endmsg) endmsg(s->top->closure, &s->status); + --s->top; + + const upb_handlers *h = s->top->h; + upb_endfield_handler *endfield = + (upb_endfield_handler*)upb_handlers_gethandler(h, selector); + return endfield ? + endfield(s->top->closure, upb_handlers_gethandlerdata(h, selector)) : + true; +} + +const upb_handlers *upb_sink_tophandlers(upb_sink *s) { + return s->top->h; +} diff --git a/upb/sink.h b/upb/sink.h new file mode 100644 index 0000000..2c0f037 --- /dev/null +++ b/upb/sink.h @@ -0,0 +1,82 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010-2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * A upb_sink is an object that binds a upb_handlers object to some runtime + * state. It is the object that can actually receive data via the upb_handlers + * interface. 
+ *
+ * Unlike upb_def and upb_handlers, upb_sink is never frozen, immutable, or
+ * thread-safe.  You can create as many of them as you want, but each one may
+ * only be used in a single thread at a time.
+ *
+ * If we compare with class-based OOP, you can think of a upb_def as an
+ * abstract base class, a upb_handlers as a concrete derived class, and a
+ * upb_sink as an object (class instance).
+ */
+
+#ifndef UPB_SINK_H
+#define UPB_SINK_H
+
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* upb_sink *******************************************************************/
+
+// One entry of the sink's nesting stack.
+typedef struct {
+  upb_selector_t end;  // From the enclosing message (unused at top-level).
+  const upb_handlers *h;
+  void *closure;
+} upb_sink_frame;
+
+// A sink: a fixed-size stack of frames plus a status.  "top" is the current
+// frame and "limit" marks one past the last usable frame.
+typedef struct {
+  upb_sink_frame *top, *limit;
+  upb_sink_frame stack[UPB_MAX_NESTING];
+  upb_status status;
+} upb_sink;
+
+// Caller retains ownership of the handlers object.
+void upb_sink_init(upb_sink *s, const upb_handlers *h);
+
+// Resets the state of the sink so that it is ready to accept new input.
+// Any state from previously received data is discarded.  "Closure" will be
+// used as the top-level closure.
+void upb_sink_reset(upb_sink *s, void *closure);
+
+void upb_sink_uninit(upb_sink *s);
+
+// Returns the handlers at the top of the stack.
+const upb_handlers *upb_sink_tophandlers(upb_sink *s);
+
+// Functions for pushing data into the sink.
+// These return false if processing should stop (either due to error or just
+// to suspend).
+bool upb_sink_startmsg(upb_sink *s); +void upb_sink_endmsg(upb_sink *s, upb_status *status); +bool upb_sink_putint32(upb_sink *s, const upb_fielddef *f, int32_t val); +bool upb_sink_putint64(upb_sink *s, const upb_fielddef *f, int64_t val); +bool upb_sink_putuint32(upb_sink *s, const upb_fielddef *f, uint32_t val); +bool upb_sink_putuint64(upb_sink *s, const upb_fielddef *f, uint64_t val); +bool upb_sink_putfloat(upb_sink *s, const upb_fielddef *f, float val); +bool upb_sink_putdouble(upb_sink *s, const upb_fielddef *f, double val); +bool upb_sink_putbool(upb_sink *s, const upb_fielddef *f, bool val); +bool upb_sink_startstr(upb_sink *s, const upb_fielddef *f, size_t size_hint); +size_t upb_sink_putstring(upb_sink *s, const upb_fielddef *f, const char *buf, + size_t len); +bool upb_sink_endstr(upb_sink *s, const upb_fielddef *f); +bool upb_sink_startsubmsg(upb_sink *s, const upb_fielddef *f); +bool upb_sink_endsubmsg(upb_sink *s, const upb_fielddef *f); +bool upb_sink_startseq(upb_sink *s, const upb_fielddef *f); +bool upb_sink_endseq(upb_sink *s, const upb_fielddef *f); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/upb/stdc/error.c b/upb/stdc/error.c index 313866c..85c9ca6 100644 --- a/upb/stdc/error.c +++ b/upb/stdc/error.c @@ -9,7 +9,6 @@ #include "upb/stdc/error.h" -#include #include void upb_status_fromerrno(upb_status *status, int code) { diff --git a/upb/stdc/io.c b/upb/stdc/io.c index 1abed32..5d36aa5 100644 --- a/upb/stdc/io.c +++ b/upb/stdc/io.c @@ -7,6 +7,9 @@ #include "upb/stdc/io.h" +#include +#include +#include #include "upb/stdc/error.h" // We can make this configurable if necessary. diff --git a/upb/symtab.c b/upb/symtab.c new file mode 100644 index 0000000..cd82bdd --- /dev/null +++ b/upb/symtab.c @@ -0,0 +1,326 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2008-2012 Google Inc. See LICENSE for details. 
+ * Author: Josh Haberman + */ + +#include "upb/symtab.h" + +#include +#include + +#include "upb/bytestream.h" + +bool upb_symtab_isfrozen(const upb_symtab *s) { + return upb_refcounted_isfrozen(upb_upcast(s)); +} + +void upb_symtab_ref(const upb_symtab *s, const void *owner) { + upb_refcounted_ref(upb_upcast(s), owner); +} + +void upb_symtab_unref(const upb_symtab *s, const void *owner) { + upb_refcounted_unref(upb_upcast(s), owner); +} + +void upb_symtab_donateref( + const upb_symtab *s, const void *from, const void *to) { + upb_refcounted_donateref(upb_upcast(s), from, to); +} + +void upb_symtab_checkref(const upb_symtab *s, const void *owner) { + upb_refcounted_checkref(upb_upcast(s), owner); +} + +static void upb_symtab_free(upb_refcounted *r) { + upb_symtab *s = (upb_symtab*)r; + upb_strtable_iter i; + upb_strtable_begin(&i, &s->symtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_def_unref(def, s); + } + upb_strtable_uninit(&s->symtab); + free(s); +} + +static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free}; + +upb_symtab *upb_symtab_new(const void *owner) { + upb_symtab *s = malloc(sizeof(*s)); + upb_refcounted_init(upb_upcast(s), &vtbl, owner); + upb_strtable_init(&s->symtab, UPB_CTYPE_PTR); + return s; +} + +const upb_def **upb_symtab_getdefs(const upb_symtab *s, upb_deftype_t type, + const void *owner, int *n) { + int total = upb_strtable_count(&s->symtab); + // We may only use part of this, depending on how many symbols are of the + // correct type. 
+ const upb_def **defs = malloc(sizeof(*defs) * total); + upb_strtable_iter iter; + upb_strtable_begin(&iter, &s->symtab); + int i = 0; + for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); + assert(def); + if(type == UPB_DEF_ANY || def->type == type) + defs[i++] = def; + } + *n = i; + if (owner) + for(i = 0; i < *n; i++) upb_def_ref(defs[i], owner); + return defs; +} + +const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym, + const void *owner) { + const upb_value *v = upb_strtable_lookup(&s->symtab, sym); + upb_def *ret = v ? upb_value_getptr(*v) : NULL; + if (ret) upb_def_ref(ret, owner); + return ret; +} + +const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym, + const void *owner) { + const upb_value *v = upb_strtable_lookup(&s->symtab, sym); + upb_def *def = v ? upb_value_getptr(*v) : NULL; + upb_msgdef *ret = NULL; + if(def && def->type == UPB_DEF_MSG) { + ret = upb_downcast_msgdef_mutable(def); + upb_def_ref(def, owner); + } + return ret; +} + +// Given a symbol and the base symbol inside which it is defined, find the +// symbol's definition in t. +static upb_def *upb_resolvename(const upb_strtable *t, + const char *base, const char *sym) { + if(strlen(sym) == 0) return NULL; + if(sym[0] == UPB_SYMBOL_SEPARATOR) { + // Symbols starting with '.' are absolute, so we do a single lookup. + // Slice to omit the leading '.' + const upb_value *v = upb_strtable_lookup(t, sym + 1); + return v ? upb_value_getptr(*v) : NULL; + } else { + // Remove components from base until we find an entry or run out. + // TODO: This branch is totally broken, but currently not used. 
+ (void)base; + assert(false); + return NULL; + } +} + +const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, + const char *sym, const void *owner) { + upb_def *ret = upb_resolvename(&s->symtab, base, sym); + if (ret) upb_def_ref(ret, owner); + return ret; +} + +// Searches def and its children to find defs that have the same name as any +// def in "addtab." Returns true if any where found, and as a side-effect adds +// duplicates of these defs into addtab. +// +// We use a modified depth-first traversal that traverses each SCC (which we +// already computed) as if it were a single node. This allows us to traverse +// the possibly-cyclic graph as if it were a DAG and to dup the correct set of +// nodes with O(n) time. +static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, + const void *new_owner, upb_inttable *seen, + upb_status *s) { + // Memoize results of this function for efficiency (since we're traversing a + // DAG this is not needed to limit the depth of the search). + const upb_value *v = upb_inttable_lookup(seen, (uintptr_t)def); + if (v) return upb_value_getbool(*v); + + // Visit submessages for all messages in the SCC. + bool need_dup = false; + const upb_def *base = def; + do { + assert(upb_def_isfrozen(def)); + if (def->type == UPB_DEF_FIELD) continue; + const upb_value *v = upb_strtable_lookup(addtab, upb_def_fullname(def)); + if (v) { + // Because we memoize we should not visit a node after we have dup'd it. + assert(((upb_def*)upb_value_getptr(*v))->came_from_user); + need_dup = true; + } + const upb_msgdef *m = upb_dyncast_msgdef(def); + if (m) { + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + if (!upb_fielddef_hassubdef(f)) continue; + // |= to avoid short-circuit; we need its side-effects. 
+ need_dup |= upb_resolve_dfs( + upb_fielddef_subdef(f), addtab, new_owner, seen, s); + if (!upb_ok(s)) return false; + } + } + } while ((def = (upb_def*)def->base.next) != base); + + if (need_dup) { + // Dup any defs that don't already have entries in addtab. + def = base; + do { + if (def->type == UPB_DEF_FIELD) continue; + const char *name = upb_def_fullname(def); + if (upb_strtable_lookup(addtab, name) == NULL) { + upb_def *newdef = upb_def_dup(def, new_owner); + if (!newdef) goto oom; + newdef->came_from_user = false; + if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef))) + goto oom; + } + } while ((def = (upb_def*)def->base.next) != base); + } + + upb_inttable_insert(seen, (uintptr_t)def, upb_value_bool(need_dup)); + return need_dup; + +oom: + upb_status_seterrliteral(s, "out of memory"); + return false; +} + +bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, + upb_status *status) { + upb_def **add_defs = NULL; + upb_strtable addtab; + if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) { + upb_status_seterrliteral(status, "out of memory"); + return false; + } + + // Add new defs to table. + for (int i = 0; i < n; i++) { + upb_def *def = defs[i]; + if (upb_def_isfrozen(def)) { + upb_status_seterrliteral(status, "added defs must be mutable"); + goto err; + } + assert(!upb_def_isfrozen(def)); + const char *fullname = upb_def_fullname(def); + if (!fullname) { + upb_status_seterrliteral( + status, "Anonymous defs cannot be added to a symtab"); + goto err; + } + if (upb_strtable_lookup(&addtab, fullname) != NULL) { + upb_status_seterrf(status, "Conflicting defs named '%s'", fullname); + goto err; + } + // We need this to back out properly, because if there is a failure we need + // to donate the ref back to the caller. 
+ def->came_from_user = true; + upb_def_donateref(def, ref_donor, s); + if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def))) + goto oom_err; + } + + // Add dups of any existing def that can reach a def with the same name as + // one of "defs." + upb_inttable seen; + if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err; + upb_strtable_iter i; + upb_strtable_begin(&i, &s->symtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_resolve_dfs(def, &addtab, s, &seen, status); + if (!upb_ok(status)) goto err; + } + upb_inttable_uninit(&seen); + + // Now using the table, resolve symbolic references. + upb_strtable_begin(&i, &addtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_msgdef *m = upb_dyncast_msgdef_mutable(def); + if (!m) continue; + // Type names are resolved relative to the message in which they appear. + const char *base = upb_def_fullname(upb_upcast(m)); + + upb_msg_iter j; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + const char *name = upb_fielddef_subdefname(f); + if (name) { + upb_def *subdef = upb_resolvename(&addtab, base, name); + if (subdef == NULL) { + upb_status_seterrf( + status, "couldn't resolve name '%s' in message '%s'", name, base); + goto err; + } else if (!upb_fielddef_setsubdef(f, subdef)) { + upb_status_seterrf( + status, "def '%s' had the wrong type for field '%s'", + upb_def_fullname(subdef), upb_fielddef_name(f)); + goto err; + } + } + + if (!upb_fielddef_resolvedefault(f)) { + upb_byteregion *r = upb_value_getbyteregion(upb_fielddef_default(f)); + size_t len; + const char *ptr = upb_byteregion_getptr(r, 0, &len); + upb_status_seterrf(status, "couldn't resolve enum default '%s'", ptr); + goto err; + } + } + } + + // We need an array of the defs in addtab, for passing to upb_def_freeze. 
+ add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab)); + if (add_defs == NULL) goto oom_err; + upb_strtable_begin(&i, &addtab); + for (n = 0; !upb_strtable_done(&i); upb_strtable_next(&i)) { + add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&i)); + } + + if (!upb_def_freeze(add_defs, n, status)) goto err; + + // This must be delayed until all errors have been detected, since error + // recovery code uses this table to cleanup defs. + upb_strtable_uninit(&addtab); + + // TODO(haberman) we don't properly handle errors after this point (like + // OOM in upb_strtable_insert() below). + for (int i = 0; i < n; i++) { + upb_def *def = add_defs[i]; + const char *name = upb_def_fullname(def); + upb_value v; + if (upb_strtable_remove(&s->symtab, name, &v)) { + const upb_def *def = upb_value_getptr(v); + upb_def_unref(def, s); + } + bool success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def)); + UPB_ASSERT_VAR(success, success == true); + } + free(add_defs); + return true; + +oom_err: + upb_status_seterrliteral(status, "out of memory"); +err: { + // For defs the user passed in, we need to donate the refs back. For defs + // we dup'd, we need to just unref them. + upb_strtable_iter i; + upb_strtable_begin(&i, &addtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + if (def->came_from_user) { + upb_def_donateref(def, s, ref_donor); + } else { + upb_def_unref(def, s); + } + def->came_from_user = false; + } + } + upb_strtable_uninit(&addtab); + free(add_defs); + assert(!upb_ok(status)); + return false; +} diff --git a/upb/symtab.h b/upb/symtab.h new file mode 100644 index 0000000..883324a --- /dev/null +++ b/upb/symtab.h @@ -0,0 +1,200 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009-2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * A symtab (symbol table) stores a name->def map of upb_defs. 
Clients could + * always create such tables themselves, but upb_symtab has logic for resolving + * symbolic references, and in particular, for keeping a whole set of consistent + * defs when replacing some subset of those defs. This logic is nontrivial. + * + * This is a mixed C/C++ interface that offers a full API to both languages. + * See the top-level README for more information. + */ + +#ifndef UPB_SYMTAB_H_ +#define UPB_SYMTAB_H_ + +#ifdef __cplusplus +#include + +namespace upb { class SymbolTable; } +typedef upb::SymbolTable upb_symtab; +#else +struct upb_symtab; +typedef struct upb_symtab upb_symtab; +#endif + +#include "upb/def.h" + +#ifdef __cplusplus + +class upb::SymbolTable { + public: + // Returns a new symbol table with a single ref owned by "owner." + // Returns NULL if memory allocation failed. + static SymbolTable* New(const void* owner); + + // Though not declared as such in C++, upb::RefCounted is the base of + // SymbolTable and we can upcast to it. + RefCounted* Upcast(); + const RefCounted* Upcast() const; + + // Functionality from upb::RefCounted. + bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + // Resolves the given symbol using the rules described in descriptor.proto, + // namely: + // + // If the name starts with a '.', it is fully-qualified. Otherwise, + // C++-like scoping rules are used to find the type (i.e. first the nested + // types within this message are searched, then within the parent, on up + // to the root namespace). + // + // If a def is found, the caller owns one ref on the returned def, owned by + // owner. Otherwise returns NULL. + const Def* Resolve(const char* base, const char* sym, + const void* owner) const; + + // Finds an entry in the symbol table with this exact name. If a def is + // found, the caller owns one ref on the returned def, owned by owner. 
+ // Otherwise returns NULL. + const Def* Lookup(const char *sym, const void *owner) const; + const MessageDef* LookupMessage(const char *sym, const void *owner) const; + + // Gets an array of pointers to all currently active defs in this symtab. + // The caller owns the returned array (which is of length *n) as well as a + // ref to each symbol inside (owned by owner). If type is UPB_DEF_ANY then + // defs of all types are returned, otherwise only defs of the required type + // are returned. + const Def** GetDefs(upb_deftype_t type, const void *owner, int *n) const; + + // Adds the given mutable defs to the symtab, resolving all symbols + // (including enum default values) and finalizing the defs. Only one def per + // name may be in the list, but defs can replace existing defs in the symtab. + // All defs must have a name -- anonymous defs are not allowed. Anonymous + // defs can still be frozen by calling upb_def_freeze() directly. + // + // Any existing defs that can reach defs that are being replaced will + // themselves be replaced also, so that the resulting set of defs is fully + // consistent. + // + // This logic implemented in this method is a convenience; ultimately it + // calls some combination of upb_fielddef_setsubdef(), upb_def_dup(), and + // upb_freeze(), any of which the client could call themself. However, since + // the logic for doing so is nontrivial, we provide it here. + // + // The entire operation either succeeds or fails. If the operation fails, + // the symtab is unchanged, false is returned, and status indicates the + // error. The caller passes a ref on all defs to the symtab (even if the + // operation fails). + // + // TODO(haberman): currently failure will leave the symtab unchanged, but may + // leave the defs themselves partially resolved. Does this matter? If so we + // could do a prepass that ensures that all symbols are resolvable and bail + // if not, so we don't mutate anything until we know the operation will + // succeed. 
+ // + // TODO(haberman): since the defs must be mutable, refining a frozen def + // requires making mutable copies of the entire tree. This is wasteful if + // only a few messages are changing. We may want to add a way of adding a + // tree of frozen defs to the symtab (perhaps an alternate constructor where + // you pass the root of the tree?) + bool Add(Def*const* defs, int n, void* ref_donor, upb_status* status); + + bool Add(const std::vector& defs, void *owner, Status* status) { + return Add((Def*const*)&defs[0], defs.size(), owner, status); + } + + private: + UPB_DISALLOW_POD_OPS(SymbolTable); + +#else +struct upb_symtab { +#endif + upb_refcounted base; + upb_strtable symtab; +}; + +// Native C API. +#ifdef __cplusplus +extern "C" { +#endif +// From upb_refcounted. +bool upb_symtab_isfrozen(const upb_symtab *s); +void upb_symtab_ref(const upb_symtab *s, const void *owner); +void upb_symtab_unref(const upb_symtab *s, const void *owner); +void upb_symtab_donateref( + const upb_symtab *s, const void *from, const void *to); +void upb_symtab_checkref(const upb_symtab *s, const void *owner); + +upb_symtab *upb_symtab_new(const void *owner); +const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, + const char *sym, const void *owner); +const upb_def *upb_symtab_lookup( + const upb_symtab *s, const char *sym, const void *owner); +const upb_msgdef *upb_symtab_lookupmsg( + const upb_symtab *s, const char *sym, const void *owner); +const upb_def **upb_symtab_getdefs( + const upb_symtab *s, upb_deftype_t type, const void *owner, int *n); +bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, + upb_status *status); + +#ifdef __cplusplus +} /* extern "C" */ + +// C++ inline wrappers. 
+namespace upb { +inline SymbolTable* SymbolTable::New(const void* owner) { + return upb_symtab_new(owner); +} + +inline RefCounted* SymbolTable::Upcast() { return upb_upcast(this); } +inline const RefCounted* SymbolTable::Upcast() const { + return upb_upcast(this); +} +inline bool SymbolTable::IsFrozen() const { + return upb_symtab_isfrozen(this); +} +inline void SymbolTable::Ref(const void *owner) const { + upb_symtab_ref(this, owner); +} +inline void SymbolTable::Unref(const void *owner) const { + upb_symtab_unref(this, owner); +} +inline void SymbolTable::DonateRef(const void *from, const void *to) const { + upb_symtab_donateref(this, from, to); +} +inline void SymbolTable::CheckRef(const void *owner) const { + upb_symtab_checkref(this, owner); +} + +inline const Def* SymbolTable::Resolve( + const char* base, const char* sym, const void* owner) const { + return upb_symtab_resolve(this, base, sym, owner); +} +inline const Def* SymbolTable::Lookup( + const char *sym, const void *owner) const { + return upb_symtab_lookup(this, sym, owner); +} +inline const MessageDef* SymbolTable::LookupMessage( + const char *sym, const void *owner) const { + return upb_symtab_lookupmsg(this, sym, owner); +} +inline const Def** SymbolTable::GetDefs( + upb_deftype_t type, const void *owner, int *n) const { + return upb_symtab_getdefs(this, type, owner, n); +} +inline bool SymbolTable::Add( + Def*const* defs, int n, void* ref_donor, upb_status* status) { + return upb_symtab_add(this, (upb_def*const*)defs, n, ref_donor, status); +} +} // namespace upb +#endif + +#endif /* UPB_SYMTAB_H_ */ diff --git a/upb/table.c b/upb/table.c index 1cf944a..21457a0 100644 --- a/upb/table.c +++ b/upb/table.c @@ -5,14 +5,10 @@ * Author: Josh Haberman * * Implementation is heavily inspired by Lua's ltable.c. - * - * TODO: for table iteration we use (array - 1) in several places; is this - * undefined behavior? If so find a better solution. 
*/ #include "upb/table.h" -#include #include #include @@ -35,47 +31,56 @@ int upb_log2(uint64_t v) { return UPB_MIN(UPB_MAXARRSIZE, ret); } +char *upb_strdup(const char *s) { + size_t n = strlen(s) + 1; + char *p = malloc(n); + if (p) memcpy(p, s, n); + return p; +} + static upb_tabkey upb_strkey(const char *str) { upb_tabkey k; k.str = (char*)str; return k; } -static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); -typedef upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key); +typedef const upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key); typedef bool upb_eqlfunc_t(upb_tabkey k1, upb_tabkey k2); /* Base table (shared code) ***************************************************/ -static size_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; } - static bool upb_table_isfull(upb_table *t) { return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD; } -static bool upb_table_init(upb_table *t, uint8_t size_lg2) { +static bool upb_table_init(upb_table *t, upb_ctype_t type, uint8_t size_lg2) { t->count = 0; + t->type = type; t->size_lg2 = size_lg2; + t->mask = upb_table_size(t) ? 
upb_table_size(t) - 1 : 0; size_t bytes = upb_table_size(t) * sizeof(upb_tabent); - t->mask = upb_table_size(t) - 1; - t->entries = malloc(bytes); - if (!t->entries) return false; - memset(t->entries, 0, bytes); + if (bytes > 0) { + t->entries = malloc(bytes); + if (!t->entries) return false; + memset((void*)t->entries, 0, bytes); + } else { + t->entries = NULL; + } return true; } -static void upb_table_uninit(upb_table *t) { free(t->entries); } - -static bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; } +static void upb_table_uninit(upb_table *t) { free((void*)t->entries); } -static upb_tabent *upb_table_emptyent(const upb_table *t) { - upb_tabent *e = t->entries + upb_table_size(t); +static upb_tabent *upb_table_emptyent(upb_table *t) { + upb_tabent *e = (upb_tabent*)t->entries + upb_table_size(t); while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); } } -static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key, - upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { - upb_tabent *e = hash(t, key); +static const upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key, + upb_hashfunc_t *hash, + upb_eqlfunc_t *eql) { + if (t->size_lg2 == 0) return NULL; + const upb_tabent *e = hash(t, key); if (upb_tabent_isempty(e)) return NULL; while (1) { if (eql(e->key, key)) return &e->val; @@ -86,14 +91,19 @@ static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key, // The given key must not already exist in the table. static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val, upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { - (void)eql; assert(upb_table_lookup(t, key, hash, eql) == NULL); + assert(val.type == t->type); t->count++; - upb_tabent *mainpos_e = hash(t, key); + upb_tabent *mainpos_e = (upb_tabent*)hash(t, key); upb_tabent *our_e = mainpos_e; - if (!upb_tabent_isempty(mainpos_e)) { // Collision. + if (upb_tabent_isempty(mainpos_e)) { + // Our main position is empty; use it. 
+ our_e->next = NULL; + } else { + // Collision. upb_tabent *new_e = upb_table_emptyent(t); - upb_tabent *chain = hash(t, mainpos_e->key); // Head of collider's chain. + // Head of collider's chain. + upb_tabent *chain = (upb_tabent*)hash(t, mainpos_e->key); if (chain == mainpos_e) { // Existing ent is in its main posisiton (it has the same hash as us, and // is the head of our chain). Insert to new ent and append to this chain. @@ -105,7 +115,10 @@ static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val, // chain). This implies that no existing ent in the table has our hash. // Evict it (updating its chain) and use its ent for head of our chain. *new_e = *mainpos_e; // copies next. - while (chain->next != mainpos_e) chain = chain->next; + while (chain->next != mainpos_e) { + chain = (upb_tabent*)chain->next; + assert(chain); + } chain->next = new_e; our_e = mainpos_e; our_e->next = NULL; @@ -117,27 +130,35 @@ static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val, } static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val, + upb_tabkey *removed, upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { - upb_tabent *chain = hash(t, key); + upb_tabent *chain = (upb_tabent*)hash(t, key); + if (upb_tabent_isempty(chain)) return false; if (eql(chain->key, key)) { + // Element to remove is at the head of its chain. t->count--; if (val) *val = chain->val; if (chain->next) { - upb_tabent *move = chain->next; + upb_tabent *move = (upb_tabent*)chain->next; *chain = *move; + *removed = move->key; move->key.num = 0; // Make the slot empty. } else { + *removed = chain->key; chain->key.num = 0; // Make the slot empty. } return true; } else { + // Element to remove is either in a non-head position or not in the table. while (chain->next && !eql(chain->next->key, key)) - chain = chain->next; + chain = (upb_tabent*)chain->next; if (chain->next) { // Found element to remove. 
if (val) *val = chain->next->val; - chain->next->key.num = 0; - chain->next = chain->next->next; + upb_tabent *remove = (upb_tabent*)chain->next; + *removed = remove->key; + remove->key.num = 0; + chain->next = remove->next; t->count--; return true; } else { @@ -146,13 +167,16 @@ static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val, } } -static upb_tabent *upb_table_next(const upb_table *t, upb_tabent *e) { - upb_tabent *end = t->entries + upb_table_size(t); +static const upb_tabent *upb_table_next(const upb_table *t, + const upb_tabent *e) { + const upb_tabent *end = t->entries + upb_table_size(t); do { if (++e == end) return NULL; } while(e->key.num == 0); return e; } -static upb_tabent *upb_table_begin(const upb_table *t) { +// TODO: is calculating t->entries - 1 undefined behavior? If so find a better +// solution. +static const upb_tabent *upb_table_begin(const upb_table *t) { return upb_table_next(t, t->entries - 1); } @@ -161,7 +185,7 @@ static upb_tabent *upb_table_begin(const upb_table *t) { // A simple "subclass" of upb_table that only adds a hash function for strings. -static upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) { +static const upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) { // Could avoid the strlen() by using a hash function that terminates on NULL. 
return t->entries + (MurmurHash2(key.str, strlen(key.str), 0) & t->mask); } @@ -170,11 +194,13 @@ static bool upb_streql(upb_tabkey k1, upb_tabkey k2) { return strcmp(k1.str, k2.str) == 0; } -bool upb_strtable_init(upb_strtable *t) { return upb_table_init(&t->t, 4); } +bool upb_strtable_init(upb_strtable *t, upb_ctype_t type) { + return upb_table_init(&t->t, type, 2); +} void upb_strtable_uninit(upb_strtable *t) { for (size_t i = 0; i < upb_table_size(&t->t); i++) - free(t->t.entries[i].key.str); + free((void*)t->t.entries[i].key.str); upb_table_uninit(&t->t); } @@ -182,7 +208,8 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) { if (upb_table_isfull(&t->t)) { // Need to resize. New table of double the size, add old elements to it. upb_strtable new_table; - if (!upb_table_init(&new_table.t, t->t.size_lg2 + 1)) return false; + if (!upb_table_init(&new_table.t, t->t.type, t->t.size_lg2 + 1)) + return false; upb_strtable_iter i; upb_strtable_begin(&i, t); for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { @@ -192,15 +219,23 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) { upb_strtable_uninit(t); *t = new_table; } - if ((k = strdup(k)) == NULL) return false; + if ((k = upb_strdup(k)) == NULL) return false; upb_table_insert(&t->t, upb_strkey(k), v, &upb_strhash, &upb_streql); return true; } -upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) { +const upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) { return upb_table_lookup(&t->t, upb_strkey(key), &upb_strhash, &upb_streql); } +bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) { + upb_tabkey removed; + bool found = upb_table_remove( + &t->t, upb_strkey(key), val, &removed, &upb_strhash, &upb_streql); + if (found) free((void*)removed.str); + return found; +} + void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) { i->t = t; i->e = upb_table_begin(&t->t); @@ -224,8 +259,9 @@ size_t 
upb_inttable_count(const upb_inttable *t) { return t->t.count + t->array_count; } -bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) { - if (!upb_table_init(&t->t, hsize_lg2)) return false; +bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t type, + size_t asize, int hsize_lg2) { + if (!upb_table_init(&t->t, type, hsize_lg2)) return false; // Always make the array part at least 1 long, so that we know key 0 // won't be in the hash part, which simplifies things. t->array_size = UPB_MAX(1, asize); @@ -236,17 +272,32 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) { upb_table_uninit(&t->t); return false; } - memset(t->array, 0xff, array_bytes); + memset((void*)t->array, 0xff, array_bytes); return true; } -bool upb_inttable_init(upb_inttable *t) { - return upb_inttable_sizedinit(t, 0, 4); +bool upb_inttable_init(upb_inttable *t, upb_ctype_t type) { + return upb_inttable_sizedinit(t, type, 0, 4); } void upb_inttable_uninit(upb_inttable *t) { upb_table_uninit(&t->t); - free(t->array); + free((void*)t->array); +} + +static void upb_inttable_check(upb_inttable *t) { + UPB_UNUSED(t); +#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG) + // This check is very expensive (makes inserts/deletes O(N)). + size_t count = 0; + upb_inttable_iter i; + upb_inttable_begin(&i, t); + for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) { + const upb_value *v = upb_inttable_lookup(t, upb_inttable_iter_key(&i)); + assert(v); + } + assert(count == upb_inttable_count(t)); +#endif } bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) { @@ -254,45 +305,78 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) { if (key < t->array_size) { assert(!upb_arrhas(t->array[key])); t->array_count++; - t->array[key] = val; + ((upb_value*)t->array)[key] = val; } else { if (upb_table_isfull(&t->t)) { // Need to resize the hash part, but we re-use the array part. 
upb_table new_table; - if (!upb_table_init(&new_table, t->t.size_lg2 + 1)) return false; - upb_tabent *e; + if (!upb_table_init(&new_table, t->t.type, t->t.size_lg2 + 1)) + return false; + const upb_tabent *e; for (e = upb_table_begin(&t->t); e; e = upb_table_next(&t->t, e)) upb_table_insert(&new_table, e->key, e->val, &upb_inthash, &upb_inteql); + + assert(t->t.count == new_table.count); + upb_table_uninit(&t->t); t->t = new_table; } upb_table_insert(&t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql); } + upb_inttable_check(t); return true; } -upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) { +const upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) { if (key < t->array_size) { - upb_value *v = &t->array[key]; + const upb_value *v = &t->array[key]; return upb_arrhas(*v) ? v : NULL; } return upb_table_lookup(&t->t, upb_intkey(key), &upb_inthash, &upb_inteql); } bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { + bool success; if (key < t->array_size) { if (upb_arrhas(t->array[key])) { t->array_count--; if (val) *val = t->array[key]; - t->array[key] = upb_value_uint64(-1); - return true; + ((upb_value*)t->array)[key] = upb_value_uint64(-1); + success = true; } else { - return false; + success = false; } } else { - return upb_table_remove( - &t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql); + upb_tabkey removed; + success = upb_table_remove( + &t->t, upb_intkey(key), val, &removed, &upb_inthash, &upb_inteql); } + upb_inttable_check(t); + return success; +} + +bool upb_inttable_push(upb_inttable *t, upb_value val) { + return upb_inttable_insert(t, upb_inttable_count(t), val); +} + +upb_value upb_inttable_pop(upb_inttable *t) { + upb_value val; + bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val); + UPB_ASSERT_VAR(ok, ok); + return val; +} + +bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) { + return upb_inttable_insert(t, (uintptr_t)key, val); +} + 
+const upb_value *upb_inttable_lookupptr(const upb_inttable *t, + const void *key) { + return upb_inttable_lookup(t, (uintptr_t)key); +} + +bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) { + return upb_inttable_remove(t, (uintptr_t)key, val); } void upb_inttable_compact(upb_inttable *t) { @@ -301,7 +385,10 @@ void upb_inttable_compact(upb_inttable *t) { upb_inttable_iter i; for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i)) counts[upb_log2(upb_inttable_iter_key(&i))]++; - int count = upb_inttable_count(t); + // Int part must always be at least 1 entry large to catch lookups of key 0. + // Key 0 must always be in the array part because "0" in the hash part + // denotes an empty entry. + int count = UPB_MAX(upb_inttable_count(t), 1); int size; for (size = UPB_MAXARRSIZE; size > 1; size--) { count -= counts[size]; @@ -311,7 +398,8 @@ void upb_inttable_compact(upb_inttable *t) { // Insert all elements into new, perfectly-sized table. upb_inttable new_table; int hashsize = (upb_inttable_count(t) - count + 1) / MAX_LOAD; - upb_inttable_sizedinit(&new_table, size, upb_log2(hashsize) + 1); + + upb_inttable_sizedinit(&new_table, t->t.type, size, upb_log2(hashsize)); for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i)) upb_inttable_insert( &new_table, upb_inttable_iter_key(&i), upb_inttable_iter_value(&i)); @@ -352,7 +440,7 @@ void upb_inttable_next(upb_inttable_iter *iter) { // 1. It will not work incrementally. // 2. It will not produce the same results on little-endian and big-endian // machines. -static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) { +uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) { // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. 
const uint32_t m = 0x5bd1e995; @@ -403,7 +491,7 @@ static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) { #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } -static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) { +uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) { const uint32_t m = 0x5bd1e995; const int32_t r = 24; const uint8_t * data = (const uint8_t *)key; diff --git a/upb/table.h b/upb/table.h index f6bff66..80f6813 100644 --- a/upb/table.h +++ b/upb/table.h @@ -17,13 +17,16 @@ * * This header is internal to upb; its interface should not be considered * public or stable. + * + * The table must be homogenous (all values of the same type). We currently + * enforce this on insert but store the full upb_value (with type) anyway. + * This is required with the current interface because lookups vend a pointer + * to the table's internal storage. */ #ifndef UPB_TABLE_H_ #define UPB_TABLE_H_ -#include -#include #include "upb.h" #ifdef __cplusplus @@ -32,45 +35,80 @@ extern "C" { typedef union { uintptr_t num; - char *str; // We own, nullz. + const char *str; // We own, nullz. } upb_tabkey; +#define UPB_TABKEY_NUM(n) {n} +#ifdef UPB_C99 +#define UPB_TABKEY_STR(s) {.str = s} +#endif +// TODO(haberman): C++ +#define UPB_TABKEY_NONE {0} + typedef struct _upb_tabent { upb_tabkey key; + // Storing a upb_value here wastes a bit of memory in debug mode because + // we are storing the type for each value even though we enforce that all + // values are the same. But since this only affects debug mode, we don't + // worry too much about it. The same applies to upb_inttable.array below. upb_value val; - struct _upb_tabent *next; // Internal chaining. + // Internal chaining. This is const so we can create static initializers for + // tables. We cast away const sometimes, but *only* when the containing + // upb_table is known to be non-const. 
This requires a bit of care, but + // the subtlety is confined to table.c. + const struct _upb_tabent *next; } upb_tabent; typedef struct { - upb_tabent *entries; // Hash table. size_t count; // Number of entries in the hash part. size_t mask; // Mask to turn hash value -> bucket. + upb_ctype_t type; // Type of all values. uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries. + const upb_tabent *entries; // Hash table. } upb_table; typedef struct { upb_table t; } upb_strtable; +#define UPB_STRTABLE_INIT(count, mask, type, size_lg2, entries) \ + {{count, mask, type, size_lg2, entries}} + typedef struct { - upb_table t; // For entries that don't fit in the array part. - upb_value *array; // Array part of the table. - size_t array_size; // Array part size. - size_t array_count; // Array part number of elements. + upb_table t; // For entries that don't fit in the array part. + const upb_value *array; // Array part of the table. + size_t array_size; // Array part size. + size_t array_count; // Array part number of elements. } upb_inttable; -INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; } +#define UPB_INTTABLE_INIT(count, mask, type, size_lg2, ent, a, asize, acount) \ + {{count, mask, type, size_lg2, ent}, a, asize, acount} -INLINE upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) { - return t->entries + ((uint32_t)key.num & t->mask); +#define UPB_EMPTY_INTTABLE_INIT(type) \ + UPB_INTTABLE_INIT(0, 0, type, 0, NULL, NULL, 0, 0) + +#define UPB_ARRAY_EMPTYENT UPB_VALUE_INIT_INT64(-1) + +INLINE size_t upb_table_size(const upb_table *t) { + if (t->size_lg2 == 0) + return 0; + else + return 1 << t->size_lg2; } +// Internal-only functions, in .h file only out of necessity. 
+INLINE bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; } +INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; } +INLINE const upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) { + return t->entries + ((uint32_t)key.num & t->mask); +} INLINE bool upb_arrhas(upb_value v) { return v.val.uint64 != (uint64_t)-1; } +uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); // Initialize and uninitialize a table, respectively. If memory allocation // failed, false is returned that the table is uninitialized. -bool upb_inttable_init(upb_inttable *table); -bool upb_strtable_init(upb_strtable *table); +bool upb_inttable_init(upb_inttable *table, upb_ctype_t type); +bool upb_strtable_init(upb_strtable *table, upb_ctype_t type); void upb_inttable_uninit(upb_inttable *table); void upb_strtable_uninit(upb_strtable *table); @@ -90,14 +128,24 @@ bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val); // Looks up key in this table, returning a pointer to the table's internal copy // of the user's inserted data, or NULL if this key is not in the table. The -// user is free to modify the given upb_value, which will be reflected in any -// future lookups of this key. The returned pointer is invalidated by inserts. -upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key); -upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key); +// returned pointer is invalidated by inserts. +const upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key); +const upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key); // Removes an item from the table. Returns true if the remove was successful, // and stores the removed item in *val if non-NULL. bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); +bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val); + +// Handy routines for treating an inttable like a stack. 
May not be mixed with +// other insert/remove calls. +bool upb_inttable_push(upb_inttable *t, upb_value val); +upb_value upb_inttable_pop(upb_inttable *t); + +// Convenience routines for inttables with pointer keys. +bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val); +bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val); +const upb_value *upb_inttable_lookupptr(const upb_inttable *t, const void *key); // Optimizes the table for the current set of entries, for both memory use and // lookup time. Client should call this after all entries have been inserted; @@ -105,12 +153,15 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); void upb_inttable_compact(upb_inttable *t); // A special-case inlinable version of the lookup routine for 32-bit integers. -INLINE upb_value *upb_inttable_lookup32(const upb_inttable *t, uint32_t key) { +INLINE const upb_value *upb_inttable_lookup32(const upb_inttable *t, + uint32_t key) { if (key < t->array_size) { - upb_value *v = &t->array[key]; + const upb_value *v = &t->array[key]; return upb_arrhas(*v) ? v : NULL; } - for (upb_tabent *e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) { + const upb_tabent *e; + if (t->t.entries == NULL) return NULL; + for (e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) { if ((uint32_t)e->key.num == key) return &e->val; if (e->next == NULL) return NULL; } @@ -124,12 +175,12 @@ INLINE upb_value *upb_inttable_lookup32(const upb_inttable *t, uint32_t key) { // upb_strtable_begin(&i, t); // for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { // const char *key = upb_strtable_iter_key(&i); -// const myval *val = upb_strtable_iter_value(&i); +// const upb_value val = upb_strtable_iter_value(&i); // // ... 
// } typedef struct { const upb_strtable *t; - upb_tabent *e; + const upb_tabent *e; } upb_strtable_iter; void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t); @@ -149,13 +200,15 @@ INLINE upb_value upb_strtable_iter_value(upb_strtable_iter *i) { // upb_inttable_iter i; // upb_inttable_begin(&i, t); // for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { +// uintptr_t key = upb_inttable_iter_key(&i); +// upb_value val = upb_inttable_iter_value(&i); // // ... // } typedef struct { const upb_inttable *t; union { - upb_tabent *ent; // For hash iteration. - upb_value *val; // For array iteration. + const upb_tabent *ent; // For hash iteration. + const upb_value *val; // For array iteration. } ptr; uintptr_t arrkey; bool array_part; diff --git a/upb/upb.c b/upb/upb.c index 5a00961..226fc78 100644 --- a/upb/upb.c +++ b/upb/upb.c @@ -29,24 +29,31 @@ void upb_status_uninit(upb_status *status) { free(status->buf); } -void upb_status_seterrf(upb_status *s, const char *msg, ...) { - s->code = UPB_ERROR; +bool upb_ok(const upb_status *status) { return !status->error; } +bool upb_eof(const upb_status *status) { return status->eof_; } + +void upb_status_seterrf(upb_status *status, const char *msg, ...) 
{ + if (!status) return; + status->error = true; + status->space = NULL; va_list args; va_start(args, msg); - upb_vrprintf(&s->buf, &s->bufsize, 0, msg, args); + upb_vrprintf(&status->buf, &status->bufsize, 0, msg, args); va_end(args); - s->str = s->buf; + status->str = status->buf; } void upb_status_seterrliteral(upb_status *status, const char *msg) { + if (!status) return; status->error = true; status->str = msg; status->space = NULL; } void upb_status_copy(upb_status *to, const upb_status *from) { + if (!to) return; to->error = from->error; - to->eof = from->eof; + to->eof_ = from->eof_; to->code = from->code; to->space = from->space; if (from->str == from->buf) { @@ -78,19 +85,26 @@ const char *upb_status_getstr(const upb_status *_status) { } void upb_status_clear(upb_status *status) { + if (!status) return; status->error = false; - status->eof = false; + status->eof_ = false; status->code = 0; status->space = NULL; status->str = NULL; } void upb_status_setcode(upb_status *status, upb_errorspace *space, int code) { + if (!status) return; status->code = code; status->space = space; status->str = NULL; } +void upb_status_seteof(upb_status *status) { + if (!status) return; + status->eof_ = true; +} + int upb_vrprintf(char **buf, size_t *size, size_t ofs, const char *fmt, va_list args) { // Try once without reallocating. We have to va_copy because we might have diff --git a/upb/upb.h b/upb/upb.h index 245d86f..a7a3ed1 100644 --- a/upb/upb.h +++ b/upb/upb.h @@ -5,6 +5,9 @@ * Author: Josh Haberman * * This file contains shared definitions that are widely used across upb. + * + * This is a mixed C/C++ interface that offers a full API to both languages. + * See the top-level README for more information. 
*/ #ifndef UPB_H_ @@ -25,6 +28,28 @@ extern "C" { #define INLINE static inline #endif +#if __STDC_VERSION__ >= 199901L +#define UPB_C99 +#endif + +#if (defined(__cplusplus) && __cplusplus >= 201103L) || defined(__GXX_EXPERIMENTAL_CXX0X__) +#define UPB_CXX11 +#endif + +#if defined(__GXX_EXPERIMENTAL_CXX0X__) && !defined(UPB_NO_CXX11) +#define UPB_DISALLOW_POD_OPS(class_name) \ + class_name() = delete; \ + ~class_name() = delete; \ + class_name(const class_name&) = delete; \ + void operator=(const class_name&) = delete; +#else +#define UPB_DISALLOW_POD_OPS(class_name) \ + class_name(); \ + ~class_name(); \ + class_name(const class_name&); \ + void operator=(const class_name&); +#endif + #ifdef __GNUC__ #define UPB_NORETURN __attribute__((__noreturn__)) #else @@ -32,12 +57,33 @@ extern "C" { #endif #ifndef UINT16_MAX -#define UINT16_MAX 65535 +#define UINT16_MAX 0xffff +#endif + +#ifndef UINT32_MAX +#define UINT32_MAX 0xffffffff #endif #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) +// For our C-based inheritance, sometimes it's necessary to upcast an object to +// its base class. We try to minimize the need for this by replicating base +// class functions in the derived class -- the derived class functions simply +// forward to the base class implementations. This strategy simplifies the C++ +// API since we can't use real C++ inheritance. +#define upb_upcast(obj) (&(obj)->base) +#define upb_upcast2(obj) upb_upcast(upb_upcast(obj)) + +char *upb_strdup(const char *s); + +#define UPB_UNUSED(var) (void)var + +// For asserting something about a variable when the variable is not used for +// anything else. This prevents "unused variable" warnings when compiling in +// debug mode. +#define UPB_ASSERT_VAR(var, predicate) UPB_UNUSED(var); assert(predicate) + // The maximum that any submessages can be nested. Matches proto2's limit. 
// At the moment this specifies the size of several statically-sized arrays // and therefore setting it high will cause more memory to be used. Will @@ -45,19 +91,7 @@ extern "C" { // TODO: make this a runtime-settable property of upb_handlers. #define UPB_MAX_NESTING 64 -// The maximum number of fields that any one .proto type can have. Note that -// this is very different than the max field number. It is hard to imagine a -// scenario where more than 2k fields (each with its own name and field number) -// makes sense. The .proto file to describe it would be 2000 lines long and -// contain 2000 unique names. -// -// With this limit we can store a has-bit offset in 8 bits (2**8 * 8 = 2048) -// and we can store a value offset in 16 bits, since the maximum message -// size is 16,640 bytes (2**8 has-bits + 2048 * 8-byte value). Note that -// strings and arrays are not counted in this, only the *pointer* to them is. -// An individual string or array is unaffected by this 16k byte limit. -#define UPB_MAX_FIELDS (2048) - +// Inherent limit of protobuf wire format and schema definition. #define UPB_MAX_FIELDNUMBER ((1 << 29) - 1) // Nested type names are separated by periods. @@ -81,7 +115,99 @@ extern "C" { #define UPB_MAX_TYPE_DEPTH 64 -/* upb_value ******************************************************************/ +/* upb::Status ****************************************************************/ + +#ifdef __cplusplus +namespace upb { class Status; } +typedef upb::Status upb_status; +#else +struct upb_status; +typedef struct upb_status upb_status; +#endif + +typedef enum { + UPB_OK, // The operation completed successfully. + UPB_SUSPENDED, // The operation was suspended and may be resumed later. + UPB_ERROR, // An error occurred. +} upb_success_t; + +typedef struct { + const char *name; + // Writes a NULL-terminated string to "buf" containing an error message for + // the given error code, returning false if the message was too large to fit. 
+ bool (*code_to_string)(int code, char *buf, size_t len); +} upb_errorspace; + +#ifdef __cplusplus + +class upb::Status { + public: + typedef upb_success_t Success; + + Status(); + ~Status(); + + bool ok(); + bool eof(); + + const char *GetString() const; + void SetEof(); + void SetErrorLiteral(const char* msg); + void Clear(); + + private: +#else +struct upb_status { +#endif + bool error; + bool eof_; + + // Specific status code defined by some error space (optional). + int code; + upb_errorspace *space; + + // Error message (optional). + const char *str; // NULL when no message is present. NULL-terminated. + char *buf; // Owned by the status. + size_t bufsize; +}; + +#define UPB_STATUS_INIT {UPB_OK, false, 0, NULL, NULL, NULL, 0} + +void upb_status_init(upb_status *status); +void upb_status_uninit(upb_status *status); + +bool upb_ok(const upb_status *status); +bool upb_eof(const upb_status *status); + +// Any of the functions that write to a status object allow status to be NULL, +// to support use cases where the function's caller does not care about the +// status message. +void upb_status_clear(upb_status *status); +void upb_status_seterrliteral(upb_status *status, const char *msg); +void upb_status_seterrf(upb_status *status, const char *msg, ...); +void upb_status_setcode(upb_status *status, upb_errorspace *space, int code); +void upb_status_seteof(upb_status *status); +// The returned string is invalidated by any other call into the status. +const char *upb_status_getstr(const upb_status *status); +void upb_status_copy(upb_status *to, const upb_status *from); + +// Like vasprintf (which allocates a string large enough for the result), but +// uses *buf (which can be NULL) as a starting point and reallocates it only if +// the new value will not fit. "size" is updated to reflect the allocated size +// of the buffer. Starts writing at the given offset into the string; bytes +// preceding this offset are unaffected. 
Returns the new length of the string, +// or -1 on memory allocation failure. +int upb_vrprintf(char **buf, size_t *size, size_t ofs, + const char *fmt, va_list args); + + +/* upb::Value *****************************************************************/ + +// TODO(haberman): upb::Value is gross and should be retired from the public +// interface (we *may* still want to keep it for internal use). upb::Handlers +// and upb::Def should replace their use of Value with one function for each C +// type. // Clients should not need to access these enum values; they are used internally // to do typechecks of upb_value accesses. @@ -93,13 +219,19 @@ typedef enum { UPB_CTYPE_DOUBLE = 5, UPB_CTYPE_FLOAT = 6, UPB_CTYPE_BOOL = 7, - UPB_CTYPE_PTR = 8, - UPB_CTYPE_BYTEREGION = 9, - UPB_CTYPE_FIELDDEF = 10, + UPB_CTYPE_CSTR = 8, + UPB_CTYPE_PTR = 9, + UPB_CTYPE_BYTEREGION = 10, + UPB_CTYPE_FIELDDEF = 11, } upb_ctype_t; -struct _upb_byteregion; -struct _upb_fielddef; +#ifdef __cplusplus +namespace upb { class ByteRegion; } +typedef upb::ByteRegion upb_byteregion; +#else +struct upb_byteregion; +typedef struct upb_byteregion upb_byteregion; +#endif // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. 
@@ -112,9 +244,10 @@ typedef struct { double _double; float _float; bool _bool; - void *_void; - struct _upb_byteregion *byteregion; - const struct _upb_fielddef *fielddef; + char *cstr; + void *ptr; + const void *constptr; + upb_byteregion *byteregion; } val; #ifndef NDEBUG @@ -124,12 +257,32 @@ typedef struct { #endif } upb_value; +#ifdef UPB_C99 +#define UPB_VAL_INIT(v, member) {.member = v} +#endif +// TODO(haberman): C++ + #ifdef NDEBUG #define SET_TYPE(dest, val) +#define UPB_VALUE_INIT(v, member, type) {UPB_VAL_INIT(v, member)} #else #define SET_TYPE(dest, val) dest = val +#define UPB_VALUE_INIT(v, member, type) {UPB_VAL_INIT(v, member), type} #endif +#define UPB_VALUE_INIT_INT32(v) UPB_VALUE_INIT(v, int32, UPB_CTYPE_INT32) +#define UPB_VALUE_INIT_INT64(v) UPB_VALUE_INIT(v, int64, UPB_CTYPE_INT64) +#define UPB_VALUE_INIT_UINT32(v) UPB_VALUE_INIT(v, uint32, UPB_CTYPE_UINT32) +#define UPB_VALUE_INIT_UINT64(v) UPB_VALUE_INIT(v, uint64, UPB_CTYPE_UINT64) +#define UPB_VALUE_INIT_DOUBLE(v) UPB_VALUE_INIT(v, _double, UPB_CTYPE_DOUBLE) +#define UPB_VALUE_INIT_FLOAT(v) UPB_VALUE_INIT(v, _float, UPB_CTYPE_FLOAT) +#define UPB_VALUE_INIT_BOOL(v) UPB_VALUE_INIT(v, _bool, UPB_CTYPE_BOOL) +#define UPB_VALUE_INIT_CSTR(v) UPB_VALUE_INIT(v, cstr, UPB_CTYPE_CSTR) +#define UPB_VALUE_INIT_PTR(v) UPB_VALUE_INIT(v, ptr, UPB_CTYPE_PTR) +#define UPB_VALUE_INIT_CONSTPTR(v) UPB_VALUE_INIT(v, constptr, UPB_CTYPE_PTR) +// Non-existent type, all reads will fail. +#define UPB_VALUE_INIT_NONE UPB_VALUE_INIT(NULL, ptr, -1) + // For each value type, define the following set of functions: // // // Get/set an int32 from a upb_value. 
@@ -174,12 +327,9 @@ ALL(int64, int64, int64_t, UPB_CTYPE_INT64); ALL(uint32, uint32, uint32_t, UPB_CTYPE_UINT32); ALL(uint64, uint64, uint64_t, UPB_CTYPE_UINT64); ALL(bool, _bool, bool, UPB_CTYPE_BOOL); -ALL(ptr, _void, void*, UPB_CTYPE_PTR); -ALL(byteregion, byteregion, struct _upb_byteregion*, UPB_CTYPE_BYTEREGION); - -// upb_fielddef should never be modified from a callback -// (ie. when they're getting passed through a upb_value). -ALL(fielddef, fielddef, const struct _upb_fielddef*, UPB_CTYPE_FIELDDEF); +ALL(cstr, cstr, char*, UPB_CTYPE_CSTR); +ALL(ptr, ptr, void*, UPB_CTYPE_PTR); +ALL(byteregion, byteregion, upb_byteregion*, UPB_CTYPE_BYTEREGION); #ifdef __KERNEL__ // Linux kernel modules are compiled without SSE and therefore are incapable @@ -199,64 +349,55 @@ ALL(float, _float, float, UPB_CTYPE_FLOAT); extern upb_value UPB_NO_VALUE; +#ifdef __cplusplus +} // extern "C" -/* upb_status *****************************************************************/ - -typedef enum { - UPB_OK, // The operation completed successfully. - UPB_SUSPENDED, // The operation was suspended and may be resumed later. - UPB_ERROR, // An error occurred. -} upb_success_t; - -typedef struct { - const char *name; - // Writes a NULL-terminated string to "buf" containing an error message for - // the given error code, returning false if the message was too large to fit. - bool (*code_to_string)(int code, char *buf, size_t len); -} upb_errorspace; - -typedef struct { - bool error; - bool eof; - - // Specific status code defined by some error space (optional). - int code; - upb_errorspace *space; - - // Error message (optional). - const char *str; // NULL when no message is present. NULL-terminated. - char *buf; // Owned by the status. 
- size_t bufsize; -} upb_status; - -#define UPB_STATUS_INIT {UPB_OK, false, 0, NULL, NULL, NULL, 0} +namespace upb { -void upb_status_init(upb_status *status); -void upb_status_uninit(upb_status *status); +typedef upb_value Value; -INLINE bool upb_ok(const upb_status *status) { return !status->error; } -INLINE bool upb_eof(const upb_status *status) { return status->eof; } +template <typename T> T GetValue(Value v); +template <typename T> Value MakeValue(T v); -void upb_status_clear(upb_status *status); -void upb_status_seterrliteral(upb_status *status, const char *msg); -void upb_status_seterrf(upb_status *s, const char *msg, ...); -void upb_status_setcode(upb_status *s, upb_errorspace *space, int code); -INLINE void upb_status_seteof(upb_status *s) { s->eof = true; } -// The returned string is invalidated by any other call into the status. -const char *upb_status_getstr(const upb_status *s); -void upb_status_copy(upb_status *to, const upb_status *from); +#define UPB_VALUE_ACCESSORS(type, ctype) \ + template <> inline ctype GetValue(Value v) { \ + return upb_value_get ## type(v); \ + } \ + template <> inline Value MakeValue(ctype v) { \ + return upb_value_ ## type(v); \ + } -// Like vasprintf (which allocates a string large enough for the result), but -// uses *buf (which can be NULL) as a starting point and reallocates it only if -// the new value will not fit. "size" is updated to reflect the allocated size -// of the buffer. Starts writing at the given offset into the string; bytes -// preceding this offset are unaffected. Returns the new length of the string, -// or -1 on memory allocation failure. 
-int upb_vrprintf(char **buf, size_t *size, size_t ofs, - const char *fmt, va_list args); +UPB_VALUE_ACCESSORS(double, double); +UPB_VALUE_ACCESSORS(float, float); +UPB_VALUE_ACCESSORS(int32, int32_t); +UPB_VALUE_ACCESSORS(int64, int64_t); +UPB_VALUE_ACCESSORS(uint32, uint32_t); +UPB_VALUE_ACCESSORS(uint64, uint64_t); +UPB_VALUE_ACCESSORS(bool, bool); + +#undef UPB_VALUE_ACCESSORS + +template <typename T> inline T* GetPtrValue(Value v) { + return static_cast<T*>(upb_value_getptr(v)); +} +template <typename T> inline Value MakePtrValue(T* v) { + return upb_value_ptr(static_cast<void*>(v)); +} + +// C++ Wrappers +inline Status::Status() { upb_status_init(this); } +inline Status::~Status() { upb_status_uninit(this); } +inline bool Status::ok() { return upb_ok(this); } +inline bool Status::eof() { return upb_eof(this); } +inline const char *Status::GetString() const { return upb_status_getstr(this); } +inline void Status::SetEof() { upb_status_seteof(this); } +inline void Status::SetErrorLiteral(const char* msg) { + upb_status_seterrliteral(this, msg); +} +inline void Status::Clear() { upb_status_clear(this); } + +} // namespace upb -#ifdef __cplusplus -} /* extern "C" */ #endif #endif /* UPB_H_ */ -- cgit v1.2.3